Updated PCRE source to version 8.32 (bug 5593).

This commit is contained in:
Nicholas Hastings 2013-03-17 11:32:03 -04:00
parent ed6d8521e6
commit 6ce00034a2
340 changed files with 154241 additions and 63858 deletions

View File

@ -232,6 +232,23 @@ while (<STDIN>)
redo; # Process the joined lines redo; # Process the joined lines
} }
# .EX/.EE are used in the pcredemo page to bracket the entire program,
# which is unmodified except for turning backslash into "\e".
# Every line between the two markers is copied to TEMP, after the
# substitutions below, inside a preformatted HTML block.
elsif (/^\.EX\s*$/)
{
print TEMP "<PRE>\n";
# NOTE(review): no matching "</PRE>" is written in this branch - confirm
# that it is emitted elsewhere.
while (<STDIN>)
{
# Stop at the closing .EE marker; the marker line itself is not copied.
last if /^\.EE\s*$/;
# Undo the man-page escaping: "\e" becomes a literal backslash again.
s/\\e/\\/g;
# Escape the HTML metacharacters. "&" is handled first so that the
# entities generated for "<" and ">" are not escaped a second time.
s/&/&amp;/g;
s/</&lt;/g;
s/>/&gt;/g;
print TEMP;
}
}
# Ignore anything not recognized # Ignore anything not recognized
next; next;

View File

@ -8,16 +8,38 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service, University of Cambridge Computing Service,
Cambridge, England. Cambridge, England.
Copyright (c) 1997-2009 University of Cambridge Copyright (c) 1997-2012 University of Cambridge
All rights reserved All rights reserved
PCRE JUST-IN-TIME COMPILATION SUPPORT
-------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright(c) 2010-2012 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright(c) 2009-2012 Zoltan Herczeg
All rights reserved.
THE C++ WRAPPER LIBRARY THE C++ WRAPPER LIBRARY
----------------------- -----------------------
Written by: Google Inc. Written by: Google Inc.
Copyright (c) 2007-2008 Google Inc Copyright (c) 2007-2012 Google Inc
All rights reserved All rights reserved
#### ####

View File

@ -35,10 +35,37 @@
# to disable the final configuration report. # to disable the final configuration report.
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that # 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
# are set by specifying a release type. # are set by specifying a release type.
# 2010-01-02 PH added test for stdint.h
# 2010-03-02 PH added test for inttypes.h
# 2011-08-01 PH added PCREGREP_BUFSIZE
# 2011-08-22 PH added PCRE_SUPPORT_JIT
# 2011-09-06 PH modified WIN32 ADD_TEST line as suggested by Sergey Cherepanov
# 2011-09-06 PH added PCRE_SUPPORT_PCREGREP_JIT
# 2011-10-04 Sheri added support for including coff data in windows shared libraries
# compiled with MINGW if pcre.rc and/or pcreposix.rc are placed in
# the source dir by the user prior to building
# 2011-10-04 Sheri changed various add_test's to use exes' location built instead
# of DEBUG location only (likely only matters in MSVC)
# 2011-10-04 Sheri added scripts to provide needed variables to RunTest and
# RunGrepTest (used for UNIX and Msys)
# 2011-10-04 Sheri added scripts to provide needed variables and to execute
# RunTest.bat in Win32 (for effortless testing with "make test")
# 2011-10-04 Sheri Increased minimum required cmake version
# 2012-01-06 PH removed pcre_info.c and added pcre_string_utils.c
# 2012-01-10 Zoltan Herczeg added libpcre16 support
# 2012-01-13 Stephen Kelly added out of source build support
# 2012-01-17 PH applied Stephen Kelly's patch to parse the version data out
# of the configure.ac file
# 2012-02-26 PH added support for libedit
# 2012-09-06 PH added support for PCRE_EBCDIC_NL25
# 2012-09-08 ChPe added PCRE32 support
# 2012-10-23 PH added support for VALGRIND and GCOV
PROJECT(PCRE C CXX) PROJECT(PCRE C CXX)
CMAKE_MINIMUM_REQUIRED(VERSION 2.4.6) # Increased minimum to 2.8.0 to support newer add_test features
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
@ -46,6 +73,7 @@ SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) # for FindReadline.cmake
FIND_PACKAGE( BZip2 ) FIND_PACKAGE( BZip2 )
FIND_PACKAGE( ZLIB ) FIND_PACKAGE( ZLIB )
FIND_PACKAGE( Readline ) FIND_PACKAGE( Readline )
FIND_PACKAGE( Editline )
# Configuration checks # Configuration checks
@ -55,6 +83,8 @@ INCLUDE(CheckFunctionExists)
INCLUDE(CheckTypeSize) INCLUDE(CheckTypeSize)
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H) CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
CHECK_INCLUDE_FILE(inttypes.h HAVE_INTTYPES_H)
CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H) CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H)
CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H) CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H) CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
@ -81,10 +111,19 @@ CHECK_TYPE_SIZE("unsigned long long" UNSIGNED_LONG_LONG)
SET(BUILD_SHARED_LIBS OFF CACHE BOOL SET(BUILD_SHARED_LIBS OFF CACHE BOOL
"Build shared libraries instead of static ones.") "Build shared libraries instead of static ones.")
OPTION(PCRE_BUILD_PCRE8 "Build 8 bit PCRE library" ON)
OPTION(PCRE_BUILD_PCRE16 "Build 16 bit PCRE library" OFF)
OPTION(PCRE_BUILD_PCRE32 "Build 32 bit PCRE library" OFF)
OPTION(PCRE_BUILD_PCRECPP "Build the PCRE C++ library (pcrecpp)." ON) OPTION(PCRE_BUILD_PCRECPP "Build the PCRE C++ library (pcrecpp)." ON)
SET(PCRE_EBCDIC OFF CACHE BOOL SET(PCRE_EBCDIC OFF CACHE BOOL
"Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems)") "Use EBCDIC coding instead of ASCII. (This is rarely used outside of mainframe systems.)")
SET(PCRE_EBCDIC_NL25 OFF CACHE BOOL
"Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")
SET(PCRE_LINK_SIZE "2" CACHE STRING SET(PCRE_LINK_SIZE "2" CACHE STRING
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.") "Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
@ -95,6 +134,9 @@ SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
SET(PCRE_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING SET(PCRE_MATCH_LIMIT_RECURSION "MATCH_LIMIT" CACHE STRING
"Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.") "Default limit on internal recursion. See MATCH_LIMIT_RECURSION in config.h.in for details.")
SET(PCREGREP_BUFSIZE "20480" CACHE STRING
"Buffer size parameter for pcregrep. See PCREGREP_BUFSIZE in config.h.in for details.")
SET(PCRE_NEWLINE "LF" CACHE STRING SET(PCRE_NEWLINE "LF" CACHE STRING
"What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).") "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF).")
@ -104,26 +146,31 @@ SET(PCRE_NO_RECURSE OFF CACHE BOOL
SET(PCRE_POSIX_MALLOC_THRESHOLD "10" CACHE STRING SET(PCRE_POSIX_MALLOC_THRESHOLD "10" CACHE STRING
"Threshold for malloc() usage. See POSIX_MALLOC_THRESHOLD in config.h.in for details.") "Threshold for malloc() usage. See POSIX_MALLOC_THRESHOLD in config.h.in for details.")
SET(PCRE_SUPPORT_UNICODE_PROPERTIES OFF CACHE BOOL SET(PCRE_SUPPORT_JIT OFF CACHE BOOL
"Enable support for Unicode properties. (If set, UTF-8 support will be enabled as well)") "Enable support for Just-in-time compiling.")
SET(PCRE_SUPPORT_UTF8 OFF CACHE BOOL SET(PCRE_SUPPORT_PCREGREP_JIT ON CACHE BOOL
"Enable support for the Unicode UTF-8 encoding.") "Enable use of Just-in-time compiling in pcregrep.")
SET(PCRE_SUPPORT_UTF OFF CACHE BOOL
"Enable support for Unicode Transformation Format (UTF-8/UTF-16/UTF-32) encoding.")
SET(PCRE_SUPPORT_UNICODE_PROPERTIES OFF CACHE BOOL
"Enable support for Unicode properties (if set, UTF support will be enabled as well).")
SET(PCRE_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL SET(PCRE_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL
"ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks") "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
SET(PCRE_SUPPORT_VALGRIND OFF CACHE BOOL
"Enable Valgrind support.")
SET(PCRE_SUPPORT_COVERAGE OFF CACHE BOOL
"Enable code coverage support using gcov.")
OPTION(PCRE_SHOW_REPORT "Show the final configuration report" ON) OPTION(PCRE_SHOW_REPORT "Show the final configuration report" ON)
OPTION(PCRE_BUILD_PCREGREP "Build pcregrep" ON) OPTION(PCRE_BUILD_PCREGREP "Build pcregrep" ON)
OPTION(PCRE_BUILD_TESTS "Build the tests" ON) OPTION(PCRE_BUILD_TESTS "Build the tests" ON)
# Building the tests requires pcregrep, so switch its build on (and say so)
# whenever tests are requested while pcregrep is disabled.
if(PCRE_BUILD_TESTS AND NOT PCRE_BUILD_PCREGREP)
  message(STATUS "** Building tests requires pcregrep: PCRE_BUILD_PCREGREP forced ON")
  set(PCRE_BUILD_PCREGREP ON)
endif()
IF (MINGW) IF (MINGW)
OPTION(NON_STANDARD_LIB_PREFIX OPTION(NON_STANDARD_LIB_PREFIX
"ON=Shared libraries built in mingw will be named pcre.dll, etc., instead of libpcre.dll, etc." "ON=Shared libraries built in mingw will be named pcre.dll, etc., instead of libpcre.dll, etc."
@ -150,6 +197,14 @@ IF(PCRE_SUPPORT_LIBZ)
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
ENDIF(PCRE_SUPPORT_LIBZ) ENDIF(PCRE_SUPPORT_LIBZ)
# editline lib
# The libedit option is only offered when find_package(Editline) succeeded,
# and it defaults to OFF.
if(EDITLINE_FOUND)
  option(PCRE_SUPPORT_LIBEDIT "Enable support for linking pcretest with libedit." OFF)
endif()

# Once the option is enabled, put the editline headers on the include path.
if(PCRE_SUPPORT_LIBEDIT)
  include_directories(${EDITLINE_INCLUDE_DIR})
endif()
# readline lib # readline lib
IF(READLINE_FOUND) IF(READLINE_FOUND)
OPTION (PCRE_SUPPORT_LIBREADLINE "Enable support for linking pcretest with libreadline." ON) OPTION (PCRE_SUPPORT_LIBREADLINE "Enable support for linking pcretest with libreadline." ON)
@ -186,18 +241,68 @@ IF(NOT BUILD_SHARED_LIBS)
SET(PCRE_STATIC 1) SET(PCRE_STATIC 1)
ENDIF(NOT BUILD_SHARED_LIBS) ENDIF(NOT BUILD_SHARED_LIBS)
# At least one library width has to be selected.
if(NOT (PCRE_BUILD_PCRE8 OR PCRE_BUILD_PCRE16 OR PCRE_BUILD_PCRE32))
  message(FATAL_ERROR "At least one of PCRE_BUILD_PCRE8, PCRE_BUILD_PCRE16 or PCRE_BUILD_PCRE32 must be enabled")
endif()

# Translate each PCRE_BUILD_PCRE<width> option into its SUPPORT_PCRE<width>
# variable.
foreach(_pcre_width 8 16 32)
  if(PCRE_BUILD_PCRE${_pcre_width})
    set(SUPPORT_PCRE${_pcre_width} 1)
  endif()
endforeach()

# The C++ wrapper and pcregrep both need the 8-bit library; switch them off
# (with a notice) when it is not being built.
if(NOT PCRE_BUILD_PCRE8)
  if(PCRE_BUILD_PCRECPP)
    message(STATUS "** PCRE_BUILD_PCRE8 must be enabled for the C++ library support")
    set(PCRE_BUILD_PCRECPP OFF)
  endif()
  if(PCRE_BUILD_PCREGREP)
    message(STATUS "** PCRE_BUILD_PCRE8 must be enabled for the pcregrep program")
    set(PCRE_BUILD_PCREGREP OFF)
  endif()
endif()

# libreadline and libedit are mutually exclusive.
if(PCRE_SUPPORT_LIBREADLINE AND PCRE_SUPPORT_LIBEDIT)
  message(FATAL_ERROR "Only one of libreadline or libeditline can be specified")
endif()
IF(PCRE_SUPPORT_BSR_ANYCRLF) IF(PCRE_SUPPORT_BSR_ANYCRLF)
SET(BSR_ANYCRLF 1) SET(BSR_ANYCRLF 1)
ENDIF(PCRE_SUPPORT_BSR_ANYCRLF) ENDIF(PCRE_SUPPORT_BSR_ANYCRLF)
IF(PCRE_SUPPORT_UTF8 OR PCRE_SUPPORT_UNICODE_PROPERTIES) IF(PCRE_SUPPORT_UTF OR PCRE_SUPPORT_UNICODE_PROPERTIES)
SET(SUPPORT_UTF8 1) SET(SUPPORT_UTF 1)
ENDIF(PCRE_SUPPORT_UTF8 OR PCRE_SUPPORT_UNICODE_PROPERTIES) SET(PCRE_SUPPORT_UTF ON)
ENDIF(PCRE_SUPPORT_UTF OR PCRE_SUPPORT_UNICODE_PROPERTIES)
IF(PCRE_SUPPORT_UNICODE_PROPERTIES) IF(PCRE_SUPPORT_UNICODE_PROPERTIES)
SET(SUPPORT_UCP 1) SET(SUPPORT_UCP 1)
ENDIF(PCRE_SUPPORT_UNICODE_PROPERTIES) ENDIF(PCRE_SUPPORT_UNICODE_PROPERTIES)
# Map the JIT, Valgrind and coverage options onto their SUPPORT_* variables.
if(PCRE_SUPPORT_JIT)
  set(SUPPORT_JIT 1)
endif()

if(PCRE_SUPPORT_PCREGREP_JIT)
  set(SUPPORT_PCREGREP_JIT 1)
endif()

if(PCRE_SUPPORT_VALGRIND)
  set(SUPPORT_VALGRIND 1)
endif()

if(PCRE_SUPPORT_COVERAGE)
  # Coverage uses gcov, so configuration is aborted for any compiler other
  # than GCC.
  if(NOT CMAKE_COMPILER_IS_GNUCC)
    message(FATAL_ERROR "Code coverage reports can only be generated when using GCC")
  endif()
  set(SUPPORT_GCOV 1)
endif()
# This next one used to contain # This next one used to contain
# SET(PCRETEST_LIBS ${READLINE_LIBRARY}) # SET(PCRETEST_LIBS ${READLINE_LIBRARY})
# but I was advised to add the NCURSES test as well, along with # but I was advised to add the NCURSES test as well, along with
@ -209,6 +314,13 @@ IF(PCRE_SUPPORT_LIBREADLINE)
SET(PCRETEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY}) SET(PCRETEST_LIBS ${READLINE_LIBRARY} ${NCURSES_LIBRARY})
ENDIF(PCRE_SUPPORT_LIBREADLINE) ENDIF(PCRE_SUPPORT_LIBREADLINE)
# libedit can stand in for libreadline: when it is selected, flag the support
# and make pcretest link against the editline and ncurses libraries.
if(PCRE_SUPPORT_LIBEDIT)
  set(SUPPORT_LIBEDIT 1)
  set(PCRETEST_LIBS ${EDITLINE_LIBRARY} ${NCURSES_LIBRARY})
endif()
IF(PCRE_SUPPORT_LIBZ) IF(PCRE_SUPPORT_LIBZ)
SET(SUPPORT_LIBZ 1) SET(SUPPORT_LIBZ 1)
SET(PCREGREP_LIBS ${PCREGREP_LIBS} ${ZLIB_LIBRARIES}) SET(PCREGREP_LIBS ${PCREGREP_LIBS} ${ZLIB_LIBRARIES})
@ -243,8 +355,25 @@ ENDIF(NEWLINE STREQUAL "")
IF(PCRE_EBCDIC) IF(PCRE_EBCDIC)
SET(EBCDIC 1) SET(EBCDIC 1)
IF(PCRE_NEWLINE STREQUAL "LF")
SET(NEWLINE "21")
ENDIF(PCRE_NEWLINE STREQUAL "LF")
IF(PCRE_NEWLINE STREQUAL "CRLF")
SET(NEWLINE "3349")
ENDIF(PCRE_NEWLINE STREQUAL "CRLF")
ENDIF(PCRE_EBCDIC) ENDIF(PCRE_EBCDIC)
# PCRE_EBCDIC_NL25 implies EBCDIC and uses 0x25 as the NL character. The
# numeric NEWLINE value is overridden for the LF and CRLF settings only.
if(PCRE_EBCDIC_NL25)
  set(EBCDIC 1)
  set(EBCDIC_NL25 1)
  if(PCRE_NEWLINE STREQUAL "LF")
    set(NEWLINE "37")    # 0x25
  elseif(PCRE_NEWLINE STREQUAL "CRLF")
    set(NEWLINE "3365")  # 0x0D25
  endif()
endif()
IF(PCRE_NO_RECURSE) IF(PCRE_NO_RECURSE)
SET(NO_RECURSE 1) SET(NO_RECURSE 1)
ENDIF(PCRE_NO_RECURSE) ENDIF(PCRE_NO_RECURSE)
@ -254,9 +383,29 @@ CONFIGURE_FILE(config-cmake.h.in
${PROJECT_BINARY_DIR}/config.h ${PROJECT_BINARY_DIR}/config.h
@ONLY) @ONLY)
CONFIGURE_FILE(pcre.h.generic # Parse version numbers and date out of configure.ac
# Extract the version numbers and release date from configure.ac. Each value
# is looked for in a line of the form
#   m4_define(<name>, [<value>]
# and stored in the upper-case variable of the same name (PCRE_MAJOR,
# PCRE_MINOR, PCRE_PRERELEASE, PCRE_DATE).
file(STRINGS ${PROJECT_SOURCE_DIR}/configure.ac
  configure_lines
  LIMIT_COUNT 50 # Read only the first 50 lines of the file
)

set(SEARCHED_VARIABLES "pcre_major" "pcre_minor" "pcre_prerelease" "pcre_date")
foreach(configure_line ${configure_lines})
  foreach(_substitution_variable ${SEARCHED_VARIABLES})
    string(TOUPPER ${_substitution_variable} _substitution_variable_upper)
    # Skip names that already hold a (true) value from an earlier line.
    if(NOT ${_substitution_variable_upper})
      # The line is quoted so that it always reaches the command as exactly
      # one argument.
      string(REGEX MATCH "m4_define\\(${_substitution_variable}, \\[(.*)\\]" MATCHED_STRING "${configure_line}")
      # Decide on the match as a whole, not on the truthiness of the captured
      # text: a captured value such as "0" is a false constant and would
      # otherwise never be stored.
      if(MATCHED_STRING)
        set(${_substitution_variable_upper} "${CMAKE_MATCH_1}")
      endif()
    endif()
  endforeach()
endforeach()
CONFIGURE_FILE(pcre.h.in
${PROJECT_BINARY_DIR}/pcre.h ${PROJECT_BINARY_DIR}/pcre.h
COPYONLY) @ONLY)
# What about pcre-config and libpcre.pc? # What about pcre-config and libpcre.pc?
@ -295,8 +444,10 @@ ENDIF(PCRE_REBUILD_CHARTABLES)
SET(PCRE_HEADERS ${PROJECT_BINARY_DIR}/pcre.h) SET(PCRE_HEADERS ${PROJECT_BINARY_DIR}/pcre.h)
IF(PCRE_BUILD_PCRE8)
SET(PCRE_SOURCES SET(PCRE_SOURCES
${PROJECT_BINARY_DIR}/pcre_chartables.c pcre_byte_order.c
pcre_chartables.c
pcre_compile.c pcre_compile.c
pcre_config.c pcre_config.c
pcre_dfa_exec.c pcre_dfa_exec.c
@ -304,14 +455,14 @@ SET(PCRE_SOURCES
pcre_fullinfo.c pcre_fullinfo.c
pcre_get.c pcre_get.c
pcre_globals.c pcre_globals.c
pcre_info.c pcre_jit_compile.c
pcre_newline.c
pcre_maketables.c pcre_maketables.c
pcre_newline.c
pcre_ord2utf8.c pcre_ord2utf8.c
pcre_refcount.c pcre_refcount.c
pcre_string_utils.c
pcre_study.c pcre_study.c
pcre_tables.c pcre_tables.c
pcre_try_flipped.c
pcre_ucd.c pcre_ucd.c
pcre_valid_utf8.c pcre_valid_utf8.c
pcre_version.c pcre_version.c
@ -322,6 +473,85 @@ SET(PCREPOSIX_HEADERS pcreposix.h)
SET(PCREPOSIX_SOURCES pcreposix.c) SET(PCREPOSIX_SOURCES pcreposix.c)
ENDIF(PCRE_BUILD_PCRE8)
# Source files of the 16-bit library. The list is only defined when
# PCRE_BUILD_PCRE16 is enabled.
IF(PCRE_BUILD_PCRE16)
SET(PCRE16_SOURCES
pcre16_byte_order.c
pcre16_chartables.c
pcre16_compile.c
pcre16_config.c
pcre16_dfa_exec.c
pcre16_exec.c
pcre16_fullinfo.c
pcre16_get.c
pcre16_globals.c
pcre16_jit_compile.c
pcre16_maketables.c
pcre16_newline.c
pcre16_ord2utf16.c
pcre16_refcount.c
pcre16_string_utils.c
pcre16_study.c
pcre16_tables.c
pcre16_ucd.c
pcre16_utf16_utils.c
pcre16_valid_utf16.c
pcre16_version.c
pcre16_xclass.c
)
ENDIF(PCRE_BUILD_PCRE16)
# Source files of the 32-bit library. The list is only defined when
# PCRE_BUILD_PCRE32 is enabled.
IF(PCRE_BUILD_PCRE32)
SET(PCRE32_SOURCES
pcre32_byte_order.c
pcre32_chartables.c
pcre32_compile.c
pcre32_config.c
pcre32_dfa_exec.c
pcre32_exec.c
pcre32_fullinfo.c
pcre32_get.c
pcre32_globals.c
pcre32_jit_compile.c
pcre32_maketables.c
pcre32_newline.c
pcre32_ord2utf32.c
pcre32_refcount.c
pcre32_string_utils.c
pcre32_study.c
pcre32_tables.c
pcre32_ucd.c
pcre32_utf32_utils.c
pcre32_valid_utf32.c
pcre32_version.c
pcre32_xclass.c
)
ENDIF(PCRE_BUILD_PCRE32)
# Shared MinGW builds can embed Windows resource (COFF) data in the DLLs: if
# the user has placed pcre.rc and/or pcreposix.rc in the source directory, the
# file is compiled with windres and the resulting object is appended to the
# matching source list.
#
# Notes on the custom commands:
#  - The OUTPUT signature has no PRE_LINK stage; the former "PRE-LINK" token
#    was treated as a second output file name, so it has been removed.
#  - COMMENT takes a single argument, so the text is quoted.
#  - DEPENDS makes the object rebuild when the .rc file changes.
#  - NOTE(review): the object is still written into the source directory, as
#    before; consider moving it to the binary directory.
IF(MINGW AND NOT PCRE_STATIC)
  IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
    ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcre.o
      COMMAND windres ARGS pcre.rc pcre.o
      DEPENDS ${PROJECT_SOURCE_DIR}/pcre.rc
      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
      COMMENT "Using pcre coff info in mingw build"
      VERBATIM)
    SET(PCRE_SOURCES
      ${PCRE_SOURCES} ${PROJECT_SOURCE_DIR}/pcre.o
    )
  ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre.rc)
  IF (EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
    ADD_CUSTOM_COMMAND(OUTPUT ${PROJECT_SOURCE_DIR}/pcreposix.o
      COMMAND windres ARGS pcreposix.rc pcreposix.o
      DEPENDS ${PROJECT_SOURCE_DIR}/pcreposix.rc
      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
      COMMENT "Using pcreposix coff info in mingw build"
      VERBATIM)
    SET(PCREPOSIX_SOURCES
      ${PCREPOSIX_SOURCES} ${PROJECT_SOURCE_DIR}/pcreposix.o
    )
  ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcreposix.rc)
ENDIF(MINGW AND NOT PCRE_STATIC)
SET(PCRECPP_HEADERS SET(PCRECPP_HEADERS
pcrecpp.h pcrecpp.h
pcre_scanner.h pcre_scanner.h
@ -354,11 +584,13 @@ SET(targets)
# Libraries # Libraries
# pcre # pcre
IF(PCRE_BUILD_PCRE8)
ADD_LIBRARY(pcre ${PCRE_HEADERS} ${PCRE_SOURCES} ${PROJECT_BINARY_DIR}/config.h) ADD_LIBRARY(pcre ${PCRE_HEADERS} ${PCRE_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
SET(targets ${targets} pcre) SET(targets ${targets} pcre)
ADD_LIBRARY(pcreposix ${PCREPOSIX_HEADERS} ${PCREPOSIX_SOURCES}) ADD_LIBRARY(pcreposix ${PCREPOSIX_HEADERS} ${PCREPOSIX_SOURCES})
SET(targets ${targets} pcreposix) SET(targets ${targets} pcreposix)
TARGET_LINK_LIBRARIES(pcreposix pcre) TARGET_LINK_LIBRARIES(pcreposix pcre)
IF(MINGW AND NOT PCRE_STATIC) IF(MINGW AND NOT PCRE_STATIC)
IF(NON_STANDARD_LIB_PREFIX) IF(NON_STANDARD_LIB_PREFIX)
SET_TARGET_PROPERTIES(pcre pcreposix SET_TARGET_PROPERTIES(pcre pcreposix
@ -373,12 +605,53 @@ IF(MINGW AND NOT PCRE_STATIC)
ENDIF(NON_STANDARD_LIB_SUFFIX) ENDIF(NON_STANDARD_LIB_SUFFIX)
ENDIF(MINGW AND NOT PCRE_STATIC) ENDIF(MINGW AND NOT PCRE_STATIC)
ENDIF(PCRE_BUILD_PCRE8)
# pcre16: the 16-bit library target.
if(PCRE_BUILD_PCRE16)
  add_library(pcre16 ${PCRE_HEADERS} ${PCRE16_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
  set(targets ${targets} pcre16)

  # Optional non-standard DLL prefix/suffix for shared MinGW builds.
  if(MINGW AND NOT PCRE_STATIC)
    if(NON_STANDARD_LIB_PREFIX)
      set_target_properties(pcre16 PROPERTIES PREFIX "")
    endif()
    if(NON_STANDARD_LIB_SUFFIX)
      set_target_properties(pcre16 PROPERTIES SUFFIX "-0.dll")
    endif()
  endif()
endif()
# pcre32: the 32-bit library target.
if(PCRE_BUILD_PCRE32)
  add_library(pcre32 ${PCRE_HEADERS} ${PCRE32_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
  set(targets ${targets} pcre32)

  # Optional non-standard DLL prefix/suffix for shared MinGW builds.
  if(MINGW AND NOT PCRE_STATIC)
    if(NON_STANDARD_LIB_PREFIX)
      set_target_properties(pcre32 PROPERTIES PREFIX "")
    endif()
    if(NON_STANDARD_LIB_SUFFIX)
      set_target_properties(pcre32 PROPERTIES SUFFIX "-0.dll")
    endif()
  endif()
endif()
# pcrecpp # pcrecpp
IF(PCRE_BUILD_PCRECPP) IF(PCRE_BUILD_PCRECPP)
ADD_LIBRARY(pcrecpp ${PCRECPP_HEADERS} ${PCRECPP_SOURCES}) ADD_LIBRARY(pcrecpp ${PCRECPP_HEADERS} ${PCRECPP_SOURCES})
SET(targets ${targets} pcrecpp) SET(targets ${targets} pcrecpp)
TARGET_LINK_LIBRARIES(pcrecpp pcre) TARGET_LINK_LIBRARIES(pcrecpp pcre)
IF(MINGW AND NOT PCRE_STATIC) IF(MINGW AND NOT PCRE_STATIC)
IF(NON_STANDARD_LIB_PREFIX) IF(NON_STANDARD_LIB_PREFIX)
@ -413,14 +686,49 @@ IF(PCRE_BUILD_PCREGREP)
TARGET_LINK_LIBRARIES(pcregrep pcreposix ${PCREGREP_LIBS}) TARGET_LINK_LIBRARIES(pcregrep pcreposix ${PCREGREP_LIBS})
ENDIF(PCRE_BUILD_PCREGREP) ENDIF(PCRE_BUILD_PCREGREP)
# Testing # Testing
IF(PCRE_BUILD_TESTS) IF(PCRE_BUILD_TESTS)
ENABLE_TESTING() ENABLE_TESTING()
ADD_EXECUTABLE(pcretest pcretest.c) SET(PCRETEST_SOURCES pcretest.c)
IF(PCRE_BUILD_PCRE8)
LIST(APPEND PCRETEST_SOURCES pcre_printint.c)
ENDIF(PCRE_BUILD_PCRE8)
IF(PCRE_BUILD_PCRE16)
LIST(APPEND PCRETEST_SOURCES pcre16_printint.c)
ENDIF(PCRE_BUILD_PCRE16)
IF(PCRE_BUILD_PCRE32)
LIST(APPEND PCRETEST_SOURCES pcre32_printint.c)
ENDIF(PCRE_BUILD_PCRE32)
ADD_EXECUTABLE(pcretest ${PCRETEST_SOURCES})
SET(targets ${targets} pcretest) SET(targets ${targets} pcretest)
TARGET_LINK_LIBRARIES(pcretest pcreposix ${PCRETEST_LIBS}) IF(PCRE_BUILD_PCRE8)
LIST(APPEND PCRETEST_LIBS pcreposix pcre)
ENDIF(PCRE_BUILD_PCRE8)
IF(PCRE_BUILD_PCRE16)
LIST(APPEND PCRETEST_LIBS pcre16)
ENDIF(PCRE_BUILD_PCRE16)
IF(PCRE_BUILD_PCRE32)
LIST(APPEND PCRETEST_LIBS pcre32)
ENDIF(PCRE_BUILD_PCRE32)
TARGET_LINK_LIBRARIES(pcretest ${PCRETEST_LIBS})
# The JIT test program is only built with JIT support, and is linked against
# every library width that is being built.
if(PCRE_SUPPORT_JIT)
  add_executable(pcre_jit_test pcre_jit_test.c)
  set(targets ${targets} pcre_jit_test)

  # Start from an empty list and add one library per enabled width.
  set(PCRE_JIT_TEST_LIBS )
  if(PCRE_BUILD_PCRE8)
    list(APPEND PCRE_JIT_TEST_LIBS pcre)
  endif()
  if(PCRE_BUILD_PCRE16)
    list(APPEND PCRE_JIT_TEST_LIBS pcre16)
  endif()
  if(PCRE_BUILD_PCRE32)
    list(APPEND PCRE_JIT_TEST_LIBS pcre32)
  endif()

  target_link_libraries(pcre_jit_test ${PCRE_JIT_TEST_LIBS})
endif()
IF(PCRE_BUILD_PCRECPP) IF(PCRE_BUILD_PCRECPP)
ADD_EXECUTABLE(pcrecpp_unittest pcrecpp_unittest.cc) ADD_EXECUTABLE(pcrecpp_unittest pcrecpp_unittest.cc)
@ -432,7 +740,6 @@ IF(PCRE_BUILD_TESTS)
) )
ENDIF(MINGW AND NON_STANDARD_LIB_NAMES AND NOT PCRE_STATIC) ENDIF(MINGW AND NON_STANDARD_LIB_NAMES AND NOT PCRE_STATIC)
ADD_EXECUTABLE(pcre_scanner_unittest pcre_scanner_unittest.cc) ADD_EXECUTABLE(pcre_scanner_unittest pcre_scanner_unittest.cc)
SET(targets ${targets} pcre_scanner_unittest) SET(targets ${targets} pcre_scanner_unittest)
TARGET_LINK_LIBRARIES(pcre_scanner_unittest pcrecpp) TARGET_LINK_LIBRARIES(pcre_scanner_unittest pcrecpp)
@ -442,42 +749,101 @@ IF(PCRE_BUILD_TESTS)
TARGET_LINK_LIBRARIES(pcre_stringpiece_unittest pcrecpp) TARGET_LINK_LIBRARIES(pcre_stringpiece_unittest pcrecpp)
ENDIF(PCRE_BUILD_PCRECPP) ENDIF(PCRE_BUILD_PCRECPP)
GET_TARGET_PROPERTY(PCREGREP_EXE pcregrep DEBUG_LOCATION) # exes in Debug location tested by the RunTest shell script
# via "make test"
IF(PCRE_BUILD_PCREGREP)
GET_TARGET_PROPERTY(PCREGREP_EXE pcregrep DEBUG_LOCATION)
ENDIF(PCRE_BUILD_PCREGREP)
GET_TARGET_PROPERTY(PCRETEST_EXE pcretest DEBUG_LOCATION) GET_TARGET_PROPERTY(PCRETEST_EXE pcretest DEBUG_LOCATION)
# Write out a CTest configuration file that sets some needed environment # =================================================
# variables for the test scripts. # Write out a CTest configuration file
# #
FILE(WRITE ${PROJECT_BINARY_DIR}/CTestCustom.ctest FILE(WRITE ${PROJECT_BINARY_DIR}/CTestCustom.ctest
"# This is a generated file. "# This is a generated file.
SET(ENV{srcdir} ${PROJECT_SOURCE_DIR}) MESSAGE(\"When testing is complete, review test output in the
SET(ENV{pcregrep} ${PCREGREP_EXE}) \\\"${PROJECT_BINARY_DIR}/Testing/Temporary\\\" folder.\")
SET(ENV{pcretest} ${PCRETEST_EXE}) MESSAGE(\" \")
") ")
# Generate a wrapper script that sets the variables RunTest needs and then
# runs it in the same shell. The wrapper is executed by plain sh, so RunTest
# is read with the POSIX "." command; "source" is a bash extension that
# shells such as dash do not provide.
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre_test.sh
"#! /bin/sh
# This is a generated file.
srcdir=${PROJECT_SOURCE_DIR}
pcretest=${PCRETEST_EXE}
. ${PROJECT_SOURCE_DIR}/RunTest
if test \"$?\" != \"0\"; then exit 1; fi
# End
")
IF(UNIX) IF(UNIX)
ADD_TEST(pcre_test ${PROJECT_SOURCE_DIR}/RunTest) ADD_TEST(pcre_test sh ${PROJECT_BINARY_DIR}/pcre_test.sh)
ADD_TEST(pcre_grep_test ${PROJECT_SOURCE_DIR}/RunGrepTest)
ENDIF(UNIX) ENDIF(UNIX)
# When pcregrep is built, generate a wrapper script that sets the variables
# RunGrepTest needs and then runs it in the same shell. The wrapper is
# executed by plain sh, so RunGrepTest is read with the POSIX "." command;
# "source" is a bash extension that shells such as dash do not provide.
IF(PCRE_BUILD_PCREGREP)
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre_grep_test.sh
"#! /bin/sh
# This is a generated file.
srcdir=${PROJECT_SOURCE_DIR}
pcregrep=${PCREGREP_EXE}
pcretest=${PCRETEST_EXE}
. ${PROJECT_SOURCE_DIR}/RunGrepTest
if test \"$?\" != \"0\"; then exit 1; fi
# End
")
# The shell-based grep test is registered on UNIX only here.
IF(UNIX)
ADD_TEST(pcre_grep_test sh ${PROJECT_BINARY_DIR}/pcre_grep_test.sh)
ENDIF(UNIX)
ENDIF(PCRE_BUILD_PCREGREP)
IF(WIN32) IF(WIN32)
ADD_TEST(pcre_test cmd /C ${PROJECT_SOURCE_DIR}/RunTest.bat) # Provide environment for executing the bat file version of RunTest
# Convert the source directory, binary directory and pcretest location to
# native path form for use inside the generated batch file.
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} winsrc)
FILE(TO_NATIVE_PATH ${PROJECT_BINARY_DIR} winbin)
FILE(TO_NATIVE_PATH ${PCRETEST_EXE} winexe)
# Generate pcre_test.bat. It sets srcdir and pcretest, switches pcretest to
# the per-configuration path when CMAKE_CONFIG_TYPE is set in the environment,
# calls RunTest.Bat, exits with 1 on failure, and otherwise prints a success
# marker line.
FILE(WRITE ${PROJECT_BINARY_DIR}/pcre_test.bat
"\@REM This is a generated file.
\@echo off
setlocal
SET srcdir=\"${winsrc}\"
SET pcretest=\"${winexe}\"
if not [%CMAKE_CONFIG_TYPE%]==[] SET pcretest=\"${winbin}\\%CMAKE_CONFIG_TYPE%\\pcretest.exe\"
call %srcdir%\\RunTest.Bat
if errorlevel 1 exit /b 1
echo RunTest.bat tests successfully completed
")
# Register the batch file as a test. It passes only if the success marker
# printed by the batch file appears in its output.
ADD_TEST(NAME pcre_test_bat
COMMAND pcre_test.bat)
SET_TESTS_PROPERTIES(pcre_test_bat PROPERTIES
PASS_REGULAR_EXPRESSION "RunTest\\.bat tests successfully completed")
# OSTYPE is read from the environment when CMake is run, i.e. at configure
# time, not when the tests are executed.
IF("$ENV{OSTYPE}" STREQUAL "msys")
# Both the sh and bat file versions of RunTest are run if make test is used
# in msys
ADD_TEST(pcre_test_sh sh.exe ${PROJECT_BINARY_DIR}/pcre_test.sh)
IF(PCRE_BUILD_PCREGREP)
ADD_TEST(pcre_grep_test sh.exe ${PROJECT_BINARY_DIR}/pcre_grep_test.sh)
ENDIF(PCRE_BUILD_PCREGREP)
ENDIF("$ENV{OSTYPE}" STREQUAL "msys")
ENDIF(WIN32) ENDIF(WIN32)
GET_TARGET_PROPERTY(PCRECPP_UNITTEST_EXE # Changed to accommodate testing whichever location was just built
pcrecpp_unittest
DEBUG_LOCATION)
GET_TARGET_PROPERTY(PCRE_SCANNER_UNITTEST_EXE IF(PCRE_SUPPORT_JIT)
pcre_scanner_unittest ADD_TEST(pcre_jit_test pcre_jit_test)
DEBUG_LOCATION) ENDIF(PCRE_SUPPORT_JIT)
GET_TARGET_PROPERTY(PCRE_STRINGPIECE_UNITTEST_EXE IF(PCRE_BUILD_PCRECPP)
pcre_stringpiece_unittest ADD_TEST(pcrecpp_test pcrecpp_unittest)
DEBUG_LOCATION) ADD_TEST(pcre_scanner_test pcre_scanner_unittest)
ADD_TEST(pcre_stringpiece_test pcre_stringpiece_unittest)
ENDIF(PCRE_BUILD_PCRECPP)
ADD_TEST(pcrecpp_test ${PCRECPP_UNITTEST_EXE})
ADD_TEST(pcre_scanner_test ${PCRE_SCANNER_UNITTEST_EXE})
ADD_TEST(pcre_stringpiece_test ${PCRE_STRINGPIECE_UNITTEST_EXE})
ENDIF(PCRE_BUILD_TESTS) ENDIF(PCRE_BUILD_TESTS)
# Installation # Installation
@ -507,7 +873,6 @@ ELSE(PCRE_BUILD_PCRECPP)
SET(man3 ${man3_new}) SET(man3 ${man3_new})
ENDIF(PCRE_BUILD_PCRECPP) ENDIF(PCRE_BUILD_PCRECPP)
INSTALL(FILES ${man1} DESTINATION man/man1) INSTALL(FILES ${man1} DESTINATION man/man1)
INSTALL(FILES ${man3} DESTINATION man/man3) INSTALL(FILES ${man3} DESTINATION man/man3)
INSTALL(FILES ${html} DESTINATION share/doc/pcre/html) INSTALL(FILES ${html} DESTINATION share/doc/pcre/html)
@ -537,12 +902,17 @@ IF(PCRE_SHOW_REPORT)
MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}") MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}")
MESSAGE(STATUS " C++ compiler flags .............. : ${CMAKE_CXX_FLAGS}${cxxfsp}${CMAKE_CXX_FLAGS_${buildtype}}") MESSAGE(STATUS " C++ compiler flags .............. : ${CMAKE_CXX_FLAGS}${cxxfsp}${CMAKE_CXX_FLAGS_${buildtype}}")
MESSAGE(STATUS "") MESSAGE(STATUS "")
MESSAGE(STATUS " Build 8 bit PCRE library ........ : ${PCRE_BUILD_PCRE8}")
MESSAGE(STATUS " Build 16 bit PCRE library ....... : ${PCRE_BUILD_PCRE16}")
MESSAGE(STATUS " Build 32 bit PCRE library ....... : ${PCRE_BUILD_PCRE32}")
MESSAGE(STATUS " Build C++ library ............... : ${PCRE_BUILD_PCRECPP}") MESSAGE(STATUS " Build C++ library ............... : ${PCRE_BUILD_PCRECPP}")
MESSAGE(STATUS " Enable UTF-8 support ............ : ${PCRE_SUPPORT_UNICODE_PROPERTIES}") MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE_SUPPORT_JIT}")
MESSAGE(STATUS " Enable UTF support .............. : ${PCRE_SUPPORT_UTF}")
MESSAGE(STATUS " Unicode properties .............. : ${PCRE_SUPPORT_UNICODE_PROPERTIES}") MESSAGE(STATUS " Unicode properties .............. : ${PCRE_SUPPORT_UNICODE_PROPERTIES}")
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE_NEWLINE}") MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE_NEWLINE}")
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE_SUPPORT_BSR_ANYCRLF}") MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE_SUPPORT_BSR_ANYCRLF}")
MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE_EBCDIC}") MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE_EBCDIC}")
MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE_EBCDIC_NL25}")
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE_REBUILD_CHARTABLES}") MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE_REBUILD_CHARTABLES}")
MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}") MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}") MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
@ -552,22 +922,33 @@ IF(PCRE_SHOW_REPORT)
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}") MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}") MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
MESSAGE(STATUS " Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}") MESSAGE(STATUS " Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}")
MESSAGE(STATUS " Build tests (implies pcretest) .. : ${PCRE_BUILD_TESTS}") MESSAGE(STATUS " Enable JIT in pcregrep .......... : ${PCRE_SUPPORT_PCREGREP_JIT}")
MESSAGE(STATUS " Buffer size for pcregrep ........ : ${PCREGREP_BUFSIZE}")
MESSAGE(STATUS " Build tests (implies pcretest .. : ${PCRE_BUILD_TESTS}")
MESSAGE(STATUS " and pcregrep)")
IF(ZLIB_FOUND) IF(ZLIB_FOUND)
MESSAGE(STATUS " Link pcregrep with libz ......... : ${PCRE_SUPPORT_LIBZ}") MESSAGE(STATUS " Link pcregrep with libz ......... : ${PCRE_SUPPORT_LIBZ}")
ELSE(ZLIB_FOUND) ELSE(ZLIB_FOUND)
MESSAGE(STATUS " Link pcregrep with libz ......... : None" ) MESSAGE(STATUS " Link pcregrep with libz ......... : Library not found" )
ENDIF(ZLIB_FOUND) ENDIF(ZLIB_FOUND)
IF(BZIP2_FOUND) IF(BZIP2_FOUND)
MESSAGE(STATUS " Link pcregrep with libbz2 ....... : ${PCRE_SUPPORT_LIBBZ2}") MESSAGE(STATUS " Link pcregrep with libbz2 ....... : ${PCRE_SUPPORT_LIBBZ2}")
ELSE(BZIP2_FOUND) ELSE(BZIP2_FOUND)
MESSAGE(STATUS " Link pcregrep with libbz2 ....... : None" ) MESSAGE(STATUS " Link pcregrep with libbz2 ....... : Library not found" )
ENDIF(BZIP2_FOUND) ENDIF(BZIP2_FOUND)
IF(NOT PCRE_SUPPORT_LIBREADLINE) IF(EDITLINE_FOUND)
MESSAGE(STATUS " Link pcretest with libreadline .. : None" ) MESSAGE(STATUS " Link pcretest with libeditline .. : ${PCRE_SUPPORT_LIBEDIT}")
ELSE(NOT PCRE_SUPPORT_LIBREADLINE) ELSE(EDITLINE_FOUND)
MESSAGE(STATUS " Link pcretest with libeditline .. : Library not found" )
ENDIF(EDITLINE_FOUND)
IF(READLINE_FOUND)
MESSAGE(STATUS " Link pcretest with libreadline .. : ${PCRE_SUPPORT_LIBREADLINE}") MESSAGE(STATUS " Link pcretest with libreadline .. : ${PCRE_SUPPORT_LIBREADLINE}")
ENDIF(NOT PCRE_SUPPORT_LIBREADLINE) ELSE(READLINE_FOUND)
MESSAGE(STATUS " Link pcretest with libreadline .. : Library not found" )
ENDIF(READLINE_FOUND)
MESSAGE(STATUS " Support Valgrind .................: ${PCRE_SUPPORT_VALGRIND}")
MESSAGE(STATUS " Support coverage .................: ${PCRE_SUPPORT_COVERAGE}")
IF(MINGW AND NOT PCRE_STATIC) IF(MINGW AND NOT PCRE_STATIC)
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}") MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}") MESSAGE(STATUS " Non-standard dll names (suffix) . : ${NON_STANDARD_LIB_SUFFIX}")

File diff suppressed because it is too large Load Diff

67
tools/pcre/CheckMan Normal file
View File

@ -0,0 +1,67 @@
#! /usr/bin/perl
# A script to scan PCRE's man pages to check for typos in the control
# sequences. I use only a small set of the available repertoire, so it is
# straightforward to check that nothing else has slipped in by mistake. This
# script should be called in the doc directory.
use strict;
use warnings;

# Exit status for the whole run: 0 when every file is clean, 1 as soon as any
# problem has been reported.
my $yield = 0;

# Every command line argument is the name of a man page source file to check.
foreach my $file (@ARGV)
  {
  my $line = 0;

  # Use the three-argument form of open with a lexical handle, so that a file
  # name beginning with '<', '>' or '+', or ending with '|', is always treated
  # as a plain file name and never as an open mode or a command to run.
  open(my $in, '<', $file) || die "Failed to open $file: $!\n";

  while (<$in>)
    {
    $line++;

    # Empty (or whitespace-only) lines are reported as errors.
    if (/^\s*$/)
      {
      # print rather than printf: the message contains the file name, which
      # must not be interpreted as a format string (it may contain '%').
      print "Empty line $line of $file\n";
      $yield = 1;
      }

    # A line that starts with a dot is a control line; it must match one of
    # the small set of accepted forms listed in the pattern below.
    elsif (/^\./)
      {
      if (!/^\.\s*$|
            ^\.B\s+\S|
            ^\.TH\s\S|
            ^\.SH\s\S|
            ^\.SS\s\S|
            ^\.TP(?:\s\d+)?\s*$|
            ^\.ti\s\S|
            ^\.SM\s*$|
            ^\.rs\s*$|
            ^\.sp\s*$|
            ^\.nf\s*$|
            ^\.fi\s*$|
            ^\.P\s*$|
            ^\.PP\s*$|
            ^\.\\"(?:\ HREF)?\s*$|
            ^\.\\"\sHTML\s<a\shref="[^"]+?">\s*$|
            ^\.\\"\sHTML\s<a\sname="[^"]+?"><\/a>\s*$|
            ^\.\\"\s<\/a>\s*$|
            ^\.\\"\sJOINSH\s*$|
            ^\.\\"\sJOIN\s*$/x
         )
        {
        print "Bad control line $line of $file\n";
        $yield = 1;
        }
      }

    # In ordinary text lines the only backslash sequences accepted are \e and
    # the font changes \fI, \fB and \fP.
    else
      {
      if (/\\[^ef]|\\f[^IBP]/)
        {
        print "Bad backslash in line $line of $file\n";
        $yield = 1;
        }
      }
    }

  close($in);
  }

exit $yield;
# End

View File

@ -2,7 +2,9 @@ Technical Notes about PCRE
-------------------------- --------------------------
These are very rough technical notes that record potentially useful information These are very rough technical notes that record potentially useful information
about PCRE internals. about PCRE internals. For information about testing PCRE, see the pcretest
documentation and the comment at the head of the RunTest file.
Historical note 1 Historical note 1
----------------- -----------------
@ -22,6 +24,7 @@ the one matching the longest subset of the subject string was chosen. This did
not necessarily maximize the individual wild portions of the pattern, as is not necessarily maximize the individual wild portions of the pattern, as is
expected in Unix and Perl-style regular expressions. expected in Unix and Perl-style regular expressions.
Historical note 2 Historical note 2
----------------- -----------------
@ -34,6 +37,7 @@ maximizing (or, optionally, minimizing in Perl) the amount of the subject that
matches individual wild portions of the pattern. This is an "NFA algorithm" in matches individual wild portions of the pattern. This is an "NFA algorithm" in
Friedl's terminology. Friedl's terminology.
OK, here's the real stuff OK, here's the real stuff
------------------------- -------------------------
@ -44,6 +48,20 @@ in the pattern, to save on compiling time. However, because of the greater
complexity in Perl regular expressions, I couldn't do this. In any case, a complexity in Perl regular expressions, I couldn't do this. In any case, a
first pass through the pattern is helpful for other reasons. first pass through the pattern is helpful for other reasons.
Support for 16-bit and 32-bit data strings
-------------------------------------------
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
in any combination of 8-bit, 16-bit or 32-bit modes, creating different
libraries. In the description that follows, the word "short" is
used for a 16-bit data quantity, and the word "unit" is used for a quantity
that is a byte in 8-bit mode, a short in 16-bit mode and a 32-bit unsigned
integer in 32-bit mode. However, so as not to over-complicate the text, the
names of PCRE functions are given in 8-bit form only.
Computing the memory requirement: how it was Computing the memory requirement: how it was
-------------------------------------------- --------------------------------------------
@ -54,6 +72,7 @@ idea was that this would turn out faster than the Henry Spencer code because
the first pass is degenerate and the second pass can just store stuff straight the first pass is degenerate and the second pass can just store stuff straight
into the vector, which it knows is big enough. into the vector, which it knows is big enough.
Computing the memory requirement: how it is Computing the memory requirement: how it is
------------------------------------------- -------------------------------------------
@ -63,26 +82,31 @@ things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
I had a flash of inspiration as to how I could run the real compile function in I had a flash of inspiration as to how I could run the real compile function in
a "fake" mode that enables it to compute how much memory it would need, while a "fake" mode that enables it to compute how much memory it would need, while
actually only ever using a few hundred bytes of working memory, and without too actually only ever using a few hundred bytes of working memory, and without too
many tests of the mode that might slow it down. So I re-factored the compiling many tests of the mode that might slow it down. So I refactored the compiling
functions to work this way. This got rid of about 600 lines of source. It functions to work this way. This got rid of about 600 lines of source. It
should make future maintenance and development easier. As this was such a major should make future maintenance and development easier. As this was such a major
change, I never released 6.8, instead upping the number to 7.0 (other quite change, I never released 6.8, instead upping the number to 7.0 (other quite
major changes are also present in the 7.0 release). major changes were also present in the 7.0 release).
A side effect of this work is that the previous limit of 200 on the nesting A side effect of this work was that the previous limit of 200 on the nesting
depth of parentheses was removed. However, there is a downside: pcre_compile() depth of parentheses was removed. However, there is a downside: pcre_compile()
runs more slowly than before (30% or more, depending on the pattern) because it runs more slowly than before (30% or more, depending on the pattern) because it
is doing a full analysis of the pattern. My hope is that this is not a big is doing a full analysis of the pattern. My hope was that this would not be a
issue. big issue, and in the event, nobody has commented on it.
Traditional matching function Traditional matching function
----------------------------- -----------------------------
The "traditional", and original, matching function is called pcre_exec(), and The "traditional", and original, matching function is called pcre_exec(), and
it implements an NFA algorithm, similar to the original Henry Spencer algorithm it implements an NFA algorithm, similar to the original Henry Spencer algorithm
and the way that Perl works. Not surprising, since it is intended to be as and the way that Perl works. This is not surprising, since it is intended to be
compatible with Perl as possible. This is the function most users of PCRE will as compatible with Perl as possible. This is the function most users of PCRE
use most of the time. will use most of the time. From release 8.20, if PCRE is compiled with
just-in-time (JIT) support, and studying a compiled pattern with JIT is
successful, the JIT code is run instead of the normal pcre_exec() code, but the
result is the same.
Supplementary matching function Supplementary matching function
------------------------------- -------------------------------
@ -101,28 +125,39 @@ needed at compile time to produce a traditional FSM where only one state is
ever active at once. I believe some other regex matchers work this way. ever active at once. I believe some other regex matchers work this way.
Changeable options
------------------
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL) may
change in the middle of patterns. From PCRE 8.13, their processing is handled
entirely at compile time by generating different opcodes for the different
settings. The runtime functions do not need to keep track of an options state
any more.
Format of compiled patterns Format of compiled patterns
--------------------------- ---------------------------
The compiled form of a pattern is a vector of bytes, containing items of The compiled form of a pattern is a vector of units (bytes in 8-bit mode, or
variable length. The first byte in an item is an opcode, and the length of the shorts in 16-bit mode, 32-bit unsigned integers in 32-bit mode), containing
item is either implicit in the opcode or contained in the data bytes that items of variable length. The first unit in an item contains an opcode, and
follow it. the length of the item is either implicit in the opcode or contained in the
data that follows it.
In many cases below LINK_SIZE data values are specified for offsets within the In many cases listed below, LINK_SIZE data values are specified for offsets
compiled pattern. The default value for LINK_SIZE is 2, but PCRE can be within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
compiled to use 3-byte or 4-byte values for these offsets (impairing the default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
performance). This is necessary only when patterns whose compiled length is 4-byte values for these offsets, although this impairs the performance. (3-byte
greater than 64K are going to be processed. In this description, we assume the LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE
"normal" compilation options. Data values that are counts (e.g. for larger than 2 is necessary only when patterns whose compiled length is greater
quantifiers) are always just two bytes long. than 64K are going to be processed. In this description, we assume the "normal"
compilation options. Data values that are counts (e.g. for quantifiers) are
A list of the opcodes follows: always just two bytes long (one short in 16-bit mode).
Opcodes with no following data Opcodes with no following data
------------------------------ ------------------------------
These items are all just one byte long These items are all just one unit long
OP_END end of pattern OP_END end of pattern
OP_ANY match any one character other than newline OP_ANY match any one character other than newline
@ -131,7 +166,8 @@ These items are all just one byte long
OP_SOD match start of data: \A OP_SOD match start of data: \A
OP_SOM, start of match (subject + offset): \G OP_SOM, start of match (subject + offset): \G
OP_SET_SOM, set start of match (\K) OP_SET_SOM, set start of match (\K)
OP_CIRC ^ (start of data, or after \n in multiline) OP_CIRC ^ (start of data)
OP_CIRCM ^ multiline mode (start of data or after newline)
OP_NOT_WORD_BOUNDARY \B OP_NOT_WORD_BOUNDARY \B
OP_WORD_BOUNDARY \b OP_WORD_BOUNDARY \b
OP_NOT_DIGIT \D OP_NOT_DIGIT \D
@ -146,48 +182,71 @@ These items are all just one byte long
OP_WORDCHAR \w OP_WORDCHAR \w
OP_EODN match end of data or \n at end: \Z OP_EODN match end of data or \n at end: \Z
OP_EOD match end of data: \z OP_EOD match end of data: \z
OP_DOLL $ (end of data, or before \n in multiline) OP_DOLL $ (end of data, or before final newline)
OP_DOLLM $ multiline mode (end of data or before newline)
OP_EXTUNI match an extended Unicode character OP_EXTUNI match an extended Unicode character
OP_ANYNL match any Unicode newline sequence OP_ANYNL match any Unicode newline sequence
OP_ACCEPT ) OP_ACCEPT ) These are Perl 5.10's "backtracking control
OP_COMMIT ) OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
OP_FAIL ) These are Perl 5.10's "backtracking OP_FAIL ) parentheses, it may be preceded by one or more
OP_PRUNE ) control verbs". OP_PRUNE ) OP_CLOSE, followed by a 2-byte number,
OP_SKIP ) OP_SKIP ) indicating which parentheses must be closed.
OP_THEN )
Backtracking control verbs with (optional) data
-----------------------------------------------
(*THEN) without an argument generates the opcode OP_THEN and no following data.
OP_MARK is followed by the mark name, preceded by a one-unit length, and
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
following in the same format.
Matching literal characters
---------------------------
The OP_CHAR opcode is followed by a single character that is to be matched
casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
the character may be more than one unit long. In UTF-32 mode, characters
are always exactly one unit long.
Repeating single characters Repeating single characters
--------------------------- ---------------------------
The common repeats (*, +, ?) when applied to a single character use the The common repeats (*, +, ?), when applied to a single character, use the
following opcodes: following opcodes, which come in caseful and caseless versions:
OP_STAR Caseful Caseless
OP_MINSTAR OP_STAR OP_STARI
OP_POSSTAR OP_MINSTAR OP_MINSTARI
OP_PLUS OP_POSSTAR OP_POSSTARI
OP_MINPLUS OP_PLUS OP_PLUSI
OP_POSPLUS OP_MINPLUS OP_MINPLUSI
OP_QUERY OP_POSPLUS OP_POSPLUSI
OP_MINQUERY OP_QUERY OP_QUERYI
OP_POSQUERY OP_MINQUERY OP_MINQUERYI
OP_POSQUERY OP_POSQUERYI
In ASCII mode, these are two-byte items; in UTF-8 mode, the length is variable. Each opcode is followed by the character that is to be repeated. In ASCII mode,
Those with "MIN" in their name are the minimizing versions. Those with "POS" in these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
their names are possessive versions. Each is followed by the character that is UTF-32 mode these are two-unit items.
to be repeated. Other repeats make use of Those with "MIN" in their names are the minimizing versions. Those with "POS"
in their names are possessive versions. Other repeats make use of these
opcodes:
OP_UPTO Caseful Caseless
OP_MINUPTO OP_UPTO OP_UPTOI
OP_POSUPTO OP_MINUPTO OP_MINUPTOI
OP_EXACT OP_POSUPTO OP_POSUPTOI
OP_EXACT OP_EXACTI
which are followed by a two-byte count (most significant first) and the Each of these is followed by a two-byte (one short) count (most significant
repeated character. OP_UPTO matches from 0 to the given number. A repeat with a byte first in 8-bit mode) and then the repeated character. OP_UPTO matches from
non-zero minimum and a fixed maximum is coded as an OP_EXACT followed by an 0 to the given number. A repeat with a non-zero minimum and a fixed maximum is
OP_UPTO (or OP_MINUPTO or OP_POSUPTO). coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or OP_POSUPTO).
Repeating character types Repeating character types
@ -195,7 +254,7 @@ Repeating character types
Repeats of things like \d are done exactly as for single characters, except Repeats of things like \d are done exactly as for single characters, except
that instead of a character, the opcode for the type is stored in the data that instead of a character, the opcode for the type is stored in the data
byte. The opcodes are: unit. The opcodes are:
OP_TYPESTAR OP_TYPESTAR
OP_TYPEMINSTAR OP_TYPEMINSTAR
@ -217,65 +276,58 @@ Match by Unicode property
OP_PROP and OP_NOTPROP are used for positive and negative matches of a OP_PROP and OP_NOTPROP are used for positive and negative matches of a
character by testing its Unicode property (the \p and \P escape sequences). character by testing its Unicode property (the \p and \P escape sequences).
Each is followed by two bytes that encode the desired property as a type and a Each is followed by two units that encode the desired property as a type and a
value. value.
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
three bytes: OP_PROP or OP_NOTPROP and then the desired property type and three units: OP_PROP or OP_NOTPROP, and then the desired property type and
value. value.
Matching literal characters
---------------------------
The OP_CHAR opcode is followed by a single character that is to be matched
casefully. For caseless matching, OP_CHARNC is used. In UTF-8 mode, the
character may be more than one byte long. (Earlier versions of PCRE used
multi-character strings, but this was changed to allow some new features to be
added.)
Character classes Character classes
----------------- -----------------
If there is only one character, OP_CHAR or OP_CHARNC is used for a positive If there is only one character in the class, OP_CHAR or OP_CHARI is used for a
class, and OP_NOT for a negative one (that is, for something like [^a]). positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
However, in UTF-8 mode, the use of OP_NOT applies only to characters with something like [^a]).
values < 128, because OP_NOT is confined to single bytes.
Another set of repeating opcodes (OP_NOTSTAR etc.) are used for a repeated, Another set of 13 repeating opcodes (called OP_NOTSTAR etc.) are used for
negated, single-character class. The normal ones (OP_STAR etc.) are used for a repeated, negated, single-character classes. The normal single-character
repeated positive single-character class. opcodes (OP_STAR, etc.) are used for repeated positive single-character
classes.
When there's more than one character in a class and all the characters are less When there is more than one character in a class and all the characters are
than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a negative less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
one. In either case, the opcode is followed by a 32-byte bit map containing a 1 negative one. In either case, the opcode is followed by a 32-byte (16-short)
bit for every character that is acceptable. The bits are counted from the least bit map containing a 1 bit for every character that is acceptable. The bits are
significant end of each byte. counted from the least significant end of each unit. In caseless mode, bits for
both cases are set.
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 mode, The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32 mode,
subject characters with values greater than 256 can be handled correctly. For subject characters with values greater than 255 can be handled correctly. For
OP_CLASS they don't match, whereas for OP_NCLASS they do. OP_CLASS they do not match, whereas for OP_NCLASS they do.
For classes containing characters with values > 255, OP_XCLASS is used. It For classes containing characters with values greater than 255, OP_XCLASS is
optionally uses a bit map (if any characters lie within it), followed by a list used. It optionally uses a bit map (if any characters lie within it), followed
of pairs and single characters. There is a flag character that indicates by a list of pairs (for a range) and single characters. In caseless mode, both
whether it's a positive or a negative class. cases are explicitly listed. There is a flag character that indicates whether
it is a positive or a negative class.
Back references Back references
--------------- ---------------
OP_REF is followed by two bytes containing the reference number. OP_REF (caseful) or OP_REFI (caseless) is followed by two bytes (one short)
containing the reference number.
Repeating character classes and back references Repeating character classes and back references
----------------------------------------------- -----------------------------------------------
Single-character classes are handled specially (see above). This section Single-character classes are handled specially (see above). This section
applies to OP_CLASS and OP_REF. In both cases, the repeat information follows applies to OP_CLASS and OP_REF[I]. In both cases, the repeat information
the base item. The matching code looks at the following opcode to see if it is follows the base item. The matching code looks at the following opcode to see
one of if it is one of
OP_CRSTAR OP_CRSTAR
OP_CRMINSTAR OP_CRMINSTAR
@ -286,10 +338,10 @@ one of
OP_CRRANGE OP_CRRANGE
OP_CRMINRANGE OP_CRMINRANGE
All but the last two are just single-byte items. The others are followed by All but the last two are just single-unit items. The others are followed by
four bytes of data, comprising the minimum and maximum repeat counts. There are four bytes (two shorts) of data, comprising the minimum and maximum repeat
no special possessive opcodes for these repeats; a possessive repeat is counts. There are no special possessive opcodes for these repeats; a possessive
compiled into an atomic group. repeat is compiled into an atomic group.
Brackets and alternation Brackets and alternation
@ -299,7 +351,8 @@ A pair of non-capturing (round) brackets is wrapped round each expression at
compile time, so alternation always happens in the context of brackets. compile time, so alternation always happens in the context of brackets.
[Note for North Americans: "bracket" to some English speakers, including [Note for North Americans: "bracket" to some English speakers, including
myself, can be round, square, curly, or pointy. Hence this usage.] myself, can be round, square, curly, or pointy. Hence this usage rather than
"parentheses".]
Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99 Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
capturing brackets and it used a different opcode for each one. From release capturing brackets and it used a different opcode for each one. From release
@ -311,16 +364,17 @@ A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
next alternative OP_ALT or, if there aren't any branches, to the matching next alternative OP_ALT or, if there aren't any branches, to the matching
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
the next one, or to the OP_KET opcode. For capturing brackets, the bracket the next one, or to the OP_KET opcode. For capturing brackets, the bracket
number immediately follows the offset, always as a 2-byte item. number immediately follows the offset, always as a 2-byte (one short) item.
OP_KET is used for subpatterns that do not repeat indefinitely, while OP_KET is used for subpatterns that do not repeat indefinitely, and
OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
maximally respectively. All three are followed by LINK_SIZE bytes giving (as a maximally respectively (see below for possessive repetitions). All three are
positive number) the offset back to the matching bracket opcode. followed by LINK_SIZE bytes giving (as a positive number) the offset back to
the matching bracket opcode.
If a subpattern is quantified such that it is permitted to match zero times, it If a subpattern is quantified such that it is permitted to match zero times, it
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
single-byte opcodes that tell the matcher that skipping the following single-unit opcodes that tell the matcher that skipping the following
subpattern entirely is a valid branch. In the case of the first two, not subpattern entirely is a valid branch. In the case of the first two, not
skipping the pattern is also valid (greedy and non-greedy). The third is used skipping the pattern is also valid (greedy and non-greedy). The third is used
when a pattern has the quantifier {0,0}. It cannot be entirely discarded, when a pattern has the quantifier {0,0}. It cannot be entirely discarded,
@ -343,6 +397,15 @@ final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
that it needs to check for matching an empty string when it hits OP_KETRMIN or that it needs to check for matching an empty string when it hits OP_KETRMIN or
OP_KETRMAX, and if so, to break the loop. OP_KETRMAX, and if so, to break the loop.
Possessive brackets
-------------------
When a repeated group (capturing or non-capturing) is marked as possessive by
the "+" notation, e.g. (abc)++, different opcodes are used. Their names all
have POS on the end, e.g. OP_BRAPOS instead of OP_BRA and OP_SCBRAPOS instead
of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
repetition is zero, the group is preceded by OP_BRAPOSZERO.
Assertions Assertions
---------- ----------
@ -350,11 +413,12 @@ Assertions
Forward assertions are just like other subpatterns, but starting with one of Forward assertions are just like other subpatterns, but starting with one of
the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
is OP_REVERSE, followed by a two-byte count of the number of characters to move is OP_REVERSE, followed by a two-byte (one short) count of the number of
back the pointer in the subject string. When operating in UTF-8 mode, the count characters to move back the pointer in the subject string. In ASCII mode, the
is a character count rather than a byte count. A separate count is present in count is a number of units, but in UTF-8/16 mode each character may occupy more
each alternative of a lookbehind assertion, allowing them to have different than one unit; in UTF-32 mode each character occupies exactly one unit.
fixed lengths. A separate count is present in each alternative of a lookbehind
assertion, allowing them to have different fixed lengths.
Once-only (atomic) subpatterns Once-only (atomic) subpatterns
@ -371,13 +435,17 @@ Conditional subpatterns
These are like other subpatterns, but they start with the opcode OP_COND, or These are like other subpatterns, but they start with the opcode OP_COND, or
OP_SCOND for one that might match an empty string in an unbounded repeat. If OP_SCOND for one that might match an empty string in an unbounded repeat. If
the condition is a back reference, this is stored at the start of the the condition is a back reference, this is stored at the start of the
subpattern using the opcode OP_CREF followed by two bytes containing the subpattern using the opcode OP_CREF followed by two bytes (one short)
reference number. If the condition is "in recursion" (coded as "(?(R)"), or "in containing the reference number. OP_NCREF is used instead if the reference was
recursion of group x" (coded as "(?(Rx)"), the group number is stored at the generated by name (so that the runtime code knows to check for duplicate
start of the subpattern using the opcode OP_RREF, and a value of zero for "the names).
whole pattern". For a DEFINE condition, just the single byte OP_DEF is used (it
has no associated data). Otherwise, a conditional subpattern always starts with If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
one of the assertions. group x" (coded as "(?(Rx)"), the group number is stored at the start of the
subpattern using the opcode OP_RREF or OP_NRREF (cf OP_NCREF), and a value of
zero for "the whole pattern". For a DEFINE condition, just the single unit
OP_DEF is used (it has no associated data). Otherwise, a conditional subpattern
always starts with one of the assertions.
Recursion Recursion
@ -394,25 +462,12 @@ are not strictly a recursion.
Callout Callout
------- -------
OP_CALLOUT is followed by one byte of data that holds a callout number in the OP_CALLOUT is followed by one unit of data that holds a callout number in the
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
cases there follows a two-byte value giving the offset in the pattern to the cases there follows a two-byte (one short) value giving the offset in the
start of the following item, and another two-byte item giving the length of the pattern to the start of the following item, and another two-byte (one short)
next item. item giving the length of the next item.
Changing options
----------------
If any of the /i, /m, or /s options are changed within a pattern, an OP_OPT
opcode is compiled, followed by one byte containing the new settings of these
flags. If there are several alternatives, there is an occurrence of OP_OPT at
the start of all those following the first options change, to set appropriate
options for the start of the alternative. Immediately after the end of the
group there is another such item to reset the flags to their previous values. A
change of flag right at the very start of the pattern can be handled entirely
at compile time, and so does not cause anything to be put into the compiled
data.
Philip Hazel Philip Hazel
April 2008 February 2012

View File

@ -1,11 +1,13 @@
Installation Instructions Installation Instructions
************************* *************************
Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation,
2006, 2007, 2008 Free Software Foundation, Inc. Inc.
This file is free documentation; the Free Software Foundation gives Copying and distribution of this file, with or without modification,
unlimited permission to copy, distribute and modify it. are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. This file is offered as-is,
without warranty of any kind.
Basic Installation Basic Installation
================== ==================
@ -13,7 +15,11 @@ Basic Installation
Briefly, the shell commands `./configure; make; make install' should Briefly, the shell commands `./configure; make; make install' should
configure, build, and install this package. The following configure, build, and install this package. The following
more-detailed instructions are generic; see the `README' file for more-detailed instructions are generic; see the `README' file for
instructions specific to this package. instructions specific to this package. Some packages provide this
`INSTALL' file but do not implement all of the features documented
below. The lack of an optional feature in a given package is not
necessarily a bug. More recommendations for GNU packages can be found
in *note Makefile Conventions: (standards)Makefile Conventions.
The `configure' shell script attempts to guess correct values for The `configure' shell script attempts to guess correct values for
various system-dependent variables used during compilation. It uses various system-dependent variables used during compilation. It uses
@ -42,7 +48,7 @@ may remove or edit it.
you want to change it or regenerate `configure' using a newer version you want to change it or regenerate `configure' using a newer version
of `autoconf'. of `autoconf'.
The simplest way to compile this package is: The simplest way to compile this package is:
1. `cd' to the directory containing the package's source code and type 1. `cd' to the directory containing the package's source code and type
`./configure' to configure the package for your system. `./configure' to configure the package for your system.
@ -53,12 +59,22 @@ The simplest way to compile this package is:
2. Type `make' to compile the package. 2. Type `make' to compile the package.
3. Optionally, type `make check' to run any self-tests that come with 3. Optionally, type `make check' to run any self-tests that come with
the package. the package, generally using the just-built uninstalled binaries.
4. Type `make install' to install the programs and any data files and 4. Type `make install' to install the programs and any data files and
documentation. documentation. When installing into a prefix owned by root, it is
recommended that the package be configured and built as a regular
user, and only the `make install' phase executed with root
privileges.
5. You can remove the program binaries and object files from the 5. Optionally, type `make installcheck' to repeat any self-tests, but
this time using the binaries in their final installed location.
This target does not install anything. Running this target as a
regular user, particularly if the prior `make install' required
root privileges, verifies that the installation completed
correctly.
6. You can remove the program binaries and object files from the
source code directory by typing `make clean'. To also remove the source code directory by typing `make clean'. To also remove the
files that `configure' created (so you can compile the package for files that `configure' created (so you can compile the package for
a different kind of computer), type `make distclean'. There is a different kind of computer), type `make distclean'. There is
@ -67,8 +83,15 @@ The simplest way to compile this package is:
all sorts of other programs in order to regenerate files that came all sorts of other programs in order to regenerate files that came
with the distribution. with the distribution.
6. Often, you can also type `make uninstall' to remove the installed 7. Often, you can also type `make uninstall' to remove the installed
files again. files again. In practice, not all packages have tested that
uninstallation works correctly, even though it is required by the
GNU Coding Standards.
8. Some packages, particularly those that use Automake, provide `make
distcheck', which can be used by developers to test that all other
targets like `make install' and `make uninstall' work correctly.
This target is generally not run by end users.
Compilers and Options Compilers and Options
===================== =====================
@ -93,7 +116,8 @@ same time, by placing the object files for each architecture in their
own directory. To do this, you can use GNU `make'. `cd' to the own directory. To do this, you can use GNU `make'. `cd' to the
directory where you want the object files and executables to go and run directory where you want the object files and executables to go and run
the `configure' script. `configure' automatically checks for the the `configure' script. `configure' automatically checks for the
source code in the directory that `configure' is in and in `..'. source code in the directory that `configure' is in and in `..'. This
is known as a "VPATH" build.
With a non-GNU `make', it is safer to compile the package for one With a non-GNU `make', it is safer to compile the package for one
architecture at a time in the source code directory. After you have architecture at a time in the source code directory. After you have
@ -120,7 +144,8 @@ Installation Names
By default, `make install' installs the package's commands under By default, `make install' installs the package's commands under
`/usr/local/bin', include files under `/usr/local/include', etc. You `/usr/local/bin', include files under `/usr/local/include', etc. You
can specify an installation prefix other than `/usr/local' by giving can specify an installation prefix other than `/usr/local' by giving
`configure' the option `--prefix=PREFIX'. `configure' the option `--prefix=PREFIX', where PREFIX must be an
absolute file name.
You can specify separate installation prefixes for You can specify separate installation prefixes for
architecture-specific files and architecture-independent files. If you architecture-specific files and architecture-independent files. If you
@ -131,15 +156,46 @@ Documentation and other data files still use the regular prefix.
In addition, if you use an unusual directory layout you can give In addition, if you use an unusual directory layout you can give
options like `--bindir=DIR' to specify different values for particular options like `--bindir=DIR' to specify different values for particular
kinds of files. Run `configure --help' for a list of the directories kinds of files. Run `configure --help' for a list of the directories
you can set and what kinds of files go in them. you can set and what kinds of files go in them. In general, the
default for these options is expressed in terms of `${prefix}', so that
specifying just `--prefix' will affect all of the other directory
specifications that were not explicitly provided.
The most portable way to affect installation locations is to pass the
correct locations to `configure'; however, many packages provide one or
both of the following shortcuts of passing variable assignments to the
`make install' command line to change installation locations without
having to reconfigure or recompile.
The first method involves providing an override variable for each
affected directory. For example, `make install
prefix=/alternate/directory' will choose an alternate location for all
directory configuration variables that were expressed in terms of
`${prefix}'. Any directories that were specified during `configure',
but not in terms of `${prefix}', must each be overridden at install
time for the entire installation to be relocated. The approach of
makefile variable overrides for each directory variable is required by
the GNU Coding Standards, and ideally causes no recompilation.
However, some platforms have known limitations with the semantics of
shared libraries that end up requiring recompilation when using this
method, particularly noticeable in packages that use GNU Libtool.
The second method involves providing the `DESTDIR' variable. For
example, `make install DESTDIR=/alternate/directory' will prepend
`/alternate/directory' before all installation names. The approach of
`DESTDIR' overrides is not required by the GNU Coding Standards, and
does not work on platforms that have drive letters. On the other hand,
it does better at avoiding recompilation issues, and works well even
when some directory options were not specified in terms of `${prefix}'
at `configure' time.
Optional Features
=================
If the package supports it, you can cause programs to be installed If the package supports it, you can cause programs to be installed
with an extra prefix or suffix on their names by giving `configure' the with an extra prefix or suffix on their names by giving `configure' the
option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
Optional Features
=================
Some packages pay attention to `--enable-FEATURE' options to Some packages pay attention to `--enable-FEATURE' options to
`configure', where FEATURE indicates an optional part of the package. `configure', where FEATURE indicates an optional part of the package.
They may also pay attention to `--with-PACKAGE' options, where PACKAGE They may also pay attention to `--with-PACKAGE' options, where PACKAGE
@ -152,6 +208,13 @@ find the X include and library files automatically, but if it doesn't,
you can use the `configure' options `--x-includes=DIR' and you can use the `configure' options `--x-includes=DIR' and
`--x-libraries=DIR' to specify their locations. `--x-libraries=DIR' to specify their locations.
Some packages offer the ability to configure how verbose the
execution of `make' will be. For these packages, running `./configure
--enable-silent-rules' sets the default to minimal output, which can be
overridden with `make V=1'; while running `./configure
--disable-silent-rules' sets the default to verbose, which can be
overridden with `make V=0'.
Particular systems Particular systems
================== ==================
@ -159,10 +222,15 @@ Particular systems
CC is not installed, it is recommended to use the following options in CC is not installed, it is recommended to use the following options in
order to use an ANSI C compiler: order to use an ANSI C compiler:
./configure CC="cc -Ae" ./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
and if that doesn't work, install pre-built binaries of GCC for HP-UX. and if that doesn't work, install pre-built binaries of GCC for HP-UX.
HP-UX `make' updates targets which have the same time stamps as
their prerequisites, which makes it generally unusable when shipped
generated files such as `configure' are involved. Use GNU `make'
instead.
On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
parse its `<wchar.h>' header file. The option `-nodtk' can be used as parse its `<wchar.h>' header file. The option `-nodtk' can be used as
a workaround. If GNU CC is not installed, it is therefore recommended a workaround. If GNU CC is not installed, it is therefore recommended
@ -174,6 +242,16 @@ and if that doesn't work, try
./configure CC="cc -nodtk" ./configure CC="cc -nodtk"
On Solaris, don't put `/usr/ucb' early in your `PATH'. This
directory contains several dysfunctional programs; working variants of
these programs are available in `/usr/bin'. So, if you need `/usr/ucb'
in your `PATH', put it _after_ `/usr/bin'.
On Haiku, software installed for all users goes in `/boot/common',
not `/usr/local'. It is recommended to use the following options:
./configure --prefix=/boot/common
Specifying the System Type Specifying the System Type
========================== ==========================
@ -189,7 +267,8 @@ type, such as `sun4', or a canonical name which has the form:
where SYSTEM can have one of these forms: where SYSTEM can have one of these forms:
OS KERNEL-OS OS
KERNEL-OS
See the file `config.sub' for the possible values of each field. If See the file `config.sub' for the possible values of each field. If
`config.sub' isn't included in this package, then this package doesn't `config.sub' isn't included in this package, then this package doesn't
@ -277,7 +356,7 @@ operates.
`configure' can determine that directory automatically. `configure' can determine that directory automatically.
`--prefix=DIR' `--prefix=DIR'
Use DIR as the installation prefix. *Note Installation Names:: Use DIR as the installation prefix. *note Installation Names::
for more details, including other options available for fine-tuning for more details, including other options available for fine-tuning
the installation locations. the installation locations.

View File

@ -4,12 +4,14 @@ PCRE LICENCE
PCRE is a library of functions to support regular expressions whose syntax PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Release 7 of PCRE is distributed under the terms of the "BSD" licence, as Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
specified below. The documentation for PCRE, supplied in the "doc" specified below. The documentation for PCRE, supplied in the "doc"
directory, is distributed under the same terms as the software itself. directory, is distributed under the same terms as the software itself.
The basic library functions are written in C and are freestanding. Also The basic library functions are written in C and are freestanding. Also
included in the distribution is a set of C++ wrapper functions. included in the distribution is a set of C++ wrapper functions, and a
just-in-time compiler that can be used to optimize pattern matching. These
are both optional features that can be omitted when the library is built.
THE BASIC LIBRARY FUNCTIONS THE BASIC LIBRARY FUNCTIONS
@ -22,7 +24,29 @@ Email domain: cam.ac.uk
University of Cambridge Computing Service, University of Cambridge Computing Service,
Cambridge, England. Cambridge, England.
Copyright (c) 1997-2009 University of Cambridge Copyright (c) 1997-2012 University of Cambridge
All rights reserved.
PCRE JUST-IN-TIME COMPILATION SUPPORT
-------------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright(c) 2010-2012 Zoltan Herczeg
All rights reserved.
STACK-LESS JUST-IN-TIME COMPILER
--------------------------------
Written by: Zoltan Herczeg
Email local part: hzmester
Email domain: freemail.hu
Copyright(c) 2009-2012 Zoltan Herczeg
All rights reserved. All rights reserved.
@ -31,7 +55,7 @@ THE C++ WRAPPER FUNCTIONS
Contributed by: Google Inc. Contributed by: Google Inc.
Copyright (c) 2007-2008, Google Inc. Copyright (c) 2007-2012, Google Inc.
All rights reserved. All rights reserved.

View File

@ -1,5 +1,7 @@
## Process this file with automake to produce Makefile.in. ## Process this file with automake to produce Makefile.in.
ACLOCAL_AMFLAGS = -I m4
dist_doc_DATA = \ dist_doc_DATA = \
doc/pcre.txt \ doc/pcre.txt \
doc/pcre-config.txt \ doc/pcre-config.txt \
@ -15,7 +17,9 @@ dist_doc_DATA = \
dist_html_DATA = \ dist_html_DATA = \
doc/html/index.html \ doc/html/index.html \
doc/html/pcre.html \ doc/html/pcre.html \
doc/html/pcre16.html \
doc/html/pcre-config.html \ doc/html/pcre-config.html \
doc/html/pcre_assign_jit_stack.html \
doc/html/pcre_compile.html \ doc/html/pcre_compile.html \
doc/html/pcre_compile2.html \ doc/html/pcre_compile2.html \
doc/html/pcre_config.html \ doc/html/pcre_config.html \
@ -23,6 +27,7 @@ dist_html_DATA = \
doc/html/pcre_copy_substring.html \ doc/html/pcre_copy_substring.html \
doc/html/pcre_dfa_exec.html \ doc/html/pcre_dfa_exec.html \
doc/html/pcre_exec.html \ doc/html/pcre_exec.html \
doc/html/pcre_free_study.html \
doc/html/pcre_free_substring.html \ doc/html/pcre_free_substring.html \
doc/html/pcre_free_substring_list.html \ doc/html/pcre_free_substring_list.html \
doc/html/pcre_fullinfo.html \ doc/html/pcre_fullinfo.html \
@ -31,16 +36,23 @@ dist_html_DATA = \
doc/html/pcre_get_stringtable_entries.html \ doc/html/pcre_get_stringtable_entries.html \
doc/html/pcre_get_substring.html \ doc/html/pcre_get_substring.html \
doc/html/pcre_get_substring_list.html \ doc/html/pcre_get_substring_list.html \
doc/html/pcre_info.html \ doc/html/pcre_jit_exec.html \
doc/html/pcre_jit_stack_alloc.html \
doc/html/pcre_jit_stack_free.html \
doc/html/pcre_maketables.html \ doc/html/pcre_maketables.html \
doc/html/pcre_pattern_to_host_byte_order.html \
doc/html/pcre_refcount.html \ doc/html/pcre_refcount.html \
doc/html/pcre_study.html \ doc/html/pcre_study.html \
doc/html/pcre_utf16_to_host_byte_order.html \
doc/html/pcre_version.html \ doc/html/pcre_version.html \
doc/html/pcreapi.html \ doc/html/pcreapi.html \
doc/html/pcrebuild.html \ doc/html/pcrebuild.html \
doc/html/pcrecallout.html \ doc/html/pcrecallout.html \
doc/html/pcrecompat.html \ doc/html/pcrecompat.html \
doc/html/pcredemo.html \
doc/html/pcregrep.html \ doc/html/pcregrep.html \
doc/html/pcrejit.html \
doc/html/pcrelimits.html \
doc/html/pcrematching.html \ doc/html/pcrematching.html \
doc/html/pcrepartial.html \ doc/html/pcrepartial.html \
doc/html/pcrepattern.html \ doc/html/pcrepattern.html \
@ -50,7 +62,12 @@ dist_html_DATA = \
doc/html/pcresample.html \ doc/html/pcresample.html \
doc/html/pcrestack.html \ doc/html/pcrestack.html \
doc/html/pcresyntax.html \ doc/html/pcresyntax.html \
doc/html/pcretest.html doc/html/pcretest.html \
doc/html/pcreunicode.html
# doc/html/pcre32.html \
# doc/html/pcre_utf32_to_host_byte_order.html \
#
pcrecpp_html = doc/html/pcrecpp.html pcrecpp_html = doc/html/pcrecpp.html
dist_noinst_DATA = $(pcrecpp_html) dist_noinst_DATA = $(pcrecpp_html)
@ -69,7 +86,8 @@ check_SCRIPTS =
dist_noinst_SCRIPTS = dist_noinst_SCRIPTS =
# Some of the binaries we make are to be installed, and others are # Some of the binaries we make are to be installed, and others are
# (non-user-visible) helper programs needed to build libpcre. # (non-user-visible) helper programs needed to build libpcre, libpcre16
# or libpcre32.
bin_PROGRAMS = bin_PROGRAMS =
noinst_PROGRAMS = noinst_PROGRAMS =
@ -81,15 +99,21 @@ MAINTAINERCLEANFILES =
# the Autotools include by default. # the Autotools include by default.
EXTRA_DIST = EXTRA_DIST =
# These files contain additional m4 macros that are used by autoconf.
EXTRA_DIST += \
m4/ax_pthread.m4 m4/pcre_visibility.m4
# These files contain maintenance information # These files contain maintenance information
EXTRA_DIST += \ EXTRA_DIST += \
doc/perltest.txt \ doc/perltest.txt \
NON-UNIX-USE \ NON-UNIX-USE \
NON-AUTOTOOLS-BUILD \
HACKING HACKING
# These files are used in the preparation of a release # These files are used in the preparation of a release
EXTRA_DIST += \ EXTRA_DIST += \
PrepareRelease \ PrepareRelease \
CheckMan \
CleanTxt \ CleanTxt \
Detrail \ Detrail \
132html \ 132html \
@ -109,11 +133,39 @@ EXTRA_DIST += \
pcre.h.generic \ pcre.h.generic \
config.h.generic config.h.generic
pcre.h.generic: configure.ac # The only difference between pcre.h.in and pcre.h is the setting of the PCRE
# version number. Therefore, we can create the generic version just by copying.
pcre.h.generic: pcre.h.in configure.ac
rm -f $@ rm -f $@
cp -p pcre.h $@ cp -p pcre.h $@
MAINTAINERCLEANFILES += pcre.h.generic # It is more complicated for config.h.generic. We need the version that results
# from a default configuration. We can get this by doing a configure in a
# temporary directory. However, some trickery is needed,
# because the source directory may already be configured. If you
# just try running configure in a new directory, it complains. For this reason,
# we move config.status out of the way while doing the default configuration.
# The resulting config.h is munged by perl to put #ifdefs round any #defines
# and to get rid of any gcc-specific visibility settings. Make sure that
# PCRE_EXP_DEFN is unset (in case it has visibility settings).
config.h.generic: configure.ac
rm -rf $@ _generic
mkdir _generic
cs=$(srcdir)/config.status; test ! -f $$cs || mv -f $$cs $$cs.aside
cd _generic && $(abs_top_srcdir)/configure || :
cs=$(srcdir)/config.status; test ! -f $$cs.aside || mv -f $$cs.aside $$cs
test -f _generic/config.h
perl -n \
-e 'BEGIN{$$blank=0;}' \
-e 'if(/PCRE_EXP_DEFN/){print"/* #undef PCRE_EXP_DEFN */\n";$$blank=0;next;}' \
-e 'if(/to make a symbol visible/){next;}' \
-e 'if(/__attribute__ \(\(visibility/){next;}' \
-e 'if(/^#define\s(?!PACKAGE)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;}' \
-e 'else {if(/^\s*$$/){print unless $$blank; $$blank=1;} else{print;$$blank=0;}}' \
_generic/config.h >$@
rm -rf _generic
MAINTAINERCLEANFILES += pcre.h.generic config.h.generic
# These are the header files we'll install. We do not distribute pcre.h because # These are the header files we'll install. We do not distribute pcre.h because
# it is generated from pcre.h.in. # it is generated from pcre.h.in.
@ -158,10 +210,16 @@ pcre_chartables.c: $(srcdir)/pcre_chartables.c.dist
endif # WITH_REBUILD_CHARTABLES endif # WITH_REBUILD_CHARTABLES
BUILT_SOURCES = pcre_chartables.c
## The main pcre library ## The main pcre library
# Build the 8 bit library if it is enabled.
if WITH_PCRE8
lib_LTLIBRARIES += libpcre.la lib_LTLIBRARIES += libpcre.la
libpcre_la_SOURCES = \ libpcre_la_SOURCES = \
pcre_byte_order.c \
pcre_compile.c \ pcre_compile.c \
pcre_config.c \ pcre_config.c \
pcre_dfa_exec.c \ pcre_dfa_exec.c \
@ -169,41 +227,211 @@ libpcre_la_SOURCES = \
pcre_fullinfo.c \ pcre_fullinfo.c \
pcre_get.c \ pcre_get.c \
pcre_globals.c \ pcre_globals.c \
pcre_info.c \
pcre_internal.h \ pcre_internal.h \
pcre_jit_compile.c \
pcre_maketables.c \ pcre_maketables.c \
pcre_newline.c \ pcre_newline.c \
pcre_ord2utf8.c \ pcre_ord2utf8.c \
pcre_refcount.c \ pcre_refcount.c \
pcre_string_utils.c \
pcre_study.c \ pcre_study.c \
pcre_tables.c \ pcre_tables.c \
pcre_try_flipped.c \
pcre_ucd.c \ pcre_ucd.c \
pcre_valid_utf8.c \ pcre_valid_utf8.c \
pcre_version.c \ pcre_version.c \
pcre_xclass.c \ pcre_xclass.c \
ucp.h ucp.h
libpcre_la_CFLAGS = \
$(VISIBILITY_CFLAGS) \
$(AM_CFLAGS)
libpcre_la_LIBADD =
## This file is generated as part of the building process, so don't distribute. ## This file is generated as part of the building process, so don't distribute.
nodist_libpcre_la_SOURCES = \ nodist_libpcre_la_SOURCES = \
pcre_chartables.c pcre_chartables.c
# The pcre_printint.src file is #included by some source files, so it must be endif # WITH_PCRE8
# distributed. The pcre_chartables.c.dist file is the default version of
# pcre_chartables.c, used unless --enable-rebuild-chartables is specified.
EXTRA_DIST += pcre_printint.src pcre_chartables.c.dist
# Build the 16 bit library if it is enabled.
if WITH_PCRE16
lib_LTLIBRARIES += libpcre16.la
libpcre16_la_SOURCES = \
pcre16_byte_order.c \
pcre16_chartables.c \
pcre16_compile.c \
pcre16_config.c \
pcre16_dfa_exec.c \
pcre16_exec.c \
pcre16_fullinfo.c \
pcre16_get.c \
pcre16_globals.c \
pcre16_jit_compile.c \
pcre16_maketables.c \
pcre16_newline.c \
pcre16_ord2utf16.c \
pcre16_refcount.c \
pcre16_string_utils.c \
pcre16_study.c \
pcre16_tables.c \
pcre16_ucd.c \
pcre16_utf16_utils.c \
pcre16_valid_utf16.c \
pcre16_version.c \
pcre16_xclass.c
libpcre16_la_CFLAGS = \
$(VISIBILITY_CFLAGS) \
$(AM_CFLAGS)
libpcre16_la_LIBADD =
## This file is generated as part of the building process, so don't distribute.
nodist_libpcre16_la_SOURCES = \
pcre_chartables.c
endif # WITH_PCRE16
# Build the 32 bit library if it is enabled.
if WITH_PCRE32
lib_LTLIBRARIES += libpcre32.la
libpcre32_la_SOURCES = \
pcre32_byte_order.c \
pcre32_chartables.c \
pcre32_compile.c \
pcre32_config.c \
pcre32_dfa_exec.c \
pcre32_exec.c \
pcre32_fullinfo.c \
pcre32_get.c \
pcre32_globals.c \
pcre32_jit_compile.c \
pcre32_maketables.c \
pcre32_newline.c \
pcre32_ord2utf32.c \
pcre32_refcount.c \
pcre32_string_utils.c \
pcre32_study.c \
pcre32_tables.c \
pcre32_ucd.c \
pcre32_utf32_utils.c \
pcre32_valid_utf32.c \
pcre32_version.c \
pcre32_xclass.c
libpcre32_la_CFLAGS = \
$(VISIBILITY_CFLAGS) \
$(AM_CFLAGS)
libpcre32_la_LIBADD =
## This file is generated as part of the building process, so don't distribute.
nodist_libpcre32_la_SOURCES = \
pcre_chartables.c
endif # WITH_PCRE32
# The pcre_chartables.c.dist file is the default version of pcre_chartables.c,
# used unless --enable-rebuild-chartables is specified.
EXTRA_DIST += pcre_chartables.c.dist
# The JIT compiler lives in a separate directory, but its files are #included
# when pcre_jit_compile.c is processed, so they must be distributed.
EXTRA_DIST += \
sljit/sljitConfig.h \
sljit/sljitConfigInternal.h \
sljit/sljitExecAllocator.c \
sljit/sljitLir.c \
sljit/sljitLir.h \
sljit/sljitNativeARM_Thumb2.c \
sljit/sljitNativeARM_v5.c \
sljit/sljitNativeMIPS_32.c \
sljit/sljitNativeMIPS_common.c \
sljit/sljitNativePPC_32.c \
sljit/sljitNativePPC_64.c \
sljit/sljitNativePPC_common.c \
sljit/sljitNativeSPARC_32.c \
sljit/sljitNativeSPARC_common.c \
sljit/sljitNativeX86_32.c \
sljit/sljitNativeX86_64.c \
sljit/sljitNativeX86_common.c \
sljit/sljitUtils.c
if WITH_PCRE8
libpcre_la_LDFLAGS = $(EXTRA_LIBPCRE_LDFLAGS) libpcre_la_LDFLAGS = $(EXTRA_LIBPCRE_LDFLAGS)
endif # WITH_PCRE8
if WITH_PCRE16
libpcre16_la_LDFLAGS = $(EXTRA_LIBPCRE16_LDFLAGS)
endif # WITH_PCRE16
if WITH_PCRE32
libpcre32_la_LDFLAGS = $(EXTRA_LIBPCRE32_LDFLAGS)
endif # WITH_PCRE32
if WITH_VALGRIND
if WITH_PCRE8
libpcre_la_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_PCRE8
if WITH_PCRE16
libpcre16_la_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_PCRE16
if WITH_PCRE32
libpcre32_la_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_PCRE32
endif # WITH_VALGRIND
if WITH_GCOV
if WITH_PCRE8
libpcre_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_PCRE8
if WITH_PCRE16
libpcre16_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_PCRE16
if WITH_PCRE32
libpcre32_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_PCRE32
endif # WITH_GCOV
CLEANFILES += pcre_chartables.c CLEANFILES += pcre_chartables.c
## If JIT support is enabled, arrange for the JIT test program to run.
if WITH_JIT
TESTS += pcre_jit_test
noinst_PROGRAMS += pcre_jit_test
pcre_jit_test_SOURCES = pcre_jit_test.c
pcre_jit_test_CFLAGS = $(AM_CFLAGS)
pcre_jit_test_LDADD =
if WITH_PCRE8
pcre_jit_test_LDADD += libpcre.la
endif # WITH_PCRE8
if WITH_PCRE16
pcre_jit_test_LDADD += libpcre16.la
endif # WITH_PCRE16
if WITH_PCRE32
pcre_jit_test_LDADD += libpcre32.la
endif # WITH_PCRE32
if WITH_GCOV
pcre_jit_test_CFLAGS += $(GCOV_CFLAGS)
pcre_jit_test_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
endif # WITH_JIT
## A version of the main pcre library that has a posix re API. ## A version of the main pcre library that has a posix re API.
if WITH_PCRE8
lib_LTLIBRARIES += libpcreposix.la lib_LTLIBRARIES += libpcreposix.la
libpcreposix_la_SOURCES = \ libpcreposix_la_SOURCES = \
pcreposix.c pcreposix.c
libpcreposix_la_CFLAGS = $(VISIBILITY_CFLAGS) $(AM_CFLAGS)
libpcreposix_la_LDFLAGS = $(EXTRA_LIBPCREPOSIX_LDFLAGS) libpcreposix_la_LDFLAGS = $(EXTRA_LIBPCREPOSIX_LDFLAGS)
libpcreposix_la_LIBADD = libpcre.la libpcreposix_la_LIBADD = libpcre.la
if WITH_GCOV
libpcreposix_la_CFLAGS += $(GCOV_CFLAGS)
endif # WITH_GCOV
endif # WITH_PCRE8
## There's a C++ library as well. ## There's a C++ library as well.
if WITH_PCRE_CPP if WITH_PCRE_CPP
@ -213,24 +441,35 @@ libpcrecpp_la_SOURCES = \
pcrecpp.cc \ pcrecpp.cc \
pcre_scanner.cc \ pcre_scanner.cc \
pcre_stringpiece.cc pcre_stringpiece.cc
libpcrecpp_la_CXXFLAGS = $(VISIBILITY_CXXFLAGS) $(AM_CXXFLAGS)
libpcrecpp_la_LDFLAGS = $(EXTRA_LIBPCRECPP_LDFLAGS) libpcrecpp_la_LDFLAGS = $(EXTRA_LIBPCRECPP_LDFLAGS)
libpcrecpp_la_LIBADD = libpcre.la libpcrecpp_la_LIBADD = libpcre.la
TESTS += pcrecpp_unittest TESTS += pcrecpp_unittest
noinst_PROGRAMS += pcrecpp_unittest noinst_PROGRAMS += pcrecpp_unittest
pcrecpp_unittest_SOURCES = pcrecpp_unittest.cc pcrecpp_unittest_SOURCES = pcrecpp_unittest.cc
pcrecpp_unittest_CXXFLAGS = $(AM_CXXFLAGS)
pcrecpp_unittest_LDADD = libpcrecpp.la pcrecpp_unittest_LDADD = libpcrecpp.la
TESTS += pcre_scanner_unittest TESTS += pcre_scanner_unittest
noinst_PROGRAMS += pcre_scanner_unittest noinst_PROGRAMS += pcre_scanner_unittest
pcre_scanner_unittest_SOURCES = pcre_scanner_unittest.cc pcre_scanner_unittest_SOURCES = pcre_scanner_unittest.cc
pcre_scanner_unittest_CXXFLAGS = $(AM_CXXFLAGS)
pcre_scanner_unittest_LDADD = libpcrecpp.la pcre_scanner_unittest_LDADD = libpcrecpp.la
TESTS += pcre_stringpiece_unittest TESTS += pcre_stringpiece_unittest
noinst_PROGRAMS += pcre_stringpiece_unittest noinst_PROGRAMS += pcre_stringpiece_unittest
pcre_stringpiece_unittest_SOURCES = pcre_stringpiece_unittest.cc pcre_stringpiece_unittest_SOURCES = pcre_stringpiece_unittest.cc
pcre_stringpiece_unittest_CXXFLAGS = $(AM_CXXFLAGS)
pcre_stringpiece_unittest_LDADD = libpcrecpp.la pcre_stringpiece_unittest_LDADD = libpcrecpp.la
if WITH_GCOV
libpcrecpp_la_CXXFLAGS += $(GCOV_CXXFLAGS)
pcrecpp_unittest_LDADD += $(GCOV_LIBS)
pcre_scanner_unittest_LDADD += $(GCOV_LIBS)
pcre_stringpiece_unittest_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
endif # WITH_PCRE_CPP endif # WITH_PCRE_CPP
## The main unit tests ## The main unit tests
@ -243,16 +482,47 @@ dist_noinst_SCRIPTS += RunTest
EXTRA_DIST += RunTest.bat EXTRA_DIST += RunTest.bat
bin_PROGRAMS += pcretest bin_PROGRAMS += pcretest
pcretest_SOURCES = pcretest.c pcretest_SOURCES = pcretest.c
pcretest_LDADD = libpcreposix.la $(LIBREADLINE) pcretest_CFLAGS = $(AM_CFLAGS)
pcretest_LDADD = $(LIBREADLINE)
if WITH_PCRE8
pcretest_SOURCES += pcre_printint.c
pcretest_LDADD += libpcre.la libpcreposix.la
endif # WITH_PCRE8
if WITH_PCRE16
pcretest_SOURCES += pcre16_printint.c
pcretest_LDADD += libpcre16.la
endif # WITH_PCRE16
if WITH_PCRE32
pcretest_SOURCES += pcre32_printint.c
pcretest_LDADD += libpcre32.la
endif # WITH_PCRE32
if WITH_VALGRIND
pcretest_CFLAGS += $(VALGRIND_CFLAGS)
endif # WITH_VALGRIND
if WITH_GCOV
pcretest_CFLAGS += $(GCOV_CFLAGS)
pcretest_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
if WITH_PCRE8
TESTS += RunGrepTest TESTS += RunGrepTest
dist_noinst_SCRIPTS += RunGrepTest dist_noinst_SCRIPTS += RunGrepTest
bin_PROGRAMS += pcregrep bin_PROGRAMS += pcregrep
pcregrep_SOURCES = pcregrep.c pcregrep_SOURCES = pcregrep.c
pcregrep_LDADD = libpcreposix.la $(LIBZ) $(LIBBZ2) pcregrep_CFLAGS = $(AM_CFLAGS)
pcregrep_LDADD = $(LIBZ) $(LIBBZ2)
pcregrep_LDADD += libpcre.la libpcreposix.la
if WITH_GCOV
pcregrep_CFLAGS += $(GCOV_CFLAGS)
pcregrep_LDADD += $(GCOV_LIBS)
endif # WITH_GCOV
endif # WITH_PCRE8
EXTRA_DIST += \ EXTRA_DIST += \
testdata/grepbinary \
testdata/grepfilelist \
testdata/grepinput \ testdata/grepinput \
testdata/grepinput3 \
testdata/grepinput8 \ testdata/grepinput8 \
testdata/grepinputv \ testdata/grepinputv \
testdata/grepinputx \ testdata/grepinputx \
@ -260,6 +530,18 @@ EXTRA_DIST += \
testdata/grepoutput \ testdata/grepoutput \
testdata/grepoutput8 \ testdata/grepoutput8 \
testdata/grepoutputN \ testdata/grepoutputN \
testdata/greppatN4 \
testdata/saved16 \
testdata/saved16BE-1 \
testdata/saved16BE-2 \
testdata/saved16LE-1 \
testdata/saved16LE-2 \
testdata/saved32 \
testdata/saved32BE-1 \
testdata/saved32BE-2 \
testdata/saved32LE-1 \
testdata/saved32LE-2 \
testdata/saved8 \
testdata/testinput1 \ testdata/testinput1 \
testdata/testinput2 \ testdata/testinput2 \
testdata/testinput3 \ testdata/testinput3 \
@ -270,6 +552,23 @@ EXTRA_DIST += \
testdata/testinput8 \ testdata/testinput8 \
testdata/testinput9 \ testdata/testinput9 \
testdata/testinput10 \ testdata/testinput10 \
testdata/testinput11 \
testdata/testinput12 \
testdata/testinput13 \
testdata/testinput14 \
testdata/testinput15 \
testdata/testinput16 \
testdata/testinput17 \
testdata/testinput18 \
testdata/testinput19 \
testdata/testinput20 \
testdata/testinput21 \
testdata/testinput22 \
testdata/testinput23 \
testdata/testinput24 \
testdata/testinput25 \
testdata/testinput26 \
testdata/testinputEBC \
testdata/testoutput1 \ testdata/testoutput1 \
testdata/testoutput2 \ testdata/testoutput2 \
testdata/testoutput3 \ testdata/testoutput3 \
@ -280,6 +579,28 @@ EXTRA_DIST += \
testdata/testoutput8 \ testdata/testoutput8 \
testdata/testoutput9 \ testdata/testoutput9 \
testdata/testoutput10 \ testdata/testoutput10 \
testdata/testoutput11-8 \
testdata/testoutput11-16 \
testdata/testoutput11-32 \
testdata/testoutput12 \
testdata/testoutput13 \
testdata/testoutput14 \
testdata/testoutput15 \
testdata/testoutput16 \
testdata/testoutput17 \
testdata/testoutput18-16 \
testdata/testoutput18-32 \
testdata/testoutput19 \
testdata/testoutput20 \
testdata/testoutput21-16 \
testdata/testoutput21-32 \
testdata/testoutput22-16 \
testdata/testoutput22-32 \
testdata/testoutput23 \
testdata/testoutput24 \
testdata/testoutput25 \
testdata/testoutput26 \
testdata/testoutputEBC \
testdata/wintestinput3 \ testdata/wintestinput3 \
testdata/wintestoutput3 \ testdata/wintestoutput3 \
perltest.pl perltest.pl
@ -287,6 +608,7 @@ EXTRA_DIST += \
CLEANFILES += \ CLEANFILES += \
testsavedregex \ testsavedregex \
teststderr \ teststderr \
testtemp* \
testtry \ testtry \
testNinput testNinput
@ -309,13 +631,12 @@ test: check ;
# A PCRE user submitted the following addition, saying that it "will allow # A PCRE user submitted the following addition, saying that it "will allow
# anyone using the 'mingw32' compiler to simply type 'make pcre.dll' and get a # anyone using the 'mingw32' compiler to simply type 'make pcre.dll' and get a
# nice DLL for Windows use". (It is used by the pcre.dll target.) # nice DLL for Windows use". (It is used by the pcre.dll target.)
DLL_OBJS= pcre_compile.o pcre_config.o \ DLL_OBJS= pcre_byte_order.o pcre_compile.o pcre_config.o \
pcre_dfa_exec.o pcre_exec.o pcre_fullinfo.o pcre_get.o \ pcre_dfa_exec.o pcre_exec.o pcre_fullinfo.o pcre_get.o \
pcre_globals.o pcre_info.o pcre_maketables.o \ pcre_globals.o pcre_jit_compile.o pcre_maketables.o \
pcre_newline.o pcre_ord2utf8.o pcre_refcount.o \ pcre_newline.o pcre_ord2utf8.o pcre_refcount.o \
pcre_study.o pcre_tables.o pcre_try_flipped.o \ pcre_study.o pcre_tables.o pcre_ucd.o \
pcre_ucd.o pcre_valid_utf8.o pcre_version.o \ pcre_valid_utf8.o pcre_version.o pcre_chartables.o \
pcre_chartables.o \
pcre_xclass.o pcre_xclass.o
# A PCRE user submitted the following addition, saying that it "will allow # A PCRE user submitted the following addition, saying that it "will allow
@ -327,14 +648,23 @@ pcre.dll: $(DLL_OBJS)
# We have .pc files for pkg-config users. # We have .pc files for pkg-config users.
pkgconfigdir = $(libdir)/pkgconfig pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = libpcre.pc pkgconfig_DATA = libpcre.pc libpcreposix.pc
if WITH_PCRE16
pkgconfig_DATA += libpcre16.pc
endif
if WITH_PCRE32
pkgconfig_DATA += libpcre32.pc
endif
if WITH_PCRE_CPP if WITH_PCRE_CPP
pkgconfig_DATA += libpcrecpp.pc pkgconfig_DATA += libpcrecpp.pc
endif endif
dist_man_MANS = \ dist_man_MANS = \
doc/pcre.3 \ doc/pcre.3 \
doc/pcre16.3 \
doc/pcre32.3 \
doc/pcre-config.1 \ doc/pcre-config.1 \
doc/pcre_assign_jit_stack.3 \
doc/pcre_compile.3 \ doc/pcre_compile.3 \
doc/pcre_compile2.3 \ doc/pcre_compile2.3 \
doc/pcre_config.3 \ doc/pcre_config.3 \
@ -342,6 +672,7 @@ dist_man_MANS = \
doc/pcre_copy_substring.3 \ doc/pcre_copy_substring.3 \
doc/pcre_dfa_exec.3 \ doc/pcre_dfa_exec.3 \
doc/pcre_exec.3 \ doc/pcre_exec.3 \
doc/pcre_free_study.3 \
doc/pcre_free_substring.3 \ doc/pcre_free_substring.3 \
doc/pcre_free_substring_list.3 \ doc/pcre_free_substring_list.3 \
doc/pcre_fullinfo.3 \ doc/pcre_fullinfo.3 \
@ -350,16 +681,23 @@ dist_man_MANS = \
doc/pcre_get_stringtable_entries.3 \ doc/pcre_get_stringtable_entries.3 \
doc/pcre_get_substring.3 \ doc/pcre_get_substring.3 \
doc/pcre_get_substring_list.3 \ doc/pcre_get_substring_list.3 \
doc/pcre_info.3 \ doc/pcre_jit_exec.3 \
doc/pcre_jit_stack_alloc.3 \
doc/pcre_jit_stack_free.3 \
doc/pcre_maketables.3 \ doc/pcre_maketables.3 \
doc/pcre_pattern_to_host_byte_order.3 \
doc/pcre_refcount.3 \ doc/pcre_refcount.3 \
doc/pcre_study.3 \ doc/pcre_study.3 \
doc/pcre_utf16_to_host_byte_order.3 \
doc/pcre_utf32_to_host_byte_order.3 \
doc/pcre_version.3 \ doc/pcre_version.3 \
doc/pcreapi.3 \ doc/pcreapi.3 \
doc/pcrebuild.3 \ doc/pcrebuild.3 \
doc/pcrecallout.3 \ doc/pcrecallout.3 \
doc/pcrecompat.3 \ doc/pcrecompat.3 \
doc/pcregrep.1 \ doc/pcregrep.1 \
doc/pcrejit.3 \
doc/pcrelimits.3 \
doc/pcrematching.3 \ doc/pcrematching.3 \
doc/pcrepartial.3 \ doc/pcrepartial.3 \
doc/pcrepattern.3 \ doc/pcrepattern.3 \
@ -369,7 +707,63 @@ dist_man_MANS = \
doc/pcresample.3 \ doc/pcresample.3 \
doc/pcrestack.3 \ doc/pcrestack.3 \
doc/pcresyntax.3 \ doc/pcresyntax.3 \
doc/pcretest.1 doc/pcretest.1 \
doc/pcreunicode.3
# Arrange for the per-function man pages to have 16- and 32-bit names as well.
install-data-hook:
ln -sf pcre_assign_jit_stack.3 $(DESTDIR)$(man3dir)/pcre16_assign_jit_stack.3
ln -sf pcre_compile.3 $(DESTDIR)$(man3dir)/pcre16_compile.3
ln -sf pcre_compile2.3 $(DESTDIR)$(man3dir)/pcre16_compile2.3
ln -sf pcre_config.3 $(DESTDIR)$(man3dir)/pcre16_config.3
ln -sf pcre_copy_named_substring.3 $(DESTDIR)$(man3dir)/pcre16_copy_named_substring.3
ln -sf pcre_copy_substring.3 $(DESTDIR)$(man3dir)/pcre16_copy_substring.3
ln -sf pcre_dfa_exec.3 $(DESTDIR)$(man3dir)/pcre16_dfa_exec.3
ln -sf pcre_exec.3 $(DESTDIR)$(man3dir)/pcre16_exec.3
ln -sf pcre_free_study.3 $(DESTDIR)$(man3dir)/pcre16_free_study.3
ln -sf pcre_free_substring.3 $(DESTDIR)$(man3dir)/pcre16_free_substring.3
ln -sf pcre_free_substring_list.3 $(DESTDIR)$(man3dir)/pcre16_free_substring_list.3
ln -sf pcre_fullinfo.3 $(DESTDIR)$(man3dir)/pcre16_fullinfo.3
ln -sf pcre_get_named_substring.3 $(DESTDIR)$(man3dir)/pcre16_get_named_substring.3
ln -sf pcre_get_stringnumber.3 $(DESTDIR)$(man3dir)/pcre16_get_stringnumber.3
ln -sf pcre_get_stringtable_entries.3 $(DESTDIR)$(man3dir)/pcre16_get_stringtable_entries.3
ln -sf pcre_get_substring.3 $(DESTDIR)$(man3dir)/pcre16_get_substring.3
ln -sf pcre_get_substring_list.3 $(DESTDIR)$(man3dir)/pcre16_get_substring_list.3
ln -sf pcre_jit_exec.3 $(DESTDIR)$(man3dir)/pcre16_jit_exec.3
ln -sf pcre_jit_stack_alloc.3 $(DESTDIR)$(man3dir)/pcre16_jit_stack_alloc.3
ln -sf pcre_jit_stack_free.3 $(DESTDIR)$(man3dir)/pcre16_jit_stack_free.3
ln -sf pcre_maketables.3 $(DESTDIR)$(man3dir)/pcre16_maketables.3
ln -sf pcre_pattern_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre16_pattern_to_host_byte_order.3
ln -sf pcre_refcount.3 $(DESTDIR)$(man3dir)/pcre16_refcount.3
ln -sf pcre_study.3 $(DESTDIR)$(man3dir)/pcre16_study.3
ln -sf pcre_utf16_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre16_utf16_to_host_byte_order.3
ln -sf pcre_version.3 $(DESTDIR)$(man3dir)/pcre16_version.3
ln -sf pcre_assign_jit_stack.3 $(DESTDIR)$(man3dir)/pcre32_assign_jit_stack.3
ln -sf pcre_compile.3 $(DESTDIR)$(man3dir)/pcre32_compile.3
ln -sf pcre_compile2.3 $(DESTDIR)$(man3dir)/pcre32_compile2.3
ln -sf pcre_config.3 $(DESTDIR)$(man3dir)/pcre32_config.3
ln -sf pcre_copy_named_substring.3 $(DESTDIR)$(man3dir)/pcre32_copy_named_substring.3
ln -sf pcre_copy_substring.3 $(DESTDIR)$(man3dir)/pcre32_copy_substring.3
ln -sf pcre_dfa_exec.3 $(DESTDIR)$(man3dir)/pcre32_dfa_exec.3
ln -sf pcre_exec.3 $(DESTDIR)$(man3dir)/pcre32_exec.3
ln -sf pcre_free_study.3 $(DESTDIR)$(man3dir)/pcre32_free_study.3
ln -sf pcre_free_substring.3 $(DESTDIR)$(man3dir)/pcre32_free_substring.3
ln -sf pcre_free_substring_list.3 $(DESTDIR)$(man3dir)/pcre32_free_substring_list.3
ln -sf pcre_fullinfo.3 $(DESTDIR)$(man3dir)/pcre32_fullinfo.3
ln -sf pcre_get_named_substring.3 $(DESTDIR)$(man3dir)/pcre32_get_named_substring.3
ln -sf pcre_get_stringnumber.3 $(DESTDIR)$(man3dir)/pcre32_get_stringnumber.3
ln -sf pcre_get_stringtable_entries.3 $(DESTDIR)$(man3dir)/pcre32_get_stringtable_entries.3
ln -sf pcre_get_substring.3 $(DESTDIR)$(man3dir)/pcre32_get_substring.3
ln -sf pcre_get_substring_list.3 $(DESTDIR)$(man3dir)/pcre32_get_substring_list.3
ln -sf pcre_jit_exec.3 $(DESTDIR)$(man3dir)/pcre32_jit_exec.3
ln -sf pcre_jit_stack_alloc.3 $(DESTDIR)$(man3dir)/pcre32_jit_stack_alloc.3
ln -sf pcre_jit_stack_free.3 $(DESTDIR)$(man3dir)/pcre32_jit_stack_free.3
ln -sf pcre_maketables.3 $(DESTDIR)$(man3dir)/pcre32_maketables.3
ln -sf pcre_pattern_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre32_pattern_to_host_byte_order.3
ln -sf pcre_refcount.3 $(DESTDIR)$(man3dir)/pcre32_refcount.3
ln -sf pcre_study.3 $(DESTDIR)$(man3dir)/pcre32_study.3
ln -sf pcre_utf32_to_host_byte_order.3 $(DESTDIR)$(man3dir)/pcre32_utf32_to_host_byte_order.3
ln -sf pcre_version.3 $(DESTDIR)$(man3dir)/pcre32_version.3
pcrecpp_man = doc/pcrecpp.3 pcrecpp_man = doc/pcrecpp.3
EXTRA_DIST += $(pcrecpp_man) EXTRA_DIST += $(pcrecpp_man)
@ -378,12 +772,105 @@ if WITH_PCRE_CPP
man_MANS = $(pcrecpp_man) man_MANS = $(pcrecpp_man)
endif endif
# gcov/lcov code coverage reporting
if WITH_GCOV
# Coverage reporting targets:
#
# coverage: Create a coverage report from 'make check'
# coverage-baseline: Capture baseline coverage information
# coverage-reset: This zeros the coverage counters only
# coverage-report: This creates the coverage report only
# coverage-clean-report: This removes the generated coverage report
# without cleaning the coverage data itself
# coverage-clean-data: This removes the captured coverage data without
# removing the coverage files created at compile time (*.gcno)
# coverage-clean: This cleans all coverage data including the generated
# coverage report.
COVERAGE_TEST_NAME = $(PACKAGE)
COVERAGE_NAME = $(PACKAGE)-$(VERSION)
COVERAGE_OUTPUT_FILE = $(COVERAGE_NAME)-coverage.info
COVERAGE_OUTPUT_DIR = $(COVERAGE_NAME)-coverage
COVERAGE_LCOV_EXTRA_FLAGS =
COVERAGE_GENHTML_EXTRA_FLAGS =
coverage_quiet = $(coverage_quiet_$(V))
coverage_quiet_ = $(coverage_quiet_$(AM_DEFAULT_VERBOSITY))
coverage_quiet_0 = --quiet
coverage-check: all
-$(MAKE) $(AM_MAKEFLAGS) -k check
coverage-baseline:
$(LCOV) $(coverage_quiet) \
--directory $(top_builddir) \
--output-file "$(COVERAGE_OUTPUT_FILE)" \
--capture \
--initial
coverage-report:
$(LCOV) $(coverage_quiet) \
--directory $(top_builddir) \
--capture \
--output-file "$(COVERAGE_OUTPUT_FILE).tmp" \
--test-name "$(COVERAGE_TEST_NAME)" \
--no-checksum \
--compat-libtool \
$(COVERAGE_LCOV_EXTRA_FLAGS)
$(LCOV) $(coverage_quiet) \
--directory $(top_builddir) \
--output-file "$(COVERAGE_OUTPUT_FILE)" \
--remove "$(COVERAGE_OUTPUT_FILE).tmp" \
"/tmp/*" \
"/usr/include/*" \
"$(includedir)/*"
-@rm -f "$(COVERAGE_OUTPUT_FILE).tmp"
LANG=C $(GENHTML) $(coverage_quiet) \
--prefix $(top_builddir) \
--output-directory "$(COVERAGE_OUTPUT_DIR)" \
--title "$(PACKAGE) $(VERSION) Code Coverage Report" \
--show-details "$(COVERAGE_OUTPUT_FILE)" \
--legend \
$(COVERAGE_GENHTML_EXTRA_FLAGS)
@echo "Code coverage report written to file://$(abs_builddir)/$(COVERAGE_OUTPUT_DIR)/index.html"
coverage-reset:
-$(LCOV) $(coverage_quiet) --zerocounters --directory $(top_builddir)
coverage-clean-report:
-rm -f "$(COVERAGE_OUTPUT_FILE)" "$(COVERAGE_OUTPUT_FILE).tmp"
-rm -rf "$(COVERAGE_OUTPUT_DIR)"
coverage-clean-data:
-find $(top_builddir) -name "*.gcda" -delete
coverage-clean: coverage-reset coverage-clean-report coverage-clean-data
-find $(top_builddir) -name "*.gcno" -delete
coverage-distclean: coverage-clean
coverage: coverage-reset coverage-baseline coverage-check coverage-report
clean-local: coverage-clean
distclean-local: coverage-distclean
.PHONY: coverage coverage-baseline coverage-check coverage-report coverage-reset coverage-clean-report coverage-clean-data coverage-clean coverage-distclean
else
coverage:
@echo "Configuring with --enable-coverage required to generate code coverage report."
endif # WITH_GCOV
## CMake support ## CMake support
EXTRA_DIST += \ EXTRA_DIST += \
cmake/COPYING-CMAKE-SCRIPTS \ cmake/COPYING-CMAKE-SCRIPTS \
cmake/FindPackageHandleStandardArgs.cmake \ cmake/FindPackageHandleStandardArgs.cmake \
cmake/FindReadline.cmake \ cmake/FindReadline.cmake \
cmake/FindEditline.cmake \
CMakeLists.txt \ CMakeLists.txt \
config-cmake.h.in config-cmake.h.in

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,189 @@
News about PCRE releases News about PCRE releases
------------------------ ------------------------
Release 8.32 30-November-2012
-----------------------------
This release fixes a number of bugs, but also has some new features. These are
the highlights:
. There is now support for 32-bit character strings and UTF-32. Like the
16-bit support, this is done by compiling a separate 32-bit library.
. \X now matches a Unicode extended grapheme cluster.
. Case-independent matching of Unicode characters that have more than one
"other case" now makes all three (or more) characters equivalent. This
applies, for example, to Greek Sigma, which has two lowercase versions.
. Unicode character properties are updated to Unicode 6.2.0.
. The EBCDIC support, which had decayed, has had a spring clean.
. A number of JIT optimizations have been added, which give faster JIT
execution speed. In addition, a new direct interface to JIT execution is
available. This bypasses some of the sanity checks of pcre_exec() to give a
noticeable speed-up.
. A number of issues in pcregrep have been fixed, making it more compatible
with GNU grep. In particular, --exclude and --include (and variants) apply
to all files now, not just those obtained from scanning a directory
recursively. In Windows environments, the default action for directories is
now "skip" instead of "read" (which provokes an error).
. If the --only-matching (-o) option in pcregrep is specified multiple
times, each one causes appropriate output. For example, -o1 -o2 outputs the
substrings matched by the 1st and 2nd capturing parentheses. A separating
string can be specified by --om-separator (default empty).
. When PCRE is built via Autotools using a version of gcc that has the
"visibility" feature, it is used to hide internal library functions that are
not part of the public API.
Release 8.31 06-July-2012
-------------------------
This is mainly a bug-fixing release, with a small number of developments:
. The JIT compiler now supports partial matching and the (*MARK) and
(*COMMIT) verbs.
. PCRE_INFO_MAXLOOKBEHIND can be used to find the longest lookbehind in a
pattern.
. There should be a performance improvement when using the heap instead of the
stack for recursion.
. pcregrep can now be linked with libedit as an alternative to libreadline.
. pcregrep now has a --file-list option where the list of files to scan is
given as a file.
. pcregrep now recognizes binary files and there are related options.
. The Unicode tables have been updated to 6.1.0.
As always, the full list of changes is in the ChangeLog file.
Release 8.30 04-February-2012
-----------------------------
Release 8.30 introduces a major new feature: support for 16-bit character
strings, compiled as a separate library. There are a few changes to the
8-bit library, in addition to some bug fixes.
. The pcre_info() function, which has been obsolete for over 10 years, has
been removed.
. When a compiled pattern was saved to a file and later reloaded on a host
with different endianness, PCRE used automatically to swap the bytes in some
of the data fields. With the advent of the 16-bit library, where more of this
swapping is needed, it is no longer done automatically. Instead, the bad
endianness is detected and a specific error is given. The user can then call
a new function called pcre_pattern_to_host_byte_order() (or an equivalent
16-bit function) to do the swap.
. In UTF-8 mode, the values 0xd800 to 0xdfff are not legal Unicode
code points and are now faulted. (They are the so-called "surrogates"
that are reserved for coding high values in UTF-16.)
Release 8.21 12-Dec-2011
------------------------
This is almost entirely a bug-fix release. The only new feature is the ability
to obtain the size of the memory used by the JIT compiler.
Release 8.20 21-Oct-2011
------------------------
The main change in this release is the inclusion of Zoltan Herczeg's
just-in-time compiler support, which can be accessed by building PCRE with
--enable-jit. Large performance benefits can be had in many situations. 8.20
also fixes an unfortunate bug that was introduced in 8.13 as well as tidying up
a number of infelicities and differences from Perl.
Release 8.13 16-Aug-2011
------------------------
This is mainly a bug-fix release. There has been a lot of internal refactoring.
The Unicode tables have been updated. The only new feature in the library is
the passing of *MARK information to callouts. Some additions have been made to
pcretest to make testing easier and more comprehensive. There is a new option
for pcregrep to adjust its internal buffer size.
Release 8.12 15-Jan-2011
------------------------
This release fixes some bugs in pcregrep, one of which caused the tests to fail
on 64-bit big-endian systems. There are no changes to the code of the library.
Release 8.11 10-Dec-2010
------------------------
A number of bugs in the library and in pcregrep have been fixed. As always, see
ChangeLog for details. The following are the non-bug-fix changes:
. Added --match-limit and --recursion-limit to pcregrep.
. Added an optional parentheses number to the -o and --only-matching options
of pcregrep.
. Changed the way PCRE_PARTIAL_HARD affects the matching of $, \z, \Z, \b, and
\B.
. Added PCRE_ERROR_SHORTUTF8 to make it possible to distinguish between a
bad UTF-8 sequence and one that is incomplete when using PCRE_PARTIAL_HARD.
. Recognize (*NO_START_OPT) at the start of a pattern to set the PCRE_NO_
START_OPTIMIZE option, which is now allowed at compile time.
Release 8.10 25-Jun-2010
------------------------
There are two major additions: support for (*MARK) and friends, and the option
PCRE_UCP, which changes the behaviour of \b, \d, \s, and \w (and their
opposites) so that they make use of Unicode properties. There are also a number
of lesser new features, and several bugs have been fixed. A new option,
--line-buffered, has been added to pcregrep, for use when it is connected to
pipes.
Release 8.02 19-Mar-2010
------------------------
Another bug-fix release.
Release 8.01 19-Jan-2010
------------------------
This is a bug-fix release. Several bugs in the code itself and some bugs and
infelicities in the build system have been fixed.
Release 8.00 19-Oct-09
----------------------
Bugs have been fixed in the library and in pcregrep. There are also some
enhancements. Restrictions on patterns used for partial matching have been
removed, extra information is given for partial matches, the partial matching
process has been improved, and an option to make a partial match override a
full match is available. The "study" process has been enhanced by finding a
lower bound matching length. Groups with duplicate numbers may now have
duplicated names without the use of PCRE_DUPNAMES. However, they may not have
different names. The documentation has been revised to reflect these changes.
The version number has been expanded to 3 digits as it is clear that the rate
of change is not slowing down.
Release 7.9 11-Apr-09 Release 7.9 11-Apr-09
--------------------- ---------------------

View File

@ -0,0 +1,639 @@
Building PCRE without using autotools
-------------------------------------
This document contains the following sections:
General
Generic instructions for the PCRE C library
The C++ wrapper functions
Building for virtual Pascal
Stack size in Windows environments
Linking programs in Windows environments
Calling conventions in Windows environments
Comments about Win32 builds
Building PCRE on Windows with CMake
Use of relative paths with CMake on Windows
Testing with RunTest.bat
Building under Windows with BCC5.5
Building PCRE on OpenVMS
Building PCRE on Stratus OpenVOS
Building PCRE on native z/OS and z/VM
GENERAL
I (Philip Hazel) have no experience of Windows or VMS systems and how their
libraries work. The items in the PCRE distribution and Makefile that relate to
anything other than Linux systems are untested by me.
There are some other comments and files (including some documentation in CHM
format) in the Contrib directory on the FTP site:
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
The basic PCRE library consists entirely of code written in Standard C, and so
should compile successfully on any system that has a Standard C compiler and
library. The C++ wrapper functions are a separate issue (see below).
The PCRE distribution includes a "configure" file for use by the configure/make
(autotools) build system, as found in many Unix-like environments. The README
file contains information about the options for "configure".
There is also support for CMake, which some users prefer, especially in Windows
environments, though it can also be run in Unix-like environments. See the
section entitled "Building PCRE on Windows with CMake" below.
Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
names config.h.generic and pcre.h.generic. These are provided for those who
build PCRE without using "configure" or CMake. If you use "configure" or CMake,
the .generic versions are not used.
GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
The following are generic instructions for building the PCRE C library "by
hand". If you are going to use CMake, this section does not apply to you; you
can skip ahead to the CMake section.
(1) Copy or rename the file config.h.generic as config.h, and edit the macro
settings that it contains to whatever is appropriate for your environment.
In particular, you can alter the definition of the NEWLINE macro to
specify what character(s) you want to be interpreted as line terminators.
In an EBCDIC environment, you MUST change NEWLINE, because its default
value is 10, an ASCII LF. The usual EBCDIC newline character is 21 (0x15,
NL), though in some cases it may be 37 (0x25).
When you compile any of the PCRE modules, you must specify -DHAVE_CONFIG_H
to your compiler so that config.h is included in the sources.
An alternative approach is not to edit config.h, but to use -D on the
compiler command line to make any changes that you need to the
configuration options. In this case -DHAVE_CONFIG_H must not be set.
NOTE: There have been occasions when the way in which certain parameters
in config.h are used has changed between releases. (In the configure/make
world, this is handled automatically.) When upgrading to a new release,
you are strongly advised to review config.h.generic before re-using what
you had previously.
(2) Copy or rename the file pcre.h.generic as pcre.h.
(3) EITHER:
Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
OR:
Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
you have set up config.h), and then run it with the single argument
"pcre_chartables.c". This generates a set of standard character tables
and writes them to that file. The tables are generated using the default
C locale for your system. If you want to use a locale that is specified
by LC_xxx environment variables, add the -L option to the dftables
command. You must use this method if you are building on a system that
uses EBCDIC code.
The tables in pcre_chartables.c are defaults. The caller of PCRE can
specify alternative tables at run time.
(4) Ensure that you have the following header files:
pcre_internal.h
ucp.h
(5) For an 8-bit library, compile the following source files, setting
-DHAVE_CONFIG_H as a compiler option if you have set up config.h with your
configuration, or else use other -D settings to change the configuration
as required.
pcre_byte_order.c
pcre_chartables.c
pcre_compile.c
pcre_config.c
pcre_dfa_exec.c
pcre_exec.c
pcre_fullinfo.c
pcre_get.c
pcre_globals.c
pcre_jit_compile.c
pcre_maketables.c
pcre_newline.c
pcre_ord2utf8.c
pcre_refcount.c
pcre_string_utils.c
pcre_study.c
pcre_tables.c
pcre_ucd.c
pcre_valid_utf8.c
pcre_version.c
pcre_xclass.c
Make sure that you include -I. in the compiler command (or equivalent for
an unusual compiler) so that all included PCRE header files are first
sought in the current directory. Otherwise you run the risk of picking up
a previously-installed file from somewhere else.
Note that you must still compile pcre_jit_compile.c, even if you have not
defined SUPPORT_JIT in config.h, because when JIT support is not
configured, dummy functions are compiled. When JIT support IS configured,
pcre_jit_compile.c #includes sources from the sljit subdirectory, where
there should be 16 files, all of whose names begin with "sljit".
(6) Now link all the compiled code into an object library in whichever form
your system keeps such libraries. This is the basic PCRE C 8-bit library.
If your system has static and shared libraries, you may have to do this
once for each type.
(7) If you want to build a 16-bit library (as well as, or instead of the 8-bit
or 32-bit libraries) repeat steps 5-6 with the following files:
pcre16_byte_order.c
pcre16_chartables.c
pcre16_compile.c
pcre16_config.c
pcre16_dfa_exec.c
pcre16_exec.c
pcre16_fullinfo.c
pcre16_get.c
pcre16_globals.c
pcre16_jit_compile.c
pcre16_maketables.c
pcre16_newline.c
pcre16_ord2utf16.c
pcre16_refcount.c
pcre16_string_utils.c
pcre16_study.c
pcre16_tables.c
pcre16_ucd.c
pcre16_utf16_utils.c
pcre16_valid_utf16.c
pcre16_version.c
pcre16_xclass.c
(7') If you want to build a 32-bit library (as well as, or instead of the 8-bit
or 16-bit libraries) repeat steps 5-6 with the following files:
pcre32_byte_order.c
pcre32_chartables.c
pcre32_compile.c
pcre32_config.c
pcre32_dfa_exec.c
pcre32_exec.c
pcre32_fullinfo.c
pcre32_get.c
pcre32_globals.c
pcre32_jit_compile.c
pcre32_maketables.c
pcre32_newline.c
pcre32_ord2utf32.c
pcre32_refcount.c
pcre32_string_utils.c
pcre32_study.c
pcre32_tables.c
pcre32_ucd.c
pcre32_utf32_utils.c
pcre32_valid_utf32.c
pcre32_version.c
pcre32_xclass.c
(8) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the pcreposix.h file and then compile
pcreposix.c (remembering -DHAVE_CONFIG_H if necessary). Link the result
(on its own) as the pcreposix library.
(9) The pcretest program can be linked with any combination of the 8-bit, 16-bit
and 32-bit libraries (depending on what you selected in config.h). Compile
pcretest.c and pcre_printint.c (again, don't forget -DHAVE_CONFIG_H) and
link them together with the appropriate library/ies. If you compiled an
8-bit library, pcretest also needs the pcreposix wrapper library unless
you compiled it with -DNOPOSIX.
(10) Run pcretest on the testinput files in the testdata directory, and check
that the output matches the corresponding testoutput files. There are
comments about what each test does in the section entitled "Testing PCRE"
in the README file. If you compiled more than one of the 8-bit, 16-bit and
32-bit libraries, you need to run pcretest with the -16 option to do 16-bit
tests and with the -32 option to do 32-bit tests.
Some tests are relevant only when certain build-time options are selected.
For example, test 4 is for UTF-8/UTF-16/UTF-32 support, and will not run if
you have built PCRE without it. See the comments at the start of each
testinput file. If you have a suitable Unix-like shell, the RunTest script
will run the appropriate tests for you.
Note that the supplied files are in Unix format, with just LF characters
as line terminators. You may need to edit them to change this if your
system uses a different convention. If you are using Windows, you probably
should use the wintestinput3 file instead of testinput3 (and the
corresponding output file). This is a locale test; wintestinput3 sets the
locale to "french" rather than "fr_FR", and there are some minor output
differences.
(11) If you have built PCRE with SUPPORT_JIT, the JIT features will be tested
by the testdata files. However, you might also like to build and run
the JIT test program, pcre_jit_test.c.
(12) If you want to use the pcregrep command, compile and link pcregrep.c; it
uses only the basic 8-bit PCRE library (it does not need the pcreposix
library).
THE C++ WRAPPER FUNCTIONS
The PCRE distribution also contains some C++ wrapper functions and tests,
applicable to the 8-bit library, which were contributed by Google Inc. On a
system that can use "configure" and "make", the functions are automatically
built into a library called pcrecpp. It should be straightforward to compile
the .cc files manually on other systems. The files called xxx_unittest.cc are
test programs for each of the corresponding xxx.cc files.
BUILDING FOR VIRTUAL PASCAL
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
was contributed by Alexander Tokarev. Stefan Weber updated the script and added
additional files. The following files in the distribution are for building PCRE
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
STACK SIZE IN WINDOWS ENVIRONMENTS
The default processor stack size of 1Mb in some Windows environments is too
small for matching patterns that need much recursion. In particular, test 2 may
fail because of this. Normally, running out of stack causes a crash, but there
have been cases where the test program has just died silently. See your linker
documentation for how to increase stack size if you experience problems. The
Linux default of 8Mb is a reasonable choice for the stack, though even that can
be too small for some pattern/subject combinations.
PCRE has a compile configuration option to disable the use of stack for
recursion so that heap is used instead. However, pattern matching is
significantly slower when this is done. There is more about stack usage in the
"pcrestack" documentation.
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
If you want to statically link a program against a PCRE library in the form of
a non-dll .a file, you must define PCRE_STATIC before including pcre.h or
pcrecpp.h, otherwise the pcre_malloc() and pcre_free() exported functions will
be declared __declspec(dllimport), with unwanted results.
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
It is possible to compile programs to use different calling conventions using
MSVC. Search the web for "calling conventions" for more information. To make it
easier to change the calling convention for the exported functions in the
PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
not set, it defaults to empty; the default calling convention is then used
(which is what is wanted most of the time).
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE ON WINDOWS WITH CMAKE")
There are two ways of building PCRE using the "configure, make, make install"
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
the same thing; they are completely different from each other. There is also
support for building using CMake, which some users find a more straightforward
way of building PCRE under Windows.
The MinGW home page (http://www.mingw.org/) says this:
MinGW: A collection of freely available and freely distributable Windows
specific header files and import libraries combined with GNU toolsets that
allow one to produce native Windows programs that do not rely on any
3rd-party C runtime DLLs.
The Cygwin home page (http://www.cygwin.com/) says this:
Cygwin is a Linux-like environment for Windows. It consists of two parts:
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
substantial Linux API functionality
. A collection of tools which provide Linux look and feel.
The Cygwin DLL currently works with all recent, commercially released x86 32
bit and 64 bit versions of Windows, with the exception of Windows CE.
On both MinGW and Cygwin, PCRE should build correctly using:
./configure && make && make install
This should create two libraries called libpcre and libpcreposix, and, if you
have enabled building the C++ wrapper, a third one called libpcrecpp. These are
independent libraries: when you link with libpcreposix or libpcrecpp you must
also link with libpcre, which contains the basic functions. (Some earlier
releases of PCRE included the basic libpcre functions in libpcreposix. This no
longer happens.)
A user submitted a special-purpose patch that makes it easy to create
"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
as a special target. If you use this target, no other files are built, and in
particular, the pcretest and pcregrep programs are not built. An example of how
this might be used is:
./configure --enable-utf --disable-cpp CFLAGS="-O3 -s"; make pcre.dll
Using Cygwin's compiler generates libraries and executables that depend on
cygwin1.dll. If a library that is generated this way is distributed,
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
licence, this forces not only PCRE to be under the GPL, but also the entire
application. A distributor who wants to keep their own code proprietary must
purchase an appropriate Cygwin licence.
MinGW has no such restrictions. The MinGW compiler generates a library or
executable that can run standalone on Windows without any third party dll or
licensing issues.
But there is more complication:
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
gcc and MinGW's gcc). So, a user can:
. Build native binaries by using MinGW or by getting Cygwin and using
-mno-cygwin.
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
compiler flags.
The test files that are supplied with PCRE are in UNIX format, with LF
characters as line terminators. Unless your PCRE library uses a default newline
option that includes LF as a valid newline, it may be necessary to change the
line terminators in the test files to get some of the tests to work.
BUILDING PCRE ON WINDOWS WITH CMAKE
CMake is an alternative configuration facility that can be used instead of
"configure". CMake creates project files (make files, solution files, etc.)
tailored to numerous development environments, including Visual Studio,
Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE source and build
directories.
The following instructions were contributed by a PCRE user. If they are not
followed exactly, errors may occur. In the event that errors do occur, it is
recommended that you delete the CMake cache before attempting to repeat the
CMake build process. In the CMake GUI, the cache can be deleted by selecting
"File > Delete Cache".
1. Install the latest CMake version available from http://www.cmake.org/, and
ensure that cmake\bin is on your path.
2. Unzip (retaining folder structure) the PCRE source tree into a source
directory such as C:\pcre. You should ensure your local date and time
is not earlier than the file dates in your source dir if the release is
very new.
3. Create a new, empty build directory, preferably a subdirectory of the
source dir. For example, C:\pcre\pcre-xx\build.
4. Run cmake-gui from the Shell environment of your build tool, for example,
Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
to start CMake from the Windows Start menu, as this can lead to errors.
5. Enter C:\pcre\pcre-xx and C:\pcre\pcre-xx\build for the source and build
directories, respectively.
6. Hit the "Configure" button.
7. Select the particular IDE / build tool that you are using (Visual
Studio, MSYS makefiles, MinGW makefiles, etc.)
8. The GUI will then list several configuration options. This is where
you can enable UTF-8 support or other PCRE optional features.
9. Hit "Configure" again. The adjacent "Generate" button should now be
active.
10. Hit "Generate".
11. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc. Exit from
cmake-gui and use the generated build system with your compiler or IDE.
E.g., for MinGW you can run "make", or for Visual Studio, open the PCRE
solution, select the desired configuration (Debug, or Release, etc.) and
build the ALL_BUILD project.
12. If during configuration with cmake-gui you've elected to build the test
programs, you can execute them by building the test project. E.g., for
MinGW: "make test"; for Visual Studio build the RUN_TESTS project. The
most recent build configuration is targeted by the tests. A summary of
test results is presented. Complete test output is subsequently
available for review in Testing\Temporary under your build dir.
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
A PCRE user comments as follows:
I thought that others may want to know the current state of
CMAKE_USE_RELATIVE_PATHS support on Windows.
Here it is:
-- AdditionalIncludeDirectories is only partially modified (only the
first path - see below)
-- Only some of the contained file paths are modified - shown below for
pcre.vcproj
-- It properly modifies
I am sure CMake people can fix that if they want to. Until then one will
need to replace existing absolute paths in project files with relative
paths manually (e.g. from VS) - relative to project file location. I did
just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
deal.
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
RelativePath="pcre.h">
RelativePath="pcre_chartables.c">
RelativePath="pcre_chartables.c.rule">
TESTING WITH RUNTEST.BAT
If configured with CMake, building the test project ("make test" or building
RUN_TESTS in Visual Studio) creates (and runs) pcre_test.bat (and depending
on your configuration options, possibly other test programs) in the build
directory. Pcre_test.bat runs RunTest.Bat with correct source and exe paths.
For manual testing with RunTest.bat, provided the build dir is a subdirectory
of the source directory: Open a command shell window. Chdir to the location
of your pcretest.exe and pcregrep.exe programs. Call RunTest.bat with
"..\RunTest.Bat" or "..\..\RunTest.bat" as appropriate.
To run only a particular test with RunTest.Bat provide a test number argument.
Otherwise:
1. Copy RunTest.bat into the directory where pcretest.exe and pcregrep.exe
have been created.
2. Edit RunTest.bat to identify the full or relative location of
the pcre source (in which the testdata folder resides), e.g.:
set srcdir=C:\pcre\pcre-8.20
3. In a Windows command environment, chdir to the location of your bat and
exe programs.
4. Run RunTest.bat. Test outputs will automatically be compared to expected
results, and discrepancies will be identified in the console output.
To independently test the just-in-time compiler, run pcre_jit_test.exe.
To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
pcre_scanner_unittest.exe.
BUILDING UNDER WINDOWS WITH BCC5.5
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
Some of the core BCC libraries have a version of PCRE from 1998 built in,
which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a
version mismatch. I'm including an easy workaround below, if you'd like to
include it in the non-unix instructions:
When linking a project with BCC5.5, pcre.lib must be included before any of
the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
line.
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
Vincent Richomme sent a zip archive of files to help with this process. They
can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
site.
BUILDING PCRE ON OPENVMS
Dan Mooney sent the following comments about building PCRE on OpenVMS. They
relate to an older version of PCRE that used fewer source files, so the exact
commands will need changing. See the current list of source files above.
"It was quite easy to compile and link the library. I don't have a formal
make file but the attached file [reproduced below] contains the OpenVMS DCL
commands I used to build the library. I had to add #define
POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere.
The library was built on:
O/S: HP OpenVMS v7.3-1
Compiler: Compaq C v6.5-001-48BCD
Linker: vA13-01
The test results did not match 100% due to the issues you mention in your
documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I
modified some of the character tables temporarily and was able to get the
results to match. Tests using the fr locale did not match since I don't have
that locale loaded. The study size was always reported to be 3 less than the
value in the standard test output files."
=========================
$! This DCL procedure builds PCRE on OpenVMS
$!
$! I followed the instructions in the non-unix-use file in the distribution.
$!
$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES
$ COMPILE DFTABLES.C
$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ
$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C
$ COMPILE MAKETABLES.C
$ COMPILE GET.C
$ COMPILE STUDY.C
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
$! did not seem to be defined anywhere.
$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support.
$ COMPILE PCRE.C
$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
$! did not seem to be defined anywhere.
$ COMPILE PCREPOSIX.C
$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ
$ COMPILE PCRETEST.C
$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB
$! C programs that want access to command line arguments must be
$! defined as a symbol
$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE"
$! Arguments must be enclosed in quotes.
$ PCRETEST "-C"
$! Test results:
$!
$! The test results did not match 100%. The functions isprint(), iscntrl(),
$! isgraph() and ispunct() on OpenVMS must not produce the same results
$! as the system that built the test output files provided with the
$! distribution.
$!
$! The study size did not match and was always 3 less on OpenVMS.
$!
$! Locale could not be set to fr
$!
=========================
BUILDING PCRE ON STRATUS OPENVOS
These notes on the port of PCRE to VOS (lightly edited) were supplied by
Ashutosh Warikoo, whose email address has the local part awarikoo and the
domain nse.co.in. The port was for version 7.9 in August 2009.
1. Building PCRE
I built pcre on OpenVOS Release 17.0.1at using GNU Tools 3.4a without any
problems. I used the following packages to build PCRE:
ftp://ftp.stratus.com/pub/vos/posix/ga/posix.save.evf.gz
Please read and follow the instructions that come with these packages. To start
the build of pcre, from the root of the package type:
./build.sh
2. Installing PCRE
Once you have successfully built PCRE, login to the SysAdmin group, switch to
the root user, and type
[ !create_dir (master_disk)>usr --if needed ]
[ !create_dir (master_disk)>usr>local --if needed ]
!gmake install
This installs PCRE and its man pages into /usr/local. You can add
(master_disk)>usr>local>bin to your command search paths, or if you are in
BASH, add /usr/local/bin to the PATH environment variable.
4. Restrictions
This port requires readline library optionally. However during the build I
faced some yet unexplored errors while linking with readline. As it was an
optional component I chose to disable it.
5. Known Problems
I ran the test suite, but you will have to be your own judge of whether this
command, and this port, suits your purposes. If you find any problems that
appear to be related to the port itself, please let me know. Please see the
build.log file in the root of the package also.
BUILDING PCRE ON NATIVE Z/OS AND Z/VM
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
applications can be supported through UNIX System Services, and in such an
environment PCRE can be built in the same way as in other systems. However, in
native z/OS (without UNIX System Services) and in z/VM, special ports are
required. For details, please see this web site:
http://www.zaconsultants.net
There is also a mirror here:
http://www.vsoft-software.com/downloads.html
==========================
Last Updated: 21 November 2012

View File

@ -1,448 +1,7 @@
Compiling PCRE on non-Unix systems Compiling PCRE on non-Unix systems
---------------------------------- ----------------------------------
This document contains the following sections: This has been renamed to better reflect its contents. Please see the file
NON-AUTOTOOLS-BUILD for details of how to build PCRE without using autotools.
General ####
Generic instructions for the PCRE C library
The C++ wrapper functions
Building for virtual Pascal
Stack size in Windows environments
Linking programs in Windows environments
Comments about Win32 builds
Building PCRE on Windows with CMake
Use of relative paths with CMake on Windows
Testing with runtest.bat
Building under Windows with BCC5.5
Building PCRE on OpenVMS
GENERAL
I (Philip Hazel) have no experience of Windows or VMS systems and how their
libraries work. The items in the PCRE distribution and Makefile that relate to
anything other than Unix-like systems are untested by me.
There are some other comments and files (including some documentation in CHM
format) in the Contrib directory on the FTP site:
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib
If you want to compile PCRE for a non-Unix system (especially for a system that
does not support "configure" and "make" files), note that the basic PCRE
library consists entirely of code written in Standard C, and so should compile
successfully on any system that has a Standard C compiler and library. The C++
wrapper functions are a separate issue (see below).
The PCRE distribution includes a "configure" file for use by the Configure/Make
build system, as found in many Unix-like environments. There is also
support for CMake, which some users prefer, in particular in Windows
environments. There are some instructions for CMake under Windows in the
section entitled "Building PCRE with CMake" below. CMake can also be used to
build PCRE in Unix-like systems.
GENERIC INSTRUCTIONS FOR THE PCRE C LIBRARY
The following are generic comments about building the PCRE C library "by hand".
(1) Copy or rename the file config.h.generic as config.h, and edit the macro
settings that it contains to whatever is appropriate for your environment.
In particular, if you want to force a specific value for newline, you can
define the NEWLINE macro. When you compile any of the PCRE modules, you
must specify -DHAVE_CONFIG_H to your compiler so that config.h is included
in the sources.
An alternative approach is not to edit config.h, but to use -D on the
compiler command line to make any changes that you need to the
configuration options. In this case -DHAVE_CONFIG_H must not be set.
NOTE: There have been occasions when the way in which certain parameters
in config.h are used has changed between releases. (In the configure/make
world, this is handled automatically.) When upgrading to a new release,
you are strongly advised to review config.h.generic before re-using what
you had previously.
(2) Copy or rename the file pcre.h.generic as pcre.h.
(3) EITHER:
Copy or rename file pcre_chartables.c.dist as pcre_chartables.c.
OR:
Compile dftables.c as a stand-alone program (using -DHAVE_CONFIG_H if
you have set up config.h), and then run it with the single argument
"pcre_chartables.c". This generates a set of standard character tables
and writes them to that file. The tables are generated using the default
C locale for your system. If you want to use a locale that is specified
by LC_xxx environment variables, add the -L option to the dftables
command. You must use this method if you are building on a system that
uses EBCDIC code.
The tables in pcre_chartables.c are defaults. The caller of PCRE can
specify alternative tables at run time.
(4) Ensure that you have the following header files:
pcre_internal.h
ucp.h
(5) Also ensure that you have the following file, which is #included as source
when building a debugging version of PCRE, and is also used by pcretest.
pcre_printint.src
(6) Compile the following source files, setting -DHAVE_CONFIG_H as a compiler
option if you have set up config.h with your configuration, or else use
other -D settings to change the configuration as required.
pcre_chartables.c
pcre_compile.c
pcre_config.c
pcre_dfa_exec.c
pcre_exec.c
pcre_fullinfo.c
pcre_get.c
pcre_globals.c
pcre_info.c
pcre_maketables.c
pcre_newline.c
pcre_ord2utf8.c
pcre_refcount.c
pcre_study.c
pcre_tables.c
pcre_try_flipped.c
pcre_ucd.c
pcre_valid_utf8.c
pcre_version.c
pcre_xclass.c
Make sure that you include -I. in the compiler command (or equivalent for
an unusual compiler) so that all included PCRE header files are first
sought in the current directory. Otherwise you run the risk of picking up
a previously-installed file from somewhere else.
(7) Now link all the compiled code into an object library in whichever form
your system keeps such libraries. This is the basic PCRE C library. If
your system has static and shared libraries, you may have to do this once
for each type.
(8) Similarly, compile pcreposix.c (remembering -DHAVE_CONFIG_H if necessary)
and link the result (on its own) as the pcreposix library.
(9) Compile the test program pcretest.c (again, don't forget -DHAVE_CONFIG_H).
This needs the functions in the pcre and pcreposix libraries when linking.
It also needs the pcre_printint.src source file, which it #includes.
(10) Run pcretest on the testinput files in the testdata directory, and check
that the output matches the corresponding testoutput files. Note that the
supplied files are in Unix format, with just LF characters as line
terminators. You may need to edit them to change this if your system uses
a different convention. If you are using Windows, you probably should use
the wintestinput3 file instead of testinput3 (and the corresponding output
file). This is a locale test; wintestinput3 sets the locale to "french"
rather than "fr_FR", and there are some minor output differences.
(11) If you want to use the pcregrep command, compile and link pcregrep.c; it
uses only the basic PCRE library (it does not need the pcreposix library).
THE C++ WRAPPER FUNCTIONS
The PCRE distribution also contains some C++ wrapper functions and tests,
contributed by Google Inc. On a system that can use "configure" and "make",
the functions are automatically built into a library called pcrecpp. It should
be straightforward to compile the .cc files manually on other systems. The
files called xxx_unittest.cc are test programs for each of the corresponding
xxx.cc files.
BUILDING FOR VIRTUAL PASCAL
A script for building PCRE using Borland's C++ compiler for use with VPASCAL
was contributed by Alexander Tokarev. Stefan Weber updated the script and added
additional files. The following files in the distribution are for building PCRE
for use with VP/Borland: makevp_c.txt, makevp_l.txt, makevp.bat, pcregexp.pas.
STACK SIZE IN WINDOWS ENVIRONMENTS
The default processor stack size of 1Mb in some Windows environments is too
small for matching patterns that need much recursion. In particular, test 2 may
fail because of this. Normally, running out of stack causes a crash, but there
have been cases where the test program has just died silently. See your linker
documentation for how to increase stack size if you experience problems. The
Linux default of 8Mb is a reasonable choice for the stack, though even that can
be too small for some pattern/subject combinations.
PCRE has a compile configuration option to disable the use of stack for
recursion so that heap is used instead. However, pattern matching is
significantly slower when this is done. There is more about stack usage in the
"pcrestack" documentation.
LINKING PROGRAMS IN WINDOWS ENVIRONMENTS
If you want to statically link a program against a PCRE library in the form of
a non-dll .a file, you must define PCRE_STATIC before including pcre.h,
otherwise the pcre_malloc() and pcre_free() exported functions will be declared
__declspec(dllimport), with unwanted results.
CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
It is possible to compile programs to use different calling conventions using
MSVC. Search the web for "calling conventions" for more information. To make it
easier to change the calling convention for the exported functions in the
PCRE library, the macro PCRE_CALL_CONVENTION is present in all the external
definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
not set, it defaults to empty; the default calling convention is then used
(which is what is wanted most of the time).
COMMENTS ABOUT WIN32 BUILDS (see also "BUILDING PCRE WITH CMAKE" below)
There are two ways of building PCRE using the "configure, make, make install"
paradigm on Windows systems: using MinGW or using Cygwin. These are not at all
the same thing; they are completely different from each other. There is also
support for building using CMake, which some users find a more straightforward
way of building PCRE under Windows. However, the tests are not run
automatically when CMake is used.
The MinGW home page (http://www.mingw.org/) says this:
MinGW: A collection of freely available and freely distributable Windows
specific header files and import libraries combined with GNU toolsets that
allow one to produce native Windows programs that do not rely on any
3rd-party C runtime DLLs.
The Cygwin home page (http://www.cygwin.com/) says this:
Cygwin is a Linux-like environment for Windows. It consists of two parts:
. A DLL (cygwin1.dll) which acts as a Linux API emulation layer providing
substantial Linux API functionality
. A collection of tools which provide Linux look and feel.
The Cygwin DLL currently works with all recent, commercially released x86 32
bit and 64 bit versions of Windows, with the exception of Windows CE.
On both MinGW and Cygwin, PCRE should build correctly using:
./configure && make && make install
This should create two libraries called libpcre and libpcreposix, and, if you
have enabled building the C++ wrapper, a third one called libpcrecpp. These are
independent libraries: when you link with libpcreposix or libpcrecpp you must
also link with libpcre, which contains the basic functions. (Some earlier
releases of PCRE included the basic libpcre functions in libpcreposix. This no
longer happens.)
A user submitted a special-purpose patch that makes it easy to create
"pcre.dll" under mingw32 using the "msys" environment. It provides "pcre.dll"
as a special target. If you use this target, no other files are built, and in
particular, the pcretest and pcregrep programs are not built. An example of how
this might be used is:
./configure --enable-utf --disable-cpp CFLAGS="-O3 -s"; make pcre.dll
Using Cygwin's compiler generates libraries and executables that depend on
cygwin1.dll. If a library that is generated this way is distributed,
cygwin1.dll has to be distributed as well. Since cygwin1.dll is under the GPL
licence, this forces not only PCRE to be under the GPL, but also the entire
application. A distributor who wants to keep their own code proprietary must
purchase an appropriate Cygwin licence.
MinGW has no such restrictions. The MinGW compiler generates a library or
executable that can run standalone on Windows without any third party dll or
licensing issues.
But there is more complication:
If a Cygwin user uses the -mno-cygwin Cygwin gcc flag, what that really does is
to tell Cygwin's gcc to use the MinGW gcc. Cygwin's gcc is only acting as a
front end to MinGW's gcc (if you install Cygwin's gcc, you get both Cygwin's
gcc and MinGW's gcc). So, a user can:
. Build native binaries by using MinGW or by getting Cygwin and using
-mno-cygwin.
. Build binaries that depend on cygwin1.dll by using Cygwin with the normal
compiler flags.
The test files that are supplied with PCRE are in Unix format, with LF
characters as line terminators. It may be necessary to change the line
terminators in order to get some of the tests to work. We hope to improve
things in this area in future.
BUILDING PCRE ON WINDOWS WITH CMAKE
CMake is an alternative build facility that can be used instead of the
traditional Unix "configure". CMake version 2.4.7 supports Borland makefiles,
MinGW makefiles, MSYS makefiles, NMake makefiles, UNIX makefiles, Visual Studio
6, Visual Studio 7, Visual Studio 8, and Watcom W8. The following instructions
were contributed by a PCRE user.
1. Download CMake 2.4.7 or above from http://www.cmake.org/, install and ensure
that cmake\bin is on your path.
2. Unzip (retaining folder structure) the PCRE source tree into a source
directory such as C:\pcre.
3. Create a new, empty build directory: C:\pcre\build\
4. Run CMakeSetup from the Shell environment of your build tool, e.g., Msys
for Msys/MinGW or Visual Studio Command Prompt for VC/VC++
5. Enter C:\pcre\pcre-xx and C:\pcre\build for the source and build
directories, respectively
6. Hit the "Configure" button.
7. Select the particular IDE / build tool that you are using (Visual Studio,
MSYS makefiles, MinGW makefiles, etc.)
8. The GUI will then list several configuration options. This is where you can
enable UTF-8 support, etc.
9. Hit "Configure" again. The adjacent "OK" button should now be active.
10. Hit "OK".
11. The build directory should now contain a usable build system, be it a
solution file for Visual Studio, makefiles for MinGW, etc.
USE OF RELATIVE PATHS WITH CMAKE ON WINDOWS
A PCRE user comments as follows:
I thought that others may want to know the current state of
CMAKE_USE_RELATIVE_PATHS support on Windows.
Here it is:
-- AdditionalIncludeDirectories is only partially modified (only the
first path - see below)
-- Only some of the contained file paths are modified - shown below for
pcre.vcproj
-- It properly modifies
I am sure CMake people can fix that if they want to. Until then one will
need to replace existing absolute paths in project files with relative
paths manually (e.g. from VS) - relative to project file location. I did
just that before being told to try CMAKE_USE_RELATIVE_PATHS. Not a big
deal.
AdditionalIncludeDirectories="E:\builds\pcre\build;E:\builds\pcre\pcre-7.5;"
AdditionalIncludeDirectories=".;E:\builds\pcre\pcre-7.5;"
RelativePath="pcre.h">
RelativePath="pcre_chartables.c">
RelativePath="pcre_chartables.c.rule">
TESTING WITH RUNTEST.BAT
1. Copy RunTest.bat into the directory where pcretest.exe has been created.
2. Edit RunTest.bat and insert a line that identifies the relative location of
the pcre source, e.g.:
set srcdir=..\pcre-7.4-RC3
3. Run RunTest.bat from a command shell environment. Test outputs will
automatically be compared to expected results, and discrepancies will be
identified in the console output.
4. To test pcrecpp, run pcrecpp_unittest.exe, pcre_stringpiece_unittest.exe and
pcre_scanner_unittest.exe.
BUILDING UNDER WINDOWS WITH BCC5.5
Michael Roy sent these comments about building PCRE under Windows with BCC5.5:
Some of the core BCC libraries have a version of PCRE from 1998 built in,
which can lead to pcre_exec() giving an erroneous PCRE_ERROR_NULL from a
version mismatch. I'm including an easy workaround below, if you'd like to
include it in the non-unix instructions:
When linking a project with BCC5.5, pcre.lib must be included before any of
the libraries cw32.lib, cw32i.lib, cw32mt.lib, and cw32mti.lib on the command
line.
BUILDING UNDER WINDOWS CE WITH VISUAL STUDIO 200x
Vincent Richomme sent a zip archive of files to help with this process. They
can be found in the file "pcre-vsbuild.zip" in the Contrib directory of the FTP
site.
BUILDING PCRE ON OPENVMS
Dan Mooney sent the following comments about building PCRE on OpenVMS. They
relate to an older version of PCRE that used fewer source files, so the exact
commands will need changing. See the current list of source files above.
"It was quite easy to compile and link the library. I don't have a formal
make file but the attached file [reproduced below] contains the OpenVMS DCL
commands I used to build the library. I had to add #define
POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere.
The library was built on:
O/S: HP OpenVMS v7.3-1
Compiler: Compaq C v6.5-001-48BCD
Linker: vA13-01
The test results did not match 100% due to the issues you mention in your
documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I
modified some of the character tables temporarily and was able to get the
results to match. Tests using the fr locale did not match since I don't have
that locale loaded. The study size was always reported to be 3 less than the
value in the standard test output files."
=========================
$! This DCL procedure builds PCRE on OpenVMS
$!
$! I followed the instructions in the non-unix-use file in the distribution.
$!
$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES
$ COMPILE DFTABLES.C
$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ
$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C
$ COMPILE MAKETABLES.C
$ COMPILE GET.C
$ COMPILE STUDY.C
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
$! did not seem to be defined anywhere.
$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support.
$ COMPILE PCRE.C
$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ
$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol
$! did not seem to be defined anywhere.
$ COMPILE PCREPOSIX.C
$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ
$ COMPILE PCRETEST.C
$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB
$! C programs that want access to command line arguments must be
$! defined as a symbol
$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE"
$! Arguments must be enclosed in quotes.
$ PCRETEST "-C"
$! Test results:
$!
$! The test results did not match 100%. The functions isprint(), iscntrl(),
$! isgraph() and ispunct() on OpenVMS must not produce the same results
$! as the system that built the test output files provided with the
$! distribution.
$!
$! The study size did not match and was always 3 less on OpenVMS.
$!
$! Locale could not be set to fr
$!
=========================
Last Updated: 17 March 2009
****

View File

@ -4,12 +4,15 @@
# processing of the documentation, detrails files, and creates pcre.h.generic # processing of the documentation, detrails files, and creates pcre.h.generic
# and config.h.generic (for use by builders who can't run ./configure). # and config.h.generic (for use by builders who can't run ./configure).
# You must run this script before running "make dist". It makes use of the # You must run this script before running "make dist". If its first argument
# following files: # is "doc", it stops after preparing the documentation. There are no other
# arguments. The script makes use of the following files:
# 132html A Perl script that converts a .1 or .3 man page into HTML. It # 132html A Perl script that converts a .1 or .3 man page into HTML. It
# is called from MakeRelease. It "knows" the relevant troff # "knows" the relevant troff constructs that are used in the PCRE
# constructs that are used in the PCRE man pages. # man pages.
# CheckMan A Perl script that checks man pages for typos in the mark up.
# CleanTxt A Perl script that cleans up the output of "nroff -man" by # CleanTxt A Perl script that cleans up the output of "nroff -man" by
# removing backspaces and other redundant text so as to produce # removing backspaces and other redundant text so as to produce
@ -23,11 +26,20 @@
# doc/html can be deleted and re-created from scratch. # doc/html can be deleted and re-created from scratch.
# First, sort out the documentation # First, sort out the documentation. Remove pcredemo.3 first because it won't
# pass the markup check (it is created below, using markup that none of the
# other pages use).
cd doc cd doc
echo Processing documentation echo Processing documentation
/bin/rm -f pcredemo.3
# Check the remaining man pages
perl ../CheckMan *.1 *.3
if [ $? != 0 ] ; then exit 1; fi
# Make Text form of the documentation. It needs some mangling to make it # Make Text form of the documentation. It needs some mangling to make it
# tidy for online reading. Concatenate all the .3 stuff, but omit the # tidy for online reading. Concatenate all the .3 stuff, but omit the
# individual function pages. # individual function pages.
@ -37,20 +49,22 @@ cat <<End >pcre.txt
This file contains a concatenation of the PCRE man pages, converted to plain This file contains a concatenation of the PCRE man pages, converted to plain
text format for ease of searching with a text editor, or for use on systems text format for ease of searching with a text editor, or for use on systems
that do not have a man page processor. The small individual files that give that do not have a man page processor. The small individual files that give
synopses of each function in the library have not been included. There are synopses of each function in the library have not been included. Neither has
separate text files for the pcregrep and pcretest commands. the pcredemo program. There are separate text files for the pcregrep and
pcretest commands.
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
End End
echo "Making pcre.txt" echo "Making pcre.txt"
for file in pcre pcrebuild pcrematching pcreapi pcrecallout pcrecompat \ for file in pcre pcre16 pcre32 pcrebuild pcrematching pcreapi pcrecallout \
pcrepattern pcresyntax pcrepartial pcreprecompile \ pcrecompat pcrepattern pcresyntax pcreunicode pcrejit pcrepartial \
pcreperform pcreposix pcrecpp pcresample pcrestack ; do pcreprecompile pcreperform pcreposix pcrecpp pcresample \
pcrelimits pcrestack ; do
echo " Processing $file.3" echo " Processing $file.3"
nroff -c -man $file.3 >$file.rawtxt nroff -c -man $file.3 >$file.rawtxt
../CleanTxt <$file.rawtxt >>pcre.txt perl ../CleanTxt <$file.rawtxt >>pcre.txt
/bin/rm $file.rawtxt /bin/rm $file.rawtxt
echo "------------------------------------------------------------------------------" >>pcre.txt echo "------------------------------------------------------------------------------" >>pcre.txt
if [ "$file" != "pcresample" ] ; then if [ "$file" != "pcresample" ] ; then
@ -63,11 +77,46 @@ done
for file in pcretest pcregrep pcre-config ; do for file in pcretest pcregrep pcre-config ; do
echo Making $file.txt echo Making $file.txt
nroff -c -man $file.1 >$file.rawtxt nroff -c -man $file.1 >$file.rawtxt
../CleanTxt <$file.rawtxt >$file.txt perl ../CleanTxt <$file.rawtxt >$file.txt
/bin/rm $file.rawtxt /bin/rm $file.rawtxt
done done
# Make pcredemo.3 from the pcredemo.c source file
echo "Making pcredemo.3"
perl <<"END" >pcredemo.3
open(IN, "../pcredemo.c") || die "Failed to open pcredemo.c\n";
open(OUT, ">pcredemo.3") || die "Failed to open pcredemo.3\n";
print OUT ".\\\" Start example.\n" .
".de EX\n" .
". nr mE \\\\n(.f\n" .
". nf\n" .
". nh\n" .
". ft CW\n" .
"..\n" .
".\n" .
".\n" .
".\\\" End example.\n" .
".de EE\n" .
". ft \\\\n(mE\n" .
". fi\n" .
". hy \\\\n(HY\n" .
"..\n" .
".\n" .
".EX\n" ;
while (<IN>)
{
s/\\/\\e/g;
print OUT;
}
print OUT ".EE\n";
close(IN);
close(OUT);
END
if [ $? != 0 ] ; then exit 1; fi
# Make HTML form of the documentation. # Make HTML form of the documentation.
echo "Making HTML documentation" echo "Making HTML documentation"
@ -77,35 +126,42 @@ cp index.html.src html/index.html
for file in *.1 ; do for file in *.1 ; do
base=`basename $file .1` base=`basename $file .1`
echo " Making $base.html" echo " Making $base.html"
../132html -toc $base <$file >html/$base.html perl ../132html -toc $base <$file >html/$base.html
done done
# Exclude table of contents for function summaries. It seems that expr # Exclude table of contents for function summaries. It seems that expr
# forces an anchored regex. Also exclude them for small pages that have # forces an anchored regex. Also exclude them for small pages that have
# only one section. # only one section.
for file in *.3 ; do for file in *.3 ; do
base=`basename $file .3` base=`basename $file .3`
toc=-toc toc=-toc
if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
if [ "$base" = "pcresample" ] || \ if [ "$base" = "pcresample" ] || \
[ "$base" = "pcrestack" ] || \ [ "$base" = "pcrestack" ] || \
[ "$base" = "pcrecompat" ] || \ [ "$base" = "pcrecompat" ] || \
[ "$base" = "pcreperform" ] ; then [ "$base" = "pcrelimits" ] || \
[ "$base" = "pcreperform" ] || \
[ "$base" = "pcreunicode" ] ; then
toc="" toc=""
fi fi
echo " Making $base.html" echo " Making $base.html"
../132html $toc $base <$file >html/$base.html perl ../132html $toc $base <$file >html/$base.html
if [ $? != 0 ] ; then exit 1; fi if [ $? != 0 ] ; then exit 1; fi
done done
# End of documentation processing # End of documentation processing; stop if only documentation required.
cd .. cd ..
echo Documentation done echo Documentation done
if [ "$1" = "doc" ] ; then exit; fi
# These files are detrailed; do not detrail the test data because there may be # These files are detrailed; do not detrail the test data because there may be
# significant trailing spaces. The configure files are also omitted from the # significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
# detrailing. # line endings and the detrail script removes all trailing white space. The
# configure files are also omitted from the detrailing. We don't bother with
# those pcre[16|32]_xx files that just define COMPILE_PCRE16 and then #include the
# common file, because they aren't going to change.
files="\ files="\
Makefile.am \ Makefile.am \
@ -117,6 +173,7 @@ files="\
AUTHORS \ AUTHORS \
NEWS \ NEWS \
NON-UNIX-USE \ NON-UNIX-USE \
NON-AUTOTOOLS-BUILD \
INSTALL \ INSTALL \
132html \ 132html \
CleanTxt \ CleanTxt \
@ -125,12 +182,13 @@ files="\
CMakeLists.txt \ CMakeLists.txt \
RunGrepTest \ RunGrepTest \
RunTest \ RunTest \
RunTest.bat \
pcre-config.in \ pcre-config.in \
libpcre.pc.in \ libpcre.pc.in \
libpcre16.pc.in \
libpcre32.pc.in \
libpcreposix.pc.in \
libpcrecpp.pc.in \ libpcrecpp.pc.in \
config.h.in \ config.h.in \
pcre_printint.src \
pcre_chartables.c.dist \ pcre_chartables.c.dist \
pcredemo.c \ pcredemo.c \
pcregrep.c \ pcregrep.c \
@ -139,7 +197,8 @@ files="\
pcreposix.c \ pcreposix.c \
pcreposix.h \ pcreposix.h \
pcre.h.in \ pcre.h.in \
pcre_internal.h pcre_internal.h \
pcre_byte_order.c \
pcre_compile.c \ pcre_compile.c \
pcre_config.c \ pcre_config.c \
pcre_dfa_exec.c \ pcre_dfa_exec.c \
@ -147,18 +206,26 @@ files="\
pcre_fullinfo.c \ pcre_fullinfo.c \
pcre_get.c \ pcre_get.c \
pcre_globals.c \ pcre_globals.c \
pcre_info.c \ pcre_jit_compile.c \
pcre_jit_test.c \
pcre_maketables.c \ pcre_maketables.c \
pcre_newline.c \ pcre_newline.c \
pcre_ord2utf8.c \ pcre_ord2utf8.c \
pcre16_ord2utf16.c \
pcre32_ord2utf32.c \
pcre_printint.c \
pcre_refcount.c \ pcre_refcount.c \
pcre_string_utils.c \
pcre_study.c \ pcre_study.c \
pcre_tables.c \ pcre_tables.c \
pcre_try_flipped.c \
pcre_ucp_searchfuncs.c \ pcre_ucp_searchfuncs.c \
pcre_valid_utf8.c \ pcre_valid_utf8.c \
pcre_version.c \ pcre_version.c \
pcre_xclass.c \ pcre_xclass.c \
pcre16_utf16_utils.c \
pcre32_utf32_utils.c \
pcre16_valid_utf16.c \
pcre32_valid_utf32.c \
pcre_scanner.cc \ pcre_scanner.cc \
pcre_scanner.h \ pcre_scanner.h \
pcre_scanner_unittest.cc \ pcre_scanner_unittest.cc \
@ -179,35 +246,7 @@ files="\
libpcreposix.def" libpcreposix.def"
echo Detrailing echo Detrailing
./Detrail $files doc/p* doc/html/* perl ./Detrail $files doc/p* doc/html/*
echo Doing basic configure to get default pcre.h and config.h
# This is in case the caller has set aliases (as I do - PH)
unset cp ls mv rm
./configure >/dev/null
echo Converting pcre.h and config.h to generic forms
cp -f pcre.h pcre.h.generic
perl <<'END'
open(IN, "<config.h") || die "Can't open config.h: $!\n";
open(OUT, ">config.h.generic") || die "Can't open config.h.generic: $!\n";
while (<IN>)
{
if (/^#define\s(?!PACKAGE)(\w+)/)
{
print OUT "#ifndef $1\n";
print OUT;
print OUT "#endif\n";
}
else
{
print OUT;
}
}
close IN;
close OUT;
END
echo Done echo Done

View File

@ -18,12 +18,14 @@ The contents of this README file are:
The PCRE APIs The PCRE APIs
Documentation for PCRE Documentation for PCRE
Contributions by users of PCRE Contributions by users of PCRE
Building PCRE on non-Unix systems Building PCRE on non-Unix-like systems
Building PCRE on Unix-like systems Building PCRE without using autotools
Retrieving configuration information on Unix-like systems Building PCRE using autotools
Shared libraries on Unix-like systems Retrieving configuration information
Cross-compiling on Unix-like systems Shared libraries
Cross-compiling using autotools
Using HP's ANSI C++ compiler (aCC) Using HP's ANSI C++ compiler (aCC)
Using PCRE from MySQL
Making new tarballs Making new tarballs
Testing PCRE Testing PCRE
Character tables Character tables
@ -33,16 +35,20 @@ The contents of this README file are:
The PCRE APIs The PCRE APIs
------------- -------------
PCRE is written in C, and it has its own API. The distribution also includes a PCRE is written in C, and it has its own API. There are three sets of functions,
set of C++ wrapper functions (see the pcrecpp man page for details), courtesy one for the 8-bit library, which processes strings of bytes, one for the
of Google Inc. 16-bit library, which processes strings of 16-bit values, and one for the 32-bit
library, which processes strings of 32-bit values. The distribution also
includes a set of C++ wrapper functions (see the pcrecpp man page for details),
courtesy of Google Inc., which can be used to call the 8-bit PCRE library from
C++.
In addition, there is a set of C wrapper functions that are based on the POSIX In addition, there is a set of C wrapper functions (again, just for the 8-bit
regular expression API (see the pcreposix man page). These end up in the library) that are based on the POSIX regular expression API (see the pcreposix
library called libpcreposix. Note that this just provides a POSIX calling man page). These end up in the library called libpcreposix. Note that this just
interface to PCRE; the regular expressions themselves still follow Perl syntax provides a POSIX calling interface to PCRE; the regular expressions themselves
and semantics. The POSIX API is restricted, and does not give full access to still follow Perl syntax and semantics. The POSIX API is restricted, and does
all of PCRE's facilities. not give full access to all of PCRE's facilities.
The header file for the POSIX-style functions is called pcreposix.h. The The header file for the POSIX-style functions is called pcreposix.h. The
official POSIX name is regex.h, but I did not want to risk possible problems official POSIX name is regex.h, but I did not want to risk possible problems
@ -105,36 +111,45 @@ Windows (I myself do not use Windows). Nowadays there is more Windows support
in the standard distribution, so these contributions have been archived. in the standard distribution, so these contributions have been archived.
Building PCRE on non-Unix systems Building PCRE on non-Unix-like systems
--------------------------------- --------------------------------------
For a non-Unix system, please read the comments in the file NON-UNIX-USE, For a non-Unix-like system, please read the comments in the file
though if your system supports the use of "configure" and "make" you may be NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
able to build PCRE in the same way as for Unix-like systems. PCRE can also be "make" you may be able to build PCRE using autotools in the same way as for
configured in many platform environments using the GUI facility of CMake's many Unix-like systems.
CMakeSetup. It creates Makefiles, solution files, etc.
PCRE can also be configured using the GUI facility provided by CMake's
cmake-gui command. This creates Makefiles, solution files, etc. The file
NON-AUTOTOOLS-BUILD has information about CMake.
PCRE has been compiled on many different operating systems. It should be PCRE has been compiled on many different operating systems. It should be
straightforward to build PCRE on any system that has a Standard C compiler and straightforward to build PCRE on any system that has a Standard C compiler and
library, because it uses only Standard C functions. library, because it uses only Standard C functions.
Building PCRE on Unix-like systems Building PCRE without using autotools
---------------------------------- -------------------------------------
The use of autotools (in particular, libtool) is problematic in some
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
file for ways of building PCRE without using autotools.
Building PCRE using autotools
-----------------------------
If you are using HP's ANSI C++ compiler (aCC), please see the special note If you are using HP's ANSI C++ compiler (aCC), please see the special note
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below. in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.
The following instructions assume the use of the widely used "configure, make, The following instructions assume the use of the widely used "configure; make;
make install" process. There is also support for CMake in the PCRE make install" (autotools) process.
distribution; there are some comments about using CMake in the NON-UNIX-USE
file, though it can also be used in Unix-like systems.
To build PCRE on a Unix-like system, first run the "configure" command from the To build PCRE on a system that supports autotools, first run the "configure"
PCRE distribution directory, with your current directory set to the directory command from the PCRE distribution directory, with your current directory set
where you want the files to be created. This command is a standard GNU to the directory where you want the files to be created. This command is a
"autoconf" configuration script, for which generic instructions are supplied in standard GNU "autoconf" configuration script, for which generic instructions
the file INSTALL. are supplied in the file INSTALL.
Most commonly, people build PCRE within its own distribution directory, and in Most commonly, people build PCRE within its own distribution directory, and in
this case, on many systems, just running "./configure" is sufficient. However, this case, on many systems, just running "./configure" is sufficient. However,
@ -142,9 +157,9 @@ the usual methods of changing standard defaults are available. For example:
CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local
specifies that the C compiler should be run with the flags '-O2 -Wall' instead This command specifies that the C compiler should be run with the flags '-O2
of the default, and that "make install" should install PCRE under /opt/local -Wall' instead of the default, and that "make install" should install PCRE
instead of the default /usr/local. under /opt/local instead of the default /usr/local.
If you want to build in a different directory, just run "configure" with that If you want to build in a different directory, just run "configure" with that
directory as current. For example, suppose you have unpacked the PCRE source directory as current. For example, suppose you have unpacked the PCRE source
@ -158,27 +173,62 @@ possible to build it as a C++ library, though the provided building apparatus
does not have any features to support this. does not have any features to support this.
There are some optional features that can be included or omitted from the PCRE There are some optional features that can be included or omitted from the PCRE
library. You can read more about them in the pcrebuild man page. library. They are also documented in the pcrebuild man page.
. If you want to suppress the building of the C++ wrapper library, you can add . By default, both shared and static libraries are built. You can change this
--disable-cpp to the "configure" command. Otherwise, when "configure" is run, by adding one of these options to the "configure" command:
it will try to find a C++ compiler and C++ header files, and if it succeeds,
it will try to build the C++ wrapper. --disable-shared
--disable-static
(See also "Shared libraries on Unix-like systems" below.)
. By default, only the 8-bit library is built. If you add --enable-pcre16 to
the "configure" command, the 16-bit library is also built. If you add
--enable-pcre32 to the "configure" command, the 32-bit library is also built.
If you want only the 16-bit or 32-bit library, use --disable-pcre8 to disable
building the 8-bit library.
. If you are building the 8-bit library and want to suppress the building of
the C++ wrapper library, you can add --disable-cpp to the "configure"
command. Otherwise, when "configure" is run without --disable-pcre8, it will
try to find a C++ compiler and C++ header files, and if it succeeds, it will
try to build the C++ wrapper.
. If you want to include support for just-in-time compiling, which can give
large performance improvements on certain platforms, add --enable-jit to the
"configure" command. This support is available only for certain hardware
architectures. If you try to enable it on an unsupported architecture, there
will be a compile time error.
. When JIT support is enabled, pcregrep automatically makes use of it, unless
you add --disable-pcregrep-jit to the "configure" command.
. If you want to make use of the support for UTF-8 Unicode character strings in . If you want to make use of the support for UTF-8 Unicode character strings in
PCRE, you must add --enable-utf8 to the "configure" command. Without it, the the 8-bit library, or UTF-16 Unicode character strings in the 16-bit library,
code for handling UTF-8 is not included in the library. Even when included, or UTF-32 Unicode character strings in the 32-bit library, you must add
it still has to be enabled by an option at run time. When PCRE is compiled --enable-utf to the "configure" command. Without it, the code for handling
with this option, its input can only either be ASCII or UTF-8, even when UTF-8, UTF-16 and UTF-32 is not included in the relevant library. Even
running on EBCDIC platforms. It is not possible to use both --enable-utf8 and when --enable-utf is included, the use of a UTF encoding still has to be
--enable-ebcdic at the same time. enabled by an option at run time. When PCRE is compiled with this option, its
input can only either be ASCII or UTF-8/16/32, even when running on EBCDIC
platforms. It is not possible to use both --enable-utf and --enable-ebcdic at
the same time.
. If, in addition to support for UTF-8 character strings, you want to include . There are no separate options for enabling UTF-8, UTF-16 and UTF-32
support for the \P, \p, and \X sequences that recognize Unicode character independently because that would allow ridiculous settings such as requesting
properties, you must add --enable-unicode-properties to the "configure" UTF-16 support while building only the 8-bit library. However, the option
command. This adds about 30K to the size of the library (in the form of a --enable-utf8 is retained for backwards compatibility with earlier releases
property table); only the basic two-letter properties such as Lu are that did not support 16-bit or 32-bit character strings. It is synonymous with
supported. --enable-utf. It is not possible to configure one library with UTF support
and the other without in the same configuration.
. If, in addition to support for UTF-8/16/32 character strings, you want to
include support for the \P, \p, and \X sequences that recognize Unicode
character properties, you must add --enable-unicode-properties to the
"configure" command. This adds about 30K to the size of the library (in the
form of a property table); only the basic two-letter properties such as Lu
are supported.
. You can build PCRE to recognize either CR or LF or the sequence CRLF or any . You can build PCRE to recognize either CR or LF or the sequence CRLF or any
of the preceding, or any of the Unicode newline sequences as indicating the of the preceding, or any of the Unicode newline sequences as indicating the
@ -231,10 +281,12 @@ library. You can read more about them in the pcrebuild man page.
sizes in the pcrestack man page. sizes in the pcrestack man page.
. The default maximum compiled pattern size is around 64K. You can increase . The default maximum compiled pattern size is around 64K. You can increase
this by adding --with-link-size=3 to the "configure" command. You can this by adding --with-link-size=3 to the "configure" command. In the 8-bit
increase it even more by setting --with-link-size=4, but this is unlikely library, PCRE then uses three bytes instead of two for offsets to different
ever to be necessary. Increasing the internal link size will reduce parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
performance. the same as --with-link-size=4, which (in both libraries) uses four-byte
offsets. Increasing the internal link size reduces performance. In the 32-bit
library, the only supported link size is 4.
. You can build PCRE so that its internal match() function that is called from . You can build PCRE so that its internal match() function that is called from
pcre_exec() does not call itself recursively. Instead, it uses memory blocks pcre_exec() does not call itself recursively. Instead, it uses memory blocks
@ -246,9 +298,10 @@ library. You can read more about them in the pcrebuild man page.
on the "configure" command. PCRE runs more slowly in this mode, but it may be on the "configure" command. PCRE runs more slowly in this mode, but it may be
necessary in environments with limited stack sizes. This applies only to the necessary in environments with limited stack sizes. This applies only to the
pcre_exec() function; it does not apply to pcre_dfa_exec(), which does not normal execution of the pcre_exec() function; if JIT support is being
use deeply nested recursion. There is a discussion about stack sizes in the successfully used, it is not relevant. Equally, it does not apply to
pcrestack man page. pcre_dfa_exec(), which does not use deeply nested recursion. There is a
discussion about stack sizes in the pcrestack man page.
. For speed, PCRE uses four tables for manipulating and identifying characters . For speed, PCRE uses four tables for manipulating and identifying characters
whose code point values are less than 256. By default, it uses a set of whose code point values are less than 256. By default, it uses a set of
@ -262,33 +315,64 @@ library. You can read more about them in the pcrebuild man page.
pcre_chartables.c.dist. See "Character tables" below for further information. pcre_chartables.c.dist. See "Character tables" below for further information.
. It is possible to compile PCRE for use on systems that use EBCDIC as their . It is possible to compile PCRE for use on systems that use EBCDIC as their
character code (as opposed to ASCII) by specifying character code (as opposed to ASCII/Unicode) by specifying
--enable-ebcdic --enable-ebcdic
This automatically implies --enable-rebuild-chartables (see above). However, This automatically implies --enable-rebuild-chartables (see above). However,
when PCRE is built this way, it always operates in EBCDIC. It cannot support when PCRE is built this way, it always operates in EBCDIC. It cannot support
both EBCDIC and UTF-8. both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
which specifies that the code value for the EBCDIC NL character is 0x25
instead of the default 0x15.
. It is possible to compile pcregrep to use libz and/or libbz2, in order to . In environments where valgrind is installed, if you specify
read .gz and .bz2 files (respectively), by specifying one or both of
--enable-valgrind
PCRE will use valgrind annotations to mark certain memory regions as
unaddressable. This allows it to detect invalid memory accesses, and is
mostly useful for debugging PCRE itself.
. In environments where the gcc compiler is used and lcov version 1.6 or above
is installed, if you specify
--enable-coverage
the build process implements a code coverage report for the test suite. The
report is generated by running "make coverage". If ccache is installed on
your system, it must be disabled when building PCRE for coverage reporting.
You can do this by setting the environment variable CCACHE_DISABLE=1 before
running "make" to build PCRE.
. The pcregrep program currently supports only 8-bit data files, and so
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
libz and/or libbz2, in order to read .gz and .bz2 files (respectively), by
specifying one or both of
--enable-pcregrep-libz --enable-pcregrep-libz
--enable-pcregrep-libbz2 --enable-pcregrep-libbz2
Of course, the relevant libraries must be installed on your system. Of course, the relevant libraries must be installed on your system.
. It is possible to compile pcretest so that it links with the libreadline . The default size of internal buffer used by pcregrep can be set by, for
library, by specifying example:
--enable-pcretest-libreadline --with-pcregrep-bufsize=50K
The default value is 20K.
. It is possible to compile pcretest so that it links with the libreadline
or libedit libraries, by specifying, respectively,
--enable-pcretest-libreadline or --enable-pcretest-libedit
If this is done, when pcretest's input is from a terminal, it reads it using If this is done, when pcretest's input is from a terminal, it reads it using
the readline() function. This provides line-editing and history facilities. the readline() function. This provides line-editing and history facilities.
Note that libreadline is GPL-licenced, so if you distribute a binary of Note that libreadline is GPL-licenced, so if you distribute a binary of
pcretest linked in this way, there may be licensing issues. pcretest linked in this way, there may be licensing issues. These can be
avoided by linking with libedit (which has a BSD licence) instead.
Setting this option causes the -lreadline option to be added to the pcretest Enabling libreadline causes the -lreadline option to be added to the pcretest
build. In many operating environments with a system-installed readline build. In many operating environments with a system-installed readline
library this is sufficient. However, in some environments (e.g. if an library this is sufficient. However, in some environments (e.g. if an
unmodified distribution version of readline is in use), it may be necessary unmodified distribution version of readline is in use), it may be necessary
@ -301,37 +385,43 @@ library. You can read more about them in the pcrebuild man page.
The "configure" script builds the following files for the basic C library: The "configure" script builds the following files for the basic C library:
. Makefile is the makefile that builds the library . Makefile the makefile that builds the library
. config.h contains build-time configuration options for the library . config.h build-time configuration options for the library
. pcre.h is the public PCRE header file . pcre.h the public PCRE header file
. pcre-config is a script that shows the settings of "configure" options . pcre-config script that shows the building settings such as CFLAGS
. libpcre.pc is data for the pkg-config command that were set for "configure"
. libtool is a script that builds shared and/or static libraries . libpcre.pc ) data for the pkg-config command
. RunTest is a script for running tests on the basic C library . libpcre16.pc )
. RunGrepTest is a script for running tests on the pcregrep command . libpcre32.pc )
. libpcreposix.pc )
. libtool script that builds shared and/or static libraries
Versions of config.h and pcre.h are distributed in the PCRE tarballs under Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
the names config.h.generic and pcre.h.generic. These are provided for the names config.h.generic and pcre.h.generic. These are provided for those who
benefit of those who have to build PCRE without the benefit of "configure". If have to build PCRE without using "configure" or CMake. If you use "configure"
you use "configure", the .generic versions are not used. or CMake, the .generic versions are not used.
If a C++ compiler is found, the following files are also built: When building the 8-bit library, if a C++ compiler is found, the following
files are also built:
. libpcrecpp.pc is data for the pkg-config command . libpcrecpp.pc data for the pkg-config command
. pcrecpparg.h is a header file for programs that call PCRE via the C++ wrapper . pcrecpparg.h header file for calling PCRE via the C++ wrapper
. pcre_stringpiece.h is the header for the C++ "stringpiece" functions . pcre_stringpiece.h header for the C++ "stringpiece" functions
The "configure" script also creates config.status, which is an executable The "configure" script also creates config.status, which is an executable
script that can be run to recreate the configuration, and config.log, which script that can be run to recreate the configuration, and config.log, which
contains compiler output from tests that "configure" runs. contains compiler output from tests that "configure" runs.
Once "configure" has run, you can run "make". It builds two libraries, called Once "configure" has run, you can run "make". This builds the libraries
libpcre and libpcreposix, a test program called pcretest, and the pcregrep libpcre, libpcre16 and/or libpcre32, and a test program called pcretest. If you
command. If a C++ compiler was found on your system, "make" also builds the C++ enabled JIT support with --enable-jit, a test program called pcre_jit_test is
wrapper library, which is called libpcrecpp, and some test programs called built as well.
pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest.
Building the C++ wrapper can be disabled by adding --disable-cpp to the If the 8-bit library is built, libpcreposix and the pcregrep command are also
"configure" command. built, and if a C++ compiler was found on your system, and you did not disable
it with --disable-cpp, "make" builds the C++ wrapper library, which is called
libpcrecpp, as well as some test programs called pcrecpp_unittest,
pcre_scanner_unittest, and pcre_stringpiece_unittest.
The command "make check" runs all the appropriate tests. Details of the PCRE The command "make check" runs all the appropriate tests. Details of the PCRE
tests are given below in a separate section of this document. tests are given below in a separate section of this document.
@ -342,16 +432,21 @@ system. The following are installed (file names are all relative to the
Commands (bin): Commands (bin):
pcretest pcretest
pcregrep pcregrep (if 8-bit support is enabled)
pcre-config pcre-config
Libraries (lib): Libraries (lib):
libpcre libpcre16 (if 16-bit support is enabled)
libpcreposix libpcre32 (if 32-bit support is enabled)
libpcrecpp (if C++ support is enabled) libpcre (if 8-bit support is enabled)
libpcreposix (if 8-bit support is enabled)
libpcrecpp (if 8-bit and C++ support is enabled)
Configuration information (lib/pkgconfig): Configuration information (lib/pkgconfig):
libpcre16.pc
libpcre32.pc
libpcre.pc libpcre.pc
libpcreposix.pc
libpcrecpp.pc (if C++ support is enabled) libpcrecpp.pc (if C++ support is enabled)
Header files (include): Header files (include):
@ -365,6 +460,7 @@ system. The following are installed (file names are all relative to the
Man pages (share/man/man{1,3}): Man pages (share/man/man{1,3}):
pcregrep.1 pcregrep.1
pcretest.1 pcretest.1
pcre-config.1
pcre.3 pcre.3
pcre*.3 (lots more pages, all starting "pcre") pcre*.3 (lots more pages, all starting "pcre")
@ -379,17 +475,18 @@ system. The following are installed (file names are all relative to the
LICENCE LICENCE
NEWS NEWS
README README
pcre.txt (a concatenation of the man(3) pages) pcre.txt (a concatenation of the man(3) pages)
pcretest.txt the pcretest man page pcretest.txt the pcretest man page
pcregrep.txt the pcregrep man page pcregrep.txt the pcregrep man page
pcre-config.txt the pcre-config man page
If you want to remove PCRE from your system, you can run "make uninstall". If you want to remove PCRE from your system, you can run "make uninstall".
This removes all the files that "make install" installed. However, it does not This removes all the files that "make install" installed. However, it does not
remove any directories, because these are often shared with other programs. remove any directories, because these are often shared with other programs.
Retrieving configuration information on Unix-like systems Retrieving configuration information
--------------------------------------------------------- ------------------------------------
Running "make install" installs the command pcre-config, which can be used to Running "make install" installs the command pcre-config, which can be used to
recall information about the PCRE configuration and installation. For example: recall information about the PCRE configuration and installation. For example:
@ -414,8 +511,8 @@ The data is held in *.pc files that are installed in a directory called
<prefix>/lib/pkgconfig. <prefix>/lib/pkgconfig.
Shared libraries on Unix-like systems Shared libraries
------------------------------------- ----------------
The default distribution builds PCRE as shared libraries and static libraries, The default distribution builds PCRE as shared libraries and static libraries,
as long as the operating system supports shared libraries. Shared library as long as the operating system supports shared libraries. Shared library
@ -440,8 +537,8 @@ Then run "make" in the usual way. Similarly, you can use --disable-static to
build only shared libraries. build only shared libraries.
Cross-compiling on Unix-like systems Cross-compiling using autotools
------------------------------------ -------------------------------
You can specify CC and CFLAGS in the normal way to the "configure" command, in You can specify CC and CFLAGS in the normal way to the "configure" command, in
order to cross-compile PCRE for some other host. However, you should NOT order to cross-compile PCRE for some other host. However, you should NOT
@ -478,6 +575,26 @@ running the "configure" script:
CXXLDFLAGS="-lstd_v2 -lCsup_v2" CXXLDFLAGS="-lstd_v2 -lCsup_v2"
Using Sun's compilers for Solaris
---------------------------------
A user reports that the following configurations work on Solaris 9 sparcv9 and
Solaris 9 x86 (32-bit):
Solaris 9 sparcv9: ./configure --disable-cpp CC=/bin/cc CFLAGS="-m64 -g"
Solaris 9 x86: ./configure --disable-cpp CC=/bin/cc CFLAGS="-g"
Using PCRE from MySQL
---------------------
On systems where both PCRE and MySQL are installed, it is possible to make use
of PCRE from within MySQL, as an alternative to the built-in pattern matching.
There is a web page that tells you how to do this:
http://www.mysqludf.org/lib_mysqludf_preg/index.php
Making new tarballs Making new tarballs
------------------- -------------------
@ -493,30 +610,49 @@ script creates the .txt and HTML forms of the documentation from the man pages.
Testing PCRE Testing PCRE
------------ ------------
To test the basic PCRE library on a Unix system, run the RunTest script that is To test the basic PCRE library on a Unix-like system, run the RunTest script.
created by the configuring process. There is also a script called RunGrepTest There is another script called RunGrepTest that tests the options of the
that tests the options of the pcregrep command. If the C++ wrapper library is pcregrep command. If the C++ wrapper library is built, three test programs
built, three test programs called pcrecpp_unittest, pcre_scanner_unittest, and called pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest
pcre_stringpiece_unittest are also built. are also built. When JIT support is enabled, another test program called
pcre_jit_test is built.
Both the scripts and all the program tests are run if you obey "make check" or Both the scripts and all the program tests are run if you obey "make check" or
"make test". For other systems, see the instructions in NON-UNIX-USE. "make test". For other environments, see the instructions in
NON-AUTOTOOLS-BUILD.
The RunTest script runs the pcretest test program (which is documented in its The RunTest script runs the pcretest test program (which is documented in its
own man page) on each of the testinput files in the testdata directory in own man page) on each of the relevant testinput files in the testdata
turn, and compares the output with the contents of the corresponding testoutput directory, and compares the output with the contents of the corresponding
files. A file called testtry is used to hold the main output from pcretest testoutput files. Some tests are relevant only when certain build-time options
(testsavedregex is also used as a working file). To run pcretest on just one of were selected. For example, the tests for UTF-8/16/32 support are run only if
the test files, give its number as an argument to RunTest, for example: --enable-utf was used. RunTest outputs a comment when it skips a test.
RunTest 2 Many of the tests that are not skipped are run up to three times. The second
run forces pcre_study() to be called for all patterns except for a few in some
tests that are marked "never study" (see the pcretest program for how this is
done). If JIT support is available, the non-DFA tests are run a third time,
this time with a forced pcre_study() with the PCRE_STUDY_JIT_COMPILE option.
The first test file can also be fed directly into the perltest.pl script to The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
check that Perl gives the same results. The only difference you should see is libraries that are enabled. If you want to run just one set of tests, call
in the first few lines, where the Perl version is given instead of the PCRE RunTest with either the -8, -16 or -32 option.
version.
The second set of tests check pcre_fullinfo(), pcre_info(), pcre_study(), RunTest uses a file called testtry to hold the main output from pcretest.
Other files whose names begin with "test" are used as working files in some
tests. To run pcretest on one or more specific test files, give their
numbers as arguments to RunTest, for example:
RunTest 2 7 11
You can also call RunTest with the single argument "list" to cause it to output
a list of tests.
The first test file can be fed directly into the perltest.pl script to check
that Perl gives the same results. The only difference you should see is in the
first few lines, where the Perl version is given instead of the PCRE version.
The second set of tests check pcre_fullinfo(), pcre_study(),
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
detection, and run-time flags that are specific to PCRE, as well as the POSIX detection, and run-time flags that are specific to PCRE, as well as the POSIX
wrapper API. It also uses the debugging flags to check some of the internals of wrapper API. It also uses the debugging flags to check some of the internals of
@ -551,24 +687,38 @@ RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses
Windows versions of test 2. More info on using RunTest.bat is included in the Windows versions of test 2. More info on using RunTest.bat is included in the
document entitled NON-UNIX-USE.] document entitled NON-UNIX-USE.]
The fourth test checks the UTF-8 support. It is not run automatically unless The fourth and fifth tests check the UTF-8/16/32 support and error handling and
PCRE is built with UTF-8 support. To do this you must set --enable-utf8 when internal UTF features of PCRE that are not relevant to Perl, respectively. The
running "configure". This file can be also fed directly to the perltest script, sixth and seventh tests do the same for Unicode character properties support.
provided you are running Perl 5.8 or higher. (For Perl 5.6, a small patch,
commented in the script, can be be used.)
The fifth test checks error handling with UTF-8 encoding, and internal UTF-8 The eighth, ninth, and tenth tests check the pcre_dfa_exec() alternative
features of PCRE that are not relevant to Perl. matching function, in non-UTF-8/16/32 mode, UTF-8/16/32 mode, and UTF-8/16/32
mode with Unicode property support, respectively.
The sixth test checks the support for Unicode character properties. It it not The eleventh test checks some internal offsets and code size features; it is
run automatically unless PCRE is built with Unicode property support. To to run only when the default "link size" of 2 is set (in other cases the sizes
this you must set --enable-unicode-properties when running "configure". change) and when Unicode property support is enabled.
The seventh, eighth, and ninth tests check the pcre_dfa_exec() alternative The twelfth test is run only when JIT support is available, and the thirteenth
matching function, in non-UTF-8 mode, UTF-8 mode, and UTF-8 mode with Unicode test is run only when JIT support is not available. They test some JIT-specific
property support, respectively. The eighth and ninth tests are not run features such as information output from pcretest about JIT compilation.
automatically unless PCRE is build with the relevant support.
The fourteenth, fifteenth, and sixteenth tests are run only in 8-bit mode, and
the seventeenth, eighteenth, and nineteenth tests are run only in 16/32-bit mode.
These are tests that generate different output in the two modes. They are for
general cases, UTF-8/16/32 support, and Unicode property support, respectively.
The twentieth test is run only in 16/32-bit mode. It tests some specific
16/32-bit features of the DFA matching engine.
The twenty-first and twenty-second tests are run only in 16/32-bit mode, when the
link size is set to 2 for the 16-bit library. They test reloading pre-compiled patterns.
The twenty-third and twenty-fourth tests are run only in 16-bit mode. They are for
general cases, and UTF-16 support, respectively.
The twenty-fifth and twenty-sixth tests are run only in 32-bit mode. They are for
general cases, and UTF-32 support, respectively.
Character tables Character tables
---------------- ----------------
@ -627,7 +777,9 @@ will cause PCRE to malfunction.
File manifest File manifest
------------- -------------
The distribution should contain the following files: The distribution should contain the files listed below. Where a file name is
given as pcre[16|32]_xxx it means that there are three files, one with the name
pcre_xxx, one with the name pcre16_xxx, and a third with the name pcre32_xxx.
(A) Source files of the PCRE library functions and their headers: (A) Source files of the PCRE library functions and their headers:
@ -636,33 +788,42 @@ The distribution should contain the following files:
pcre_chartables.c.dist a default set of character tables that assume ASCII pcre_chartables.c.dist a default set of character tables that assume ASCII
coding; used, unless --enable-rebuild-chartables is coding; used, unless --enable-rebuild-chartables is
specified, by copying to pcre_chartables.c specified, by copying to pcre[16]_chartables.c
pcreposix.c )
pcre[16|32]_byte_order.c )
pcre[16|32]_compile.c )
pcre[16|32]_config.c )
pcre[16|32]_dfa_exec.c )
pcre[16|32]_exec.c )
pcre[16|32]_fullinfo.c )
pcre[16|32]_get.c ) sources for the functions in the library,
pcre[16|32]_globals.c ) and some internal functions that they use
pcre[16|32]_jit_compile.c )
pcre[16|32]_maketables.c )
pcre[16|32]_newline.c )
pcre[16|32]_refcount.c )
pcre[16|32]_string_utils.c )
pcre[16|32]_study.c )
pcre[16|32]_tables.c )
pcre[16|32]_ucd.c )
pcre[16|32]_version.c )
pcre[16|32]_xclass.c )
pcre_ord2utf8.c )
pcre_valid_utf8.c )
pcre16_ord2utf16.c )
pcre16_utf16_utils.c )
pcre16_valid_utf16.c )
pcre32_utf32_utils.c )
pcre32_valid_utf32.c )
pcre[16|32]_printint.c ) debugging function that is used by pcretest,
) and can also be #included in pcre_compile()
pcreposix.c )
pcre_compile.c )
pcre_config.c )
pcre_dfa_exec.c )
pcre_exec.c )
pcre_fullinfo.c )
pcre_get.c ) sources for the functions in the library,
pcre_globals.c ) and some internal functions that they use
pcre_info.c )
pcre_maketables.c )
pcre_newline.c )
pcre_ord2utf8.c )
pcre_refcount.c )
pcre_study.c )
pcre_tables.c )
pcre_try_flipped.c )
pcre_ucd.c )
pcre_valid_utf8.c )
pcre_version.c )
pcre_xclass.c )
pcre_printint.src ) debugging function that is #included in pcretest,
) and can also be #included in pcre_compile()
pcre.h.in template for pcre.h when built by "configure" pcre.h.in template for pcre.h when built by "configure"
pcreposix.h header for the external POSIX wrapper API pcreposix.h header for the external POSIX wrapper API
pcre_internal.h header for internal use pcre_internal.h header for internal use
sljit/* 16 files that make up the JIT compiler
ucp.h header for Unicode property handling ucp.h header for Unicode property handling
config.h.in template for config.h, which is built by "configure" config.h.in template for config.h, which is built by "configure"
@ -699,7 +860,8 @@ The distribution should contain the following files:
Makefile.am ) the automake input that was used to create Makefile.am ) the automake input that was used to create
) Makefile.in ) Makefile.in
NEWS important changes in this release NEWS important changes in this release
NON-UNIX-USE notes on building PCRE on non-Unix systems NON-UNIX-USE the previous name for NON-AUTOTOOLS-BUILD
NON-AUTOTOOLS-BUILD notes on building PCRE without using autotools
PrepareRelease script to make preparations for "make dist" PrepareRelease script to make preparations for "make dist"
README this file README this file
RunTest a Unix shell script for running tests RunTest a Unix shell script for running tests
@ -712,7 +874,7 @@ The distribution should contain the following files:
) "configure" and config.h ) "configure" and config.h
depcomp ) script to find program dependencies, generated by depcomp ) script to find program dependencies, generated by
) automake ) automake
doc/*.3 man page sources for the PCRE functions doc/*.3 man page sources for PCRE
doc/*.1 man page sources for pcregrep and pcretest doc/*.1 man page sources for pcregrep and pcretest
doc/index.html.src the base HTML page doc/index.html.src the base HTML page
doc/html/* HTML documentation doc/html/* HTML documentation
@ -720,7 +882,10 @@ The distribution should contain the following files:
doc/pcretest.txt plain text documentation of test program doc/pcretest.txt plain text documentation of test program
doc/perltest.txt plain text documentation of Perl test program doc/perltest.txt plain text documentation of Perl test program
install-sh a shell script for installing files install-sh a shell script for installing files
libpcre16.pc.in template for libpcre16.pc for pkg-config
libpcre32.pc.in template for libpcre32.pc for pkg-config
libpcre.pc.in template for libpcre.pc for pkg-config libpcre.pc.in template for libpcre.pc for pkg-config
libpcreposix.pc.in template for libpcreposix.pc for pkg-config
libpcrecpp.pc.in template for libpcrecpp.pc for pkg-config libpcrecpp.pc.in template for libpcrecpp.pc for pkg-config
ltmain.sh file used to build a libtool script ltmain.sh file used to build a libtool script
missing ) common stub for a few missing GNU programs while missing ) common stub for a few missing GNU programs while
@ -728,17 +893,20 @@ The distribution should contain the following files:
mkinstalldirs script for making install directories mkinstalldirs script for making install directories
perltest.pl Perl test program perltest.pl Perl test program
pcre-config.in source of script which retains PCRE information pcre-config.in source of script which retains PCRE information
pcre_jit_test.c test program for the JIT compiler
pcrecpp_unittest.cc ) pcrecpp_unittest.cc )
pcre_scanner_unittest.cc ) test programs for the C++ wrapper pcre_scanner_unittest.cc ) test programs for the C++ wrapper
pcre_stringpiece_unittest.cc ) pcre_stringpiece_unittest.cc )
testdata/testinput* test data for main library tests testdata/testinput* test data for main library tests
testdata/testoutput* expected test results testdata/testoutput* expected test results
testdata/grep* input and output for pcregrep tests testdata/grep* input and output for pcregrep tests
testdata/* other supporting test files
(D) Auxiliary files for cmake support (D) Auxiliary files for cmake support
cmake/COPYING-CMAKE-SCRIPTS cmake/COPYING-CMAKE-SCRIPTS
cmake/FindPackageHandleStandardArgs.cmake cmake/FindPackageHandleStandardArgs.cmake
cmake/FindEditline.cmake
cmake/FindReadline.cmake cmake/FindReadline.cmake
CMakeLists.txt CMakeLists.txt
config-cmake.h.in config-cmake.h.in
@ -764,4 +932,4 @@ The distribution should contain the following files:
Philip Hazel Philip Hazel
Email local part: ph10 Email local part: ph10
Email domain: cam.ac.uk Email domain: cam.ac.uk
Last updated: 21 March 2009 Last updated: 27 October 2012

View File

@ -2,67 +2,104 @@
# Run pcregrep tests. The assumption is that the PCRE tests check the library # Run pcregrep tests. The assumption is that the PCRE tests check the library
# itself. What we are checking here is the file handling and options that are # itself. What we are checking here is the file handling and options that are
# supported by pcregrep. # supported by pcregrep. This script must be run in the build directory.
# Set the C locale, so that sort(1) behaves predictably. # Set the C locale, so that sort(1) behaves predictably.
LC_ALL=C LC_ALL=C
export LC_ALL export LC_ALL
pcregrep=`pwd`/pcregrep # Remove any non-default colouring and aliases that the caller may have set.
echo " " unset PCREGREP_COLOUR PCREGREP_COLOR
echo "Testing pcregrep" unset cp ls mv rm
$pcregrep -V
# Remember the current (build) directory, set the program to be tested, and
# valgrind settings when requested.
builddir=`pwd`
pcregrep=$builddir/pcregrep
cf="diff -ub"
valgrind= valgrind=
while [ $# -gt 0 ] ; do while [ $# -gt 0 ] ; do
case $1 in case $1 in
valgrind) valgrind="valgrind -q --leak-check=no";; valgrind) valgrind="valgrind -q --leak-check=no --smc-check=all";;
*) echo "Unknown argument $1"; exit 1;; *) echo "RunGrepTest: Unknown argument $1"; exit 1;;
esac esac
shift shift
done done
# If PCRE has been built in a directory other than the source directory, and echo " "
# this test is being run from "make check" as usual, then $(srcdir) will be pcregrep_version=`$pcregrep -V`
# set. If not, set it to the current directory. We then arrange to run the if [ "$valgrind" = "" ] ; then
# pcregrep command in the source directory so that the file names that appear echo "Testing $pcregrep_version"
# in the output are always the same. else
echo "Testing $pcregrep_version using valgrind"
fi
# Set up a suitable "diff" command for comparison. Some systems have a diff
# that lacks a -u option. Try to deal with this; better do the test for the -b
# option as well.
cf="diff"
diff -b /dev/null /dev/null 2>/dev/null && cf="diff -b"
diff -u /dev/null /dev/null 2>/dev/null && cf="diff -u"
diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"
# If this test is being run from "make check", $srcdir will be set. If not, set
# it to the current or parent directory, whichever one contains the test data.
# Subsequently, we run most of the pcregrep tests in the source directory so
# that the file names in the output are always the same.
if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then if [ -z "$srcdir" -o ! -d "$srcdir/testdata" ] ; then
srcdir=. if [ -d "./testdata" ] ; then
srcdir=.
elif [ -d "../testdata" ] ; then
srcdir=..
else
echo "Cannot find the testdata directory"
exit 1
fi
fi fi
# Check for the availability of UTF-8 support # Check for the availability of UTF-8 support
./pcretest -C | ./pcregrep "No UTF-8 support" >/dev/null ./pcretest -C utf >/dev/null
utf8=$? utf8=$?
echo "Testing pcregrep main features"
echo "---------------------------- Test 1 ------------------------------" >testtry echo "---------------------------- Test 1 ------------------------------" >testtry
(cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 2 ------------------------------" >>testtry echo "---------------------------- Test 2 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep '^PATTERN' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 3 ------------------------------" >>testtry echo "---------------------------- Test 3 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 4 ------------------------------" >>testtry echo "---------------------------- Test 4 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep -ic PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 5 ------------------------------" >>testtry echo "---------------------------- Test 5 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -in PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 6 ------------------------------" >>testtry echo "---------------------------- Test 6 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -inh PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 7 ------------------------------" >>testtry echo "---------------------------- Test 7 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -il PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 8 ------------------------------" >>testtry echo "---------------------------- Test 8 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -l PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 9 ------------------------------" >>testtry echo "---------------------------- Test 9 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -q PATTERN ./testdata/grepinput ./testdata/grepinputx) >>testtry
@ -74,69 +111,92 @@ echo "RC=$?" >>testtry
echo "---------------------------- Test 11 -----------------------------" >>testtry echo "---------------------------- Test 11 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -vn pattern ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 12 -----------------------------" >>testtry echo "---------------------------- Test 12 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -ix pattern ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 13 -----------------------------" >>testtry echo "---------------------------- Test 13 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist ./testdata/grepinputx) >>testtry echo seventeen >testtemp1
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist -f $builddir/testtemp1 ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 14 -----------------------------" >>testtry echo "---------------------------- Test 14 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -w pat ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 15 -----------------------------" >>testtry echo "---------------------------- Test 15 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtry >>testtry (cd $srcdir; $valgrind $pcregrep 'abc^*' ./testdata/grepinput) 2>>testtry >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 16 -----------------------------" >>testtry echo "---------------------------- Test 16 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtry >>testtry (cd $srcdir; $valgrind $pcregrep abc ./testdata/grepinput ./testdata/nonexistfile) 2>>testtry >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 17 -----------------------------" >>testtry echo "---------------------------- Test 17 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep -M 'the\noutput' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 18 -----------------------------" >>testtry echo "---------------------------- Test 18 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep -Mn '(the\noutput|dog\.\n--)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 19 -----------------------------" >>testtry echo "---------------------------- Test 19 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -Mix 'Pattern' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 20 -----------------------------" >>testtry echo "---------------------------- Test 20 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -Mixn 'complete pair\nof lines' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 21 -----------------------------" >>testtry echo "---------------------------- Test 21 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -nA3 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 22 -----------------------------" >>testtry echo "---------------------------- Test 22 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -nB3 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 23 -----------------------------" >>testtry echo "---------------------------- Test 23 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -C3 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 24 -----------------------------" >>testtry echo "---------------------------- Test 24 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -A9 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 25 -----------------------------" >>testtry echo "---------------------------- Test 25 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -nB9 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 26 -----------------------------" >>testtry echo "---------------------------- Test 26 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -A9 -B9 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 27 -----------------------------" >>testtry echo "---------------------------- Test 27 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -A10 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 28 -----------------------------" >>testtry echo "---------------------------- Test 28 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -nB10 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 29 -----------------------------" >>testtry echo "---------------------------- Test 29 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -C12 -B10 'four' ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 30 -----------------------------" >>testtry echo "---------------------------- Test 30 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -inB3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 31 -----------------------------" >>testtry echo "---------------------------- Test 31 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -inA3 'pattern' ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 32 -----------------------------" >>testtry echo "---------------------------- Test 32 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -L 'fox' ./testdata/grepinput ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 33 -----------------------------" >>testtry echo "---------------------------- Test 33 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtry 2>&1 (cd $srcdir; $valgrind $pcregrep 'fox' ./testdata/grepnonexist) >>testtry 2>&1
@ -147,11 +207,11 @@ echo "---------------------------- Test 34 -----------------------------" >>test
echo "RC=$?" >>testtry echo "RC=$?" >>testtry
echo "---------------------------- Test 35 -----------------------------" >>testtry echo "---------------------------- Test 35 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --exclude_dir='^\.' 'fox' ./testdata) >>testtry (cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
echo "RC=$?" >>testtry echo "RC=$?" >>testtry
echo "---------------------------- Test 36 -----------------------------" >>testtry echo "---------------------------- Test 36 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude_dir='^\.' 'fox' ./testdata | sort) >>testtry (cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
echo "RC=$?" >>testtry echo "RC=$?" >>testtry
echo "---------------------------- Test 37 -----------------------------" >>testtry echo "---------------------------- Test 37 -----------------------------" >>testtry
@ -162,60 +222,270 @@ cat teststderr >>testtry
echo "---------------------------- Test 38 ------------------------------" >>testtry echo "---------------------------- Test 38 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep '>\x00<' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 39 ------------------------------" >>testtry echo "---------------------------- Test 39 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep -A1 'before the binary zero' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 40 ------------------------------" >>testtry echo "---------------------------- Test 40 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep -B1 'after the binary zero' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 41 ------------------------------" >>testtry echo "---------------------------- Test 41 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep -B1 -o '\w+ the binary zero' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 41 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtry
echo "---------------------------- Test 42 ------------------------------" >>testtry echo "---------------------------- Test 42 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep -B1 -onH '\w+ the binary zero' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 43 ------------------------------" >>testtry echo "---------------------------- Test 43 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -on -e before -e zero -e after ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep -on 'before|zero|after' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 44 ------------------------------" >>testtry echo "---------------------------- Test 44 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep -on -e before -ezero -e after ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 45 ------------------------------" >>testtry echo "---------------------------- Test 45 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -e abc -e '(unclosed' ./testdata/grepinput) 2>>testtry >>testtry (cd $srcdir; $valgrind $pcregrep -on -f ./testdata/greplist -e binary ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 46 ------------------------------" >>testtry echo "---------------------------- Test 46 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -Fx "AB.VE (cd $srcdir; $valgrind $pcregrep -eabc -e '(unclosed' ./testdata/grepinput) 2>>testtry >>testtry
elephant" ./testdata/grepinput) >>testtry echo "RC=$?" >>testtry
echo "---------------------------- Test 47 ------------------------------" >>testtry echo "---------------------------- Test 47 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -F "AB.VE (cd $srcdir; $valgrind $pcregrep -Fx "AB.VE
elephant" ./testdata/grepinput) >>testtry elephant" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 48 ------------------------------" >>testtry echo "---------------------------- Test 48 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -F -e DATA -e "AB.VE (cd $srcdir; $valgrind $pcregrep -F "AB.VE
elephant" ./testdata/grepinput) >>testtry elephant" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 49 ------------------------------" >>testtry echo "---------------------------- Test 49 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -F -e DATA -e "AB.VE
elephant" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 50 ------------------------------" >>testtry echo "---------------------------- Test 50 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtry (cd $srcdir; $valgrind $pcregrep "^(abc|def|ghi|jkl)" ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 51 ------------------------------" >>testtry echo "---------------------------- Test 51 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtry (cd $srcdir; $valgrind $pcregrep -Mv "brown\sfox" ./testdata/grepinputv) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 52 ------------------------------" >>testtry echo "---------------------------- Test 52 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep --colour=always jumps ./testdata/grepinputv) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 53 ------------------------------" >>testtry echo "---------------------------- Test 53 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtry (cd $srcdir; $valgrind $pcregrep --file-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 54 -----------------------------" >>testtry echo "---------------------------- Test 54 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --line-offsets 'before|zero|after' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 55 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtry (cd $srcdir; $valgrind $pcregrep -f./testdata/greplist --color=always ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 56 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -c lazy ./testdata/grepinput*) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 57 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -c -l lazy ./testdata/grepinput*) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 58 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --regex=PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 59 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --regexp=PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 60 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --regex PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 61 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --regexp PATTERN ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 62 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --match-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 63 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 64 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o1 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 65 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 66 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o3 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 67 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o12 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 68 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --only-matching=2 '(?<=PAT)TERN (ap(pear)s)' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 69 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -vn --colour=always pattern ./testdata/grepinputx) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 70 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 71 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 72 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 73 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 74 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o "^01|02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 75 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --color=always "^01|02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 76 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|02|^03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 77 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o "^01|^02|03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 78 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --color=always "^01|^02|03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 79 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o --colour=always "^01|^02|03" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 80 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o "\b01|\b02" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 81 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --color=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 82 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o --colour=always "\\b01|\\b02" ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 83 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --buffer-size=100 "^a" ./testdata/grepinput3) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 84 -----------------------------" >>testtry
echo testdata/grepinput3 >testtemp1
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --file-list $builddir/testtemp1 "fox|complete|t7") >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 85 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 86 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 87 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep "cat" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 88 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -v "cat" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 89 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -I "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 90 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --binary-files=without-match "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 91 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -a "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 92 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --binary-files=text "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 93 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --text "dog" ./testdata/grepbinary) >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 94 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinputx --include grepinput8 'fox' ./testdata/grepinput* | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 95 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete") >>testtry 2>&1
echo "RC=$?" >>testtry
echo "---------------------------- Test 96 -----------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 97 -----------------------------" >>testtry
echo "grepinput$" >testtemp1
echo "grepinput8" >>testtemp1
(cd $srcdir; $valgrind $pcregrep -L -r --include=grepinput --exclude-from $builddir/testtemp1 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 98 -----------------------------" >>testtry
echo "grepinput$" >testtemp1
echo "grepinput8" >>testtemp1
(cd $srcdir; $valgrind $pcregrep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 99 -----------------------------" >>testtry
echo "grepinput$" >testtemp1
echo "grepinput8" >testtemp2
(cd $srcdir; $valgrind $pcregrep -L -r --include grepinput --exclude-from $builddir/testtemp1 --exclude-from=$builddir/testtemp2 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 100 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -Ho2 --only-matching=1 -o3 '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test 101 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator='|' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtry
echo "RC=$?" >>testtry
# Now compare the results. # Now compare the results.
@ -230,9 +500,11 @@ if [ $utf8 -ne 0 ] ; then
echo "---------------------------- Test U1 ------------------------------" >testtry echo "---------------------------- Test U1 ------------------------------" >testtry
(cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtry (cd $srcdir; $valgrind $pcregrep -n -u --newline=any "^X" ./testdata/grepinput8) >>testtry
echo "RC=$?" >>testtry
echo "---------------------------- Test U2 ------------------------------" >>testtry echo "---------------------------- Test U2 ------------------------------" >>testtry
(cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtry (cd $srcdir; $valgrind $pcregrep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtry
echo "RC=$?" >>testtry
$cf $srcdir/testdata/grepoutput8 testtry $cf $srcdir/testdata/grepoutput8 testtry
if [ $? != 0 ] ; then exit 1; fi if [ $? != 0 ] ; then exit 1; fi
@ -247,7 +519,7 @@ fi
# is not \n. Do not use exported files, whose line endings might be changed. # is not \n. Do not use exported files, whose line endings might be changed.
# Instead, create an input file using printf so that its contents are exactly # Instead, create an input file using printf so that its contents are exactly
# what we want. Note the messy fudge to get printf to write a string that # what we want. Note the messy fudge to get printf to write a string that
# starts with a hyphen. # starts with a hyphen. These tests are run in the build directory.
echo "Testing pcregrep newline settings" echo "Testing pcregrep newline settings"
printf "abc\rdef\r\nghi\njkl" >testNinput printf "abc\rdef\r\nghi\njkl" >testNinput
@ -263,8 +535,7 @@ pattern=`printf 'def\rjkl'`
$valgrind $pcregrep -n --newline=cr -F "$pattern" testNinput >>testtry $valgrind $pcregrep -n --newline=cr -F "$pattern" testNinput >>testtry
printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtry printf "%c--------------------------- Test N4 ------------------------------\r\n" - >>testtry
pattern=`printf 'xxx\r\njkl'` $valgrind $pcregrep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 testNinput >>testtry
$valgrind $pcregrep -n --newline=crlf -F "$pattern" testNinput >>testtry
printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtry printf "%c--------------------------- Test N5 ------------------------------\r\n" - >>testtry
$valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinput >>testtry $valgrind $pcregrep -n --newline=any "^(abc|def|ghi|jkl)" testNinput >>testtry

File diff suppressed because it is too large Load Diff

View File

@ -1,39 +1,616 @@
@rem This file was contributed by Ralf Junker, and touched up by @echo off
@rem Daniel Richard G. Test 10 added by Philip H. @rem This file must use CRLF linebreaks to function properly
@rem Philip H also changed test 3 to use "wintest" files. @rem and requires both pcretest and pcregrep
@rem @rem This file was originally contributed by Ralf Junker, and touched up by
@rem MS Windows batch file to run pcretest on testfiles with the correct @rem Daniel Richard G. Tests 10-12 added by Philip H.
@rem options. @rem Philip H also changed test 3 to use "wintest" files.
@rem @rem
@rem Output is written to a newly created subfolder named "testdata". @rem Updated by Tom Fortmann to support explicit test numbers on the command line.
@rem Added argument validation and added error reporting.
setlocal @rem
@rem MS Windows batch file to run pcretest on testfiles with the correct
if [%srcdir%]==[] set srcdir=. @rem options.
if [%pcretest%]==[] set pcretest=pcretest @rem
@rem Sheri Pierce added logic to skip feature dependent tests
if not exist testout md testout @rem tests 4 5 9 15 and 18 require utf support
@rem tests 6 7 10 16 and 19 require ucp support
%pcretest% -q %srcdir%\testdata\testinput1 > testout\testoutput1 @rem 11 requires ucp and link size 2
%pcretest% -q %srcdir%\testdata\testinput2 > testout\testoutput2 @rem 12 requires presence of jit support
@rem %pcretest% -q %srcdir%\testdata\testinput3 > testout\testoutput3 @rem 13 requires absence of jit support
%pcretest% -q %srcdir%\testdata\wintestinput3 > testout\wintestoutput3 @rem Sheri P also added override tests for study and jit testing
%pcretest% -q %srcdir%\testdata\testinput4 > testout\testoutput4 @rem Zoltan Herczeg added libpcre16 support
%pcretest% -q %srcdir%\testdata\testinput5 > testout\testoutput5 @rem Zoltan Herczeg added libpcre32 support
%pcretest% -q %srcdir%\testdata\testinput6 > testout\testoutput6
%pcretest% -q -dfa %srcdir%\testdata\testinput7 > testout\testoutput7 setlocal enabledelayedexpansion
%pcretest% -q -dfa %srcdir%\testdata\testinput8 > testout\testoutput8 if [%srcdir%]==[] (
%pcretest% -q -dfa %srcdir%\testdata\testinput9 > testout\testoutput9 if exist testdata\ set srcdir=.)
%pcretest% -q %srcdir%\testdata\testinput10 > testout\testoutput10 if [%srcdir%]==[] (
if exist ..\testdata\ set srcdir=..)
fc /n %srcdir%\testdata\testoutput1 testout\testoutput1 if [%srcdir%]==[] (
fc /n %srcdir%\testdata\testoutput2 testout\testoutput2 if exist ..\..\testdata\ set srcdir=..\..)
rem fc /n %srcdir%\testdata\testoutput3 testout\testoutput3 if NOT exist %srcdir%\testdata\ (
fc /n %srcdir%\testdata\wintestoutput3 testout\wintestoutput3 Error: echo distribution testdata folder not found!
fc /n %srcdir%\testdata\testoutput4 testout\testoutput4 call :conferror
fc /n %srcdir%\testdata\testoutput5 testout\testoutput5 exit /b 1
fc /n %srcdir%\testdata\testoutput6 testout\testoutput6 goto :eof
fc /n %srcdir%\testdata\testoutput7 testout\testoutput7 )
fc /n %srcdir%\testdata\testoutput8 testout\testoutput8
fc /n %srcdir%\testdata\testoutput9 testout\testoutput9 if [%pcretest%]==[] set pcretest=.\pcretest.exe
fc /n %srcdir%\testdata\testoutput10 testout\testoutput10
@rem Show which source folder and pcretest executable will be used, then
@rem stop with exit code 1 when the executable cannot be found.
echo source dir is %srcdir%
echo pcretest=%pcretest%
if not exist %pcretest% (
  echo Error: %pcretest% not found!
  echo.
  call :conferror
  exit /b 1
)
@rem Ask pcretest about its build configuration. Each "-C name" query is made
@rem for its exit code only: the printed output is discarded and ERRORLEVEL is
@rem stored in the matching variable. The loops read ERRORLEVEL with the
@rem delayed form because it changes on every pass through the loop body.
%pcretest% -C linksize >NUL
set link_size=%ERRORLEVEL%
for %%w in (8 16 32) do (
  %pcretest% -C pcre%%w >NUL
  set support%%w=!ERRORLEVEL!
)
for %%f in (utf ucp jit) do (
  %pcretest% -C %%f >NUL
  set %%f=!ERRORLEVEL!
)
@rem Create the output folders for every library width that pcretest reports
@rem as supported. Each width gets three folders: plain runs, study-override
@rem runs and JIT-override runs. Existing folders are left alone.
if %support8% EQU 1 (
  if not exist testout8 md testout8
  if not exist testoutstudy8 md testoutstudy8
  if not exist testoutjit8 md testoutjit8
)
if %support16% EQU 1 (
  if not exist testout16 md testout16
  if not exist testoutstudy16 md testoutstudy16
  if not exist testoutjit16 md testoutjit16
)
@rem The 32-bit folders depend on 32-bit support, not on 16-bit support.
if %support32% EQU 1 (
  if not exist testout32 md testout32
  if not exist testoutstudy32 md testoutstudy32
  if not exist testoutjit32 md testoutjit32
)
@rem Start with every test deselected. "all" stays "yes" until an explicit
@rem test number is seen on the command line.
for %%n in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26) do set do%%n=no
set all=yes
@rem Validate the command-line arguments. Each one must be a test number from
@rem 1 to 26. A valid number selects that test and switches off the
@rem run-everything default. Anything else prints the usage text and exits
@rem with code 1. "valid" is read in delayed form because it is assigned
@rem inside the loop body.
for %%a in (%*) do (
  set valid=no
  for %%v in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26) do if %%v == %%a set valid=yes
  if not "!valid!" == "yes" (
    echo Invalid test number - %%a!
    echo Usage %0 [ test_number ] ...
    echo Where test_number is one or more optional test numbers 1 through 26, default is all tests.
    exit /b 1
  )
  set do%%a=yes
  set all=no
)
@rem No failure has been recorded yet. The value keeps its double quotes
@rem because the final check compares it against the quoted form.
set failed="no"
@rem With no test numbers on the command line, select every test.
if "%all%" == "yes" for %%n in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26) do set do%%n=yes
@echo RunTest.bat's pcretest output is written to newly created subfolders named
@echo testout, testoutstudy and testoutjit.
@echo.

@rem Run the selected tests once per library width. "mode" is the extra
@rem pcretest option for the width, which is empty, -16 or -32, and "bits" is
@rem the matching number.
set mode=
set bits=8

:nextMode
@rem Skip this width entirely when pcretest reported no support for it.
if "%mode%" == "" if %support8% EQU 0 goto modeSkip
if "%mode%" == "-16" if %support16% EQU 0 goto modeSkip
if "%mode%" == "-32" if %support32% EQU 0 goto modeSkip
echo.
if "%mode%" == "" echo ---- Testing 8-bit library ----
if "%mode%" == "-16" echo ---- Testing 16-bit library ----
if "%mode%" == "-32" echo ---- Testing 32-bit library ----
echo.

@rem Call the subroutine of every selected test, in numeric order.
for %%n in (1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26) do if "!do%%n!" == "yes" call :do%%n

:modeSkip
@rem Advance from 8-bit to 16-bit to 32-bit, looping back for each new width.
if "%mode%" == "" (
  set mode=-16
  set bits=16
  goto nextMode
)
if "%mode%" == "-16" (
  set mode=-32
  set bits=32
  goto nextMode
)

@rem If mode is -32, testing is finished
if %failed% == "yes" (
  echo In above output, one or more of the various tests failed!
  exit /b 1
)
echo All OK
goto :eof
:runsub
@rem Run pcretest on one test input file and compare what it wrote with the
@rem expected output in the testdata folder.
@rem Arguments are as follows:
@rem
@rem 1 = test number
@rem 2 = output folder prefix, to which the current value of bits is appended
@rem 3 = test name use double quotes
@rem 4 - 9 = pcretest options
@rem
@rem A failed run or a failed comparison sets failed to "yes". The one
@rem exception is a comparison failure in test 3, which is only reported.
if [%1] == [] (
  echo Missing test number argument!
  exit /b 1
)
if [%2] == [] (
  echo Missing outputdir!
  exit /b 1
)
if [%3] == [] (
  echo Missing test name argument!
  exit /b 1
)

set testinput=testinput%1
set testoutput=testoutput%1
@rem Use the win-prefixed variant of the test files when the input exists.
if exist %srcdir%\testdata\win%testinput% (
  set testinput=wintestinput%1
  set testoutput=wintestoutput%1
)

echo Test %1: %3
%pcretest% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% >%2%bits%\%testoutput%
if errorlevel 1 (
  echo. failed executing command-line:
  echo. %pcretest% %mode% %4 %5 %6 %7 %8 %9 %srcdir%\testdata\%testinput% ^>%2%bits%\%testoutput%
  set failed="yes"
  goto :eof
)

@rem Tests 11, 18, 21 and 22 are compared against an expected-output file
@rem whose name carries a suffix made of a hyphen and the current width.
set type=
for %%t in (11 18 21 22) do if [%1]==[%%t] set type=-%bits%

fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput% >NUL
if errorlevel 1 (
  echo. failed comparison: fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput%
  if [%1]==[2] (
    echo.
    echo ** Test 2 requires a lot of stack. PCRE can be configured to
    echo ** use heap for recursion. Otherwise, to pass Test 2
    echo ** you generally need to allocate 8 mb stack to PCRE.
    echo ** See the 'pcrestack' page for a discussion of PCRE's
    echo ** stack usage.
    echo.
  )
  if [%1]==[3] (
    echo.
    echo ** Test 3 failure usually means french locale is not
    echo ** available on the system, rather than a bug or problem with PCRE.
    echo.
    goto :eof
  )
  set failed="yes"
  goto :eof
)

echo. Passed.
goto :eof
:do1
@rem Test 1 is run plain, then with the study override option -s, and, only
@rem when JIT support was detected, with the JIT override option -s+.
call :runsub 1 testout "Main functionality (Compatible with Perl >= 5.10)" -q
call :runsub 1 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 (
  call :runsub 1 testoutjit "Test with JIT Override" -q -s+
)
goto :eof
:do2
@rem Test 2: plain run, study override run, and a JIT override run when JIT
@rem support was detected.
call :runsub 2 testout "API, errors, internals, and non-Perl stuff" -q
call :runsub 2 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 (
  call :runsub 2 testoutjit "Test with JIT Override" -q -s+
)
goto :eof
:do3
@rem Test 3: plain run, study override run, and a JIT override run when JIT
@rem support was detected.
call :runsub 3 testout "Locale-specific features" -q
call :runsub 3 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 (
  call :runsub 3 testoutjit "Test with JIT Override" -q -s+
)
goto :eof
:do4
@rem Test 4 is skipped when the utf query returned 0. Otherwise it is run
@rem plain, with the study override, and with the JIT override when JIT
@rem support was detected.
if %utf% EQU 0 (
  echo Test 4 Skipped due to absence of UTF-%bits% support.
  goto :eof
)
call :runsub 4 testout "UTF-%bits% support - (Compatible with Perl >= 5.10)" -q
call :runsub 4 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 (
  call :runsub 4 testoutjit "Test with JIT Override" -q -s+
)
goto :eof
:do5
@rem Test 5 is skipped when the utf query returned 0. Otherwise it is run
@rem plain, with the study override, and with the JIT override when JIT
@rem support was detected.
if %utf% EQU 0 (
  echo Test 5 Skipped due to absence of UTF-%bits% support.
  goto :eof
)
call :runsub 5 testout "API, internals, and non-Perl stuff for UTF-%bits%" -q
call :runsub 5 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 (
  call :runsub 5 testoutjit "Test with JIT Override" -q -s+
)
goto :eof
:do6
@rem Test 6 is skipped when the ucp query returned 0. Otherwise it is run
@rem plain, with the study override, and with the JIT override when JIT
@rem support was detected.
if %ucp% EQU 0 (
  echo Test 6 Skipped due to absence of Unicode property support.
  goto :eof
)
call :runsub 6 testout "Unicode property support (Compatible with Perl >= 5.10)" -q
call :runsub 6 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 (
  call :runsub 6 testoutjit "Test with JIT Override" -q -s+
)
goto :eof
:do7
@rem Test 7 is skipped when the ucp query returned 0. Otherwise it is run
@rem plain, with the study override, and with the JIT override when JIT
@rem support was detected.
if %ucp% EQU 0 (
  echo Test 7 Skipped due to absence of Unicode property support.
  goto :eof
)
call :runsub 7 testout "API, internals, and non-Perl stuff for Unicode property support" -q
call :runsub 7 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 (
  call :runsub 7 testoutjit "Test with JIT Override" -q -s+
)
goto :eof
:do8
@rem Test 8 passes -dfa to pcretest. It is run plain and with the study
@rem override. No JIT override run is made for this test.
call :runsub 8 testout      "DFA matching main functionality" -q -dfa
call :runsub 8 testoutstudy "Test with Study Override" -q -dfa -s
goto :eof
:do9
@rem Test 9 is skipped when the utf query returned 0. Otherwise it is run
@rem with -dfa, plain and with the study override. No JIT override run.
if %utf% EQU 0 (
  echo Test 9 Skipped due to absence of UTF-%bits% support.
  goto :eof
)
call :runsub 9 testout      "DFA matching with UTF-%bits%" -q -dfa
call :runsub 9 testoutstudy "Test with Study Override" -q -dfa -s
goto :eof
:do10
@rem Test 10 is skipped when the ucp query returned 0. Otherwise it is run
@rem with -dfa, plain and with the study override. No JIT override run.
if %ucp% EQU 0 (
  echo Test 10 Skipped due to absence of Unicode property support.
  goto :eof
)
call :runsub 10 testout      "DFA matching with Unicode properties" -q -dfa
call :runsub 10 testoutstudy "Test with Study Override" -q -dfa -s
goto :eof
:do11
@rem Test 11 needs a link size of 2 and Unicode property support; it is
@rem skipped with a message when either is missing. It is run plain and with
@rem the study override. No JIT override run.
if %link_size% NEQ 2 (
  echo Test 11 Skipped because link size is not 2.
  goto :eof
)
if %ucp% EQU 0 (
  echo Test 11 Skipped due to absence of Unicode property support.
  goto :eof
)
call :runsub 11 testout      "Internal offsets and code size tests" -q
call :runsub 11 testoutstudy "Test with Study Override" -q -s
goto :eof
:do12
@rem Test 12 is a single plain run, made only when the jit query returned a
@rem non-zero value.
if %jit% EQU 0 (
  echo Test 12 Skipped due to absence of JIT support.
  goto :eof
)
call :runsub 12 testout "JIT-specific features (JIT available)" -q
goto :eof
:do13
@rem Test 13 is the counterpart of test 12: a single plain run, skipped when
@rem the jit query returned 1.
if %jit% EQU 1 (
  echo Test 13 Skipped due to presence of JIT support.
  goto :eof
)
call :runsub 13 testout "JIT-specific features (JIT not available)" -q
goto :eof
:do14
@rem Test 14 runs only during the 8-bit pass. Before the runs, the saved16 and
@rem saved32 files are copied from testdata into the current folder as
@rem testsaved16 and testsaved32. Then come the plain, study override and,
@rem when JIT support was detected, JIT override runs.
if %bits% NEQ 8 (
  echo Test 14 Skipped when running 16/32-bit tests.
  goto :eof
)
copy /Y %srcdir%\testdata\saved16 testsaved16
copy /Y %srcdir%\testdata\saved32 testsaved32
call :runsub 14 testout "Specials for the basic 8-bit library" -q
call :runsub 14 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 (
  call :runsub 14 testoutjit "Test with JIT Override" -q -s+
)
goto :eof
:do15
@rem Test 15 runs only during the 8-bit pass and only when the utf query
@rem returned a non-zero value. It is run plain, with the study override, and
@rem with the JIT override when JIT support was detected.
if %bits% NEQ 8 (
  echo Test 15 Skipped when running 16/32-bit tests.
  goto :eof
)
if %utf% EQU 0 (
  echo Test 15 Skipped due to absence of UTF-%bits% support.
  goto :eof
)
call :runsub 15 testout "Specials for the 8-bit library with UTF-%bits% support" -q
call :runsub 15 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 (
  call :runsub 15 testoutjit "Test with JIT Override" -q -s+
)
goto :eof
:do16
@rem Test 16 runs only during the 8-bit pass and only when the ucp query
@rem returned a non-zero value. It is run plain, with the study override, and
@rem with the JIT override when JIT support was detected.
if %bits% NEQ 8 (
  echo Test 16 Skipped when running 16/32-bit tests.
  goto :eof
)
if %ucp% EQU 0 (
  echo Test 16 Skipped due to absence of Unicode property support.
  goto :eof
)
call :runsub 16 testout "Specials for the 8-bit library with Unicode propery support" -q
call :runsub 16 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 (
  call :runsub 16 testoutjit "Test with JIT Override" -q -s+
)
goto :eof
:do17
rem Test 17: specials for the basic 16-bit and 32-bit libraries.
rem Skipped when bits is 8.
rem Three passes: plain, -s for Study Override, and -s+ for JIT Override,
rem the last one only when jit is 1.
if %bits% EQU 8 (
echo Test 17 Skipped when running 8-bit tests.
goto :eof
)
call :runsub 17 testout "Specials for the basic 16/32-bit library" -q
call :runsub 17 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 call :runsub 17 testoutjit "Test with JIT Override" -q -s+
goto :eof
:do18
rem Test 18: specials for the 16-bit and 32-bit libraries with UTF support.
rem Skipped when bits is 8, and also skipped when utf is 0.
rem Three passes: plain, -s for Study Override, and -s+ for JIT Override,
rem the last one only when jit is 1.
if %bits% EQU 8 (
echo Test 18 Skipped when running 8-bit tests.
goto :eof
)
if %utf% EQU 0 (
echo Test 18 Skipped due to absence of UTF-%bits% support.
goto :eof
)
call :runsub 18 testout "Specials for the 16/32-bit library with UTF-%bits% support" -q
call :runsub 18 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 call :runsub 18 testoutjit "Test with JIT Override" -q -s+
goto :eof
:do19
rem Test 19: specials for the 16-bit and 32-bit libraries with Unicode
rem property support. Skipped when bits is 8, and also when ucp is 0.
rem Three passes: plain, -s for Study Override, and -s+ for JIT Override,
rem the last one only when jit is 1.
if %bits% EQU 8 (
echo Test 19 Skipped when running 8-bit tests.
goto :eof
)
if %ucp% EQU 0 (
echo Test 19 Skipped due to absence of Unicode property support.
goto :eof
)
call :runsub 19 testout "Specials for the 16/32-bit library with Unicode property support" -q
call :runsub 19 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 call :runsub 19 testoutjit "Test with JIT Override" -q -s+
goto :eof
:do20
rem Test 20: DFA specials for the basic 16-bit and 32-bit libraries.
rem Skipped when bits is 8. Both passes use -dfa, and the second adds -s
rem for the Study Override. There is no JIT pass for this test.
if %bits% EQU 8 (
echo Test 20 Skipped when running 8-bit tests.
goto :eof
)
call :runsub 20 testout "DFA specials for the basic 16/32-bit library" -q -dfa
call :runsub 20 testoutstudy "Test with Study Override" -q -dfa -s
goto :eof
:do21
rem Test 21: reloads for the basic 16-bit and 32-bit libraries.
rem Skipped when bits is 8, and also unless link_size is 2.
rem Five saved pattern files are copied from the source testdata folder
rem into the current directory with a test prefix before the runs.
rem Three passes: plain, -s for Study Override, and -s+ for JIT Override,
rem the last one only when jit is 1.
if %bits% EQU 8 (
echo Test 21 Skipped when running 8-bit tests.
goto :eof
)
if NOT %link_size% EQU 2 (
echo Test 21 Skipped because link size is not 2.
goto :eof
)
copy /Y %srcdir%\testdata\saved8 testsaved8
copy /Y %srcdir%\testdata\saved16LE-1 testsaved16LE-1
copy /Y %srcdir%\testdata\saved16BE-1 testsaved16BE-1
copy /Y %srcdir%\testdata\saved32LE-1 testsaved32LE-1
copy /Y %srcdir%\testdata\saved32BE-1 testsaved32BE-1
call :runsub 21 testout "Reloads for the basic 16/32-bit library" -q
call :runsub 21 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 call :runsub 21 testoutjit "Test with JIT Override" -q -s+
goto :eof
:do22
rem Test 22: reloads for the 16-bit and 32-bit libraries with UTF support.
rem Skipped when bits is 8, when utf is 0, and unless link_size is 2.
rem Four saved pattern files are copied from the source testdata folder
rem into the current directory with a test prefix before the runs.
rem Three passes: plain, -s for Study Override, and -s+ for JIT Override,
rem the last one only when jit is 1.
if %bits% EQU 8 (
echo Test 22 Skipped when running 8-bit tests.
goto :eof
)
if %utf% EQU 0 (
echo Test 22 Skipped due to absence of UTF-%bits% support.
goto :eof
)
if NOT %link_size% EQU 2 (
echo Test 22 Skipped because link size is not 2.
goto :eof
)
copy /Y %srcdir%\testdata\saved16LE-2 testsaved16LE-2
copy /Y %srcdir%\testdata\saved16BE-2 testsaved16BE-2
copy /Y %srcdir%\testdata\saved32LE-2 testsaved32LE-2
copy /Y %srcdir%\testdata\saved32BE-2 testsaved32BE-2
call :runsub 22 testout "Reloads for the 16/32-bit library with UTF-16/32 support" -q
call :runsub 22 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 call :runsub 22 testoutjit "Test with JIT Override" -q -s+
goto :eof
:do23
rem Test 23: specials for the 16-bit library.
rem Skipped unless bits is 16.
rem Three passes: plain, -s for Study Override, and -s+ for JIT Override,
rem the last one only when jit is 1.
if NOT %bits% EQU 16 (
echo Test 23 Skipped when running 8/32-bit tests.
goto :eof
)
call :runsub 23 testout "Specials for the 16-bit library" -q
call :runsub 23 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 call :runsub 23 testoutjit "Test with JIT Override" -q -s+
goto :eof
:do24
rem Test 24: specials for the 16-bit library with UTF-16 support.
rem Skipped unless bits is 16, and also skipped when utf is 0.
rem Three passes: plain, -s for Study Override, and -s+ for JIT Override,
rem the last one only when jit is 1.
if NOT %bits% EQU 16 (
echo Test 24 Skipped when running 8/32-bit tests.
goto :eof
)
if %utf% EQU 0 (
echo Test 24 Skipped due to absence of UTF-%bits% support.
goto :eof
)
call :runsub 24 testout "Specials for the 16-bit library with UTF-16 support" -q
call :runsub 24 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 call :runsub 24 testoutjit "Test with JIT Override" -q -s+
goto :eof
:do25
rem Test 25: specials for the 32-bit library.
rem Skipped unless bits is 32.
rem Three passes: plain, -s for Study Override, and -s+ for JIT Override,
rem the last one only when jit is 1.
if NOT %bits% EQU 32 (
echo Test 25 Skipped when running 8/16-bit tests.
goto :eof
)
call :runsub 25 testout "Specials for the 32-bit library" -q
call :runsub 25 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 call :runsub 25 testoutjit "Test with JIT Override" -q -s+
goto :eof
:do26
rem Test 26: specials for the 32-bit library with UTF-32 support.
rem Skipped unless bits is 32, and also skipped when utf is 0.
rem Three passes: plain, -s for Study Override, and -s+ for JIT Override,
rem the last one only when jit is 1.
if NOT %bits% EQU 32 (
echo Test 26 Skipped when running 8/16-bit tests.
goto :eof
)
if %utf% EQU 0 (
echo Test 26 Skipped due to absence of UTF-%bits% support.
goto :eof
)
call :runsub 26 testout "Specials for the 32-bit library with UTF-32 support" -q
call :runsub 26 testoutstudy "Test with Study Override" -q -s
if %jit% EQU 1 call :runsub 26 testoutjit "Test with JIT Override" -q -s+
goto :eof
:conferror
@rem Print troubleshooting guidance for an incomplete build or a bad
@rem configuration, then return to the caller. Every line is prefixed with
@rem an at sign so the text is shown cleanly whatever the echo state is.
@echo.
@echo Either your build is incomplete or you have a configuration error.
@echo.
@echo If configured with cmake and executed via "make test" or the MSVC "RUN_TESTS"
@echo project, pcre_test.bat defines variables and automatically calls RunTest.bat.
@echo For manual testing of all available features, after configuring with cmake
@echo and building, you can run the built pcre_test.bat. For best results with
@echo cmake builds and tests avoid directories with full path names that include
@echo spaces for source or build.
@echo.
@echo Otherwise, if the build dir is in a subdir of the source dir, testdata needed
@echo for input and verification should be found automatically when (from the
@echo location of the built exes) you call RunTest.bat. By default RunTest.bat
@echo runs all tests compatible with the linked pcre library but it can be given
@echo a test number as an argument.
@echo.
@echo If the build dir is not under the source dir you can either copy your exes
@echo to the source folder or copy RunTest.bat and the testdata folder to the
@echo location of your built exes and then run RunTest.bat.
@echo.
goto :eof

7245
tools/pcre/aclocal.m4 vendored

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,17 @@
# Modified from FindReadline.cmake (PH Feb 2012)
#
# Locate the editline (libedit) line-editing library.
#
# Cache variables filled in by this module:
#   EDITLINE_INCLUDE_DIR - directory that contains libedit's readline.h
#   EDITLINE_LIBRARY     - full path of the "edit" library
#
# Result:
#   EDITLINE_FOUND is set to TRUE directly when both cache variables are
#   already populated; otherwise the decision (and the status message) is
#   delegated to find_package_handle_standard_args.

# Fast path for re-configures: both locations are already known.  The test
# deliberately looks only at the two variables this module owns; the
# NCURSES_LIBRARY term inherited from FindReadline.cmake is never searched
# for or used here, so it must not gate the shortcut.
if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
  set(EDITLINE_FOUND TRUE)
else()
  # libedit installs its readline-compatible header in one of several
  # distribution-specific sub-directories.
  find_path(EDITLINE_INCLUDE_DIR readline.h
    /usr/include/editline
    /usr/include/edit/readline
    /usr/include/readline
  )
  find_library(EDITLINE_LIBRARY NAMES edit)

  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)

  # Keep the raw locations out of the default cache view.
  mark_as_advanced(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
endif()

343
tools/pcre/compile Normal file
View File

@ -0,0 +1,343 @@
#! /bin/sh
# Wrapper for compilers which do not understand '-c -o'.
scriptversion=2012-03-05.13; # UTC
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2009, 2010, 2012 Free
# Software Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
nl='
'

# We need space, tab and new line, in precisely that order.  The tab is
# produced by printf instead of being written literally, so that editors
# or transport tools which normalize whitespace cannot silently turn it
# into a space and change how unquoted expansions are split.
tab=`printf '\t'`
IFS=" $tab$nl"

# Conversion flavour used by func_file_conv; determined lazily on first use.
file_conv=
# func_file_conv build_file lazy
# Translate the $build-style path BUILD_FILE into the form expected on the
# $host and leave the result in $file.  Only Windows hosts are handled at
# present.  LAZY is a comma separated list of conversion types; when the
# detected type appears in that list the path is left as it is.
func_file_conv ()
{
  file=$1

  # Only absolute paths that are not UNC paths are ever rewritten.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # Work out the conversion flavour once and remember it in $file_conv.
  if test -z "$file_conv"; then
    case `uname -s` in
      MINGW*)  file_conv=mingw ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine ;;
    esac
  fi

  case $file_conv/,$2, in
    *,$file_conv,*)
      # The caller listed this flavour as lazy: keep the path untouched.
      ;;
    mingw/*)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    cygwin/*)
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    wine/*)
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}
# func_cl_dashL linkdir
# Register LINKDIR as a library directory for cl: the converted path is
# appended to the ';'-separated $lib_path list and handed to the linker
# as a -LIBPATH: option collected in $linker_opts.
func_cl_dashL ()
{
  func_file_conv "$1"
  # Prefix with "<old list>;" only when the list already has an entry.
  lib_path="${lib_path:+$lib_path;}$file"
  linker_opts="$linker_opts -LIBPATH:$file"
}
# func_cl_dashl library
# Resolve LIBRARY the way a -l option would be resolved, for cl.  Every
# directory in $lib_path and then $LIB (both ';'-separated) is probed and
# the first hit is stored in $lib.  When nothing is found, $lib falls back
# to LIBRARY.lib and the linker is left to locate it.
func_cl_dashl ()
{
  lib=$1
  found=no
  save_IFS=$IFS
  IFS=';'
  for dir in $lib_path $LIB
  do
    IFS=$save_IFS
    # An import library (.dll.lib) is preferred unless -static was given.
    if $shared && test -f "$dir/$lib.dll.lib"; then
      lib=$dir/$lib.dll.lib
      found=yes
    elif test -f "$dir/$lib.lib"; then
      lib=$dir/$lib.lib
      found=yes
    fi
    test "$found" = yes && break
  done
  IFS=$save_IFS
  test "$found" = yes || lib=$lib.lib
}
# func_cl_wrapper cl arg...
# Adjust compile command to suit cl
#
# The positional parameters are rewritten in place.  Each loop iteration
# looks at $1, appends its translation (if any) to the end of "$@" with
# 'set x "$@" ...; shift', and the final 'shift' of the iteration drops
# the original $1.  After one pass over the original arguments "$@" holds
# only the translated command.  Options destined for the linker are not
# re-appended; they are gathered in $linker_opts and added after -link.
func_cl_wrapper ()
{
# Assume a capable shell
lib_path=
shared=:
linker_opts=
for arg
do
# A non-empty $eat means the previous option already consumed this word
# as its argument, so it is dropped without being examined.
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
eat=1
# Object outputs become -Fo<file>, anything else becomes -Fe<file>.
case $2 in
*.o | *.[oO][bB][jJ])
func_file_conv "$2"
set x "$@" -Fo"$file"
shift
;;
*)
func_file_conv "$2"
set x "$@" -Fe"$file"
shift
;;
esac
;;
-I)
eat=1
# 'mingw' is passed as the lazy list: no conversion on a MinGW host.
func_file_conv "$2" mingw
set x "$@" -I"$file"
shift
;;
-I*)
func_file_conv "${1#-I}" mingw
set x "$@" -I"$file"
shift
;;
-l)
eat=1
# func_cl_dashl leaves the resolved library name in $lib.
func_cl_dashl "$2"
set x "$@" "$lib"
shift
;;
-l*)
func_cl_dashl "${1#-l}"
set x "$@" "$lib"
shift
;;
-L)
eat=1
# Recorded in $lib_path and $linker_opts only; not re-appended to "$@".
func_cl_dashL "$2"
;;
-L*)
func_cl_dashL "${1#-L}"
;;
-static)
# Makes func_cl_dashl stop preferring .dll.lib import libraries.
shared=false
;;
-Wl,*)
# Split the comma separated list and pass each piece to the linker.
arg=${1#-Wl,}
save_ifs="$IFS"; IFS=','
for flag in $arg; do
IFS="$save_ifs"
linker_opts="$linker_opts $flag"
done
IFS="$save_ifs"
;;
-Xlinker)
eat=1
linker_opts="$linker_opts $2"
;;
-*)
# Any other option is passed through unchanged.
set x "$@" "$1"
shift
;;
*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
# These source suffixes are passed to cl with a -Tp prefix.
func_file_conv "$1"
set x "$@" -Tp"$file"
shift
;;
*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
func_file_conv "$1" mingw
set x "$@" "$file"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
# $linker_opts entries each start with a space, so this yields
# "-link opt1 opt2 ..." and is left unquoted below to split into words.
if test -n "$linker_opts"; then
linker_opts="-link$linker_opts"
fi
exec "$@" $linker_opts
# Only reached when exec itself fails.
exit 1
}
# Start of the main script.  $eat is the "skip next word" flag shared by
# the argument loops; it starts out empty.
eat=
# Dispatch on the first argument: no command at all, a help or version
# request, or a cl compiler, which gets its own wrapper.  Any other
# program name falls through to the generic '-c -o' handling below.
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]
Wrapper for compilers which do not understand '-c -o'.
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.
If you are trying to build a whole package this is not the
right script to run: please start by reading the file 'INSTALL'.
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "compile $scriptversion"
exit $?
;;
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
func_cl_wrapper "$@" # Doesn't return...
;;
esac
# Generic emulation of '-c -o' for compilers that cannot do both.
# $ofile receives the requested object file, $cfile the .c source.  The
# loop rebuilds "$@" without the '-o object' pair, using the same
# append-then-shift rotation as func_cl_wrapper.
ofile=
cfile=
for arg
do
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
# So we strip '-o arg' only if arg is an object.
eat=1
case $2 in
*.o | *.obj)
ofile=$2
;;
*)
set x "$@" -o "$2"
shift
;;
esac
;;
*.c)
cfile=$1
set x "$@" "$1"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
if test -z "$ofile" || test -z "$cfile"; then
# If no '-o' option was seen then we might have been invoked from a
# pattern rule where we don't need one. That is ok -- this is a
# normal compilation that the losing compiler can handle. If no
# '.c' file was seen then we are probably linking. That is also
# ok.
exec "$@"
fi
# Name of file we expect compiler to create.
# (Directory part and any drive letter removed, .c replaced by .o.)
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
# Create the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file. Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
# Retry once a second until this process manages to create the directory.
while true; do
if mkdir "$lockdir" >/dev/null 2>&1; then
break
fi
sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
# On signals 1, 2 and 15 remove the lock directory and exit with failure.
trap "rmdir '$lockdir'; exit 1" 1 2 15
# Run the compile.
"$@"
ret=$?
# Move whatever the compiler produced to the requested name.  Appending
# "bj" to the expected .o name checks for a .obj file as well.
if test -f "$cofile"; then
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
elif test -f "${cofile}bj"; then
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
fi
rmdir "$lockdir"
# Report the compiler's own exit status.
exit $ret
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

View File

@ -18,9 +18,15 @@
#cmakedefine PCRE_STATIC 1 #cmakedefine PCRE_STATIC 1
#cmakedefine SUPPORT_UTF8 1 #cmakedefine SUPPORT_PCRE8 1
#cmakedefine SUPPORT_PCRE16 1
#cmakedefine SUPPORT_PCRE32 1
#cmakedefine SUPPORT_JIT 1
#cmakedefine SUPPORT_PCREGREP_JIT 1
#cmakedefine SUPPORT_UTF 1
#cmakedefine SUPPORT_UCP 1 #cmakedefine SUPPORT_UCP 1
#cmakedefine EBCDIC 1 #cmakedefine EBCDIC 1
#cmakedefine EBCDIC_NL25 1
#cmakedefine BSR_ANYCRLF 1 #cmakedefine BSR_ANYCRLF 1
#cmakedefine NO_RECURSE 1 #cmakedefine NO_RECURSE 1
@ -29,14 +35,18 @@
#cmakedefine SUPPORT_LIBBZ2 1 #cmakedefine SUPPORT_LIBBZ2 1
#cmakedefine SUPPORT_LIBZ 1 #cmakedefine SUPPORT_LIBZ 1
#cmakedefine SUPPORT_LIBEDIT 1
#cmakedefine SUPPORT_LIBREADLINE 1 #cmakedefine SUPPORT_LIBREADLINE 1
#cmakedefine SUPPORT_VALGRIND 1
#cmakedefine SUPPORT_GCOV 1
#define NEWLINE @NEWLINE@ #define NEWLINE @NEWLINE@
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@ #define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
#define LINK_SIZE @PCRE_LINK_SIZE@ #define LINK_SIZE @PCRE_LINK_SIZE@
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@ #define MATCH_LIMIT @PCRE_MATCH_LIMIT@
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@ #define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
#define PCREGREP_BUFSIZE @PCREGREP_BUFSIZE@
#define MAX_NAME_SIZE 32 #define MAX_NAME_SIZE 32
#define MAX_NAME_COUNT 10000 #define MAX_NAME_COUNT 10000

View File

@ -1,10 +1,10 @@
#! /bin/sh #! /bin/sh
# Attempt to guess a canonical system name. # Attempt to guess a canonical system name.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# Free Software Foundation, Inc. # 2011, 2012 Free Software Foundation, Inc.
timestamp='2008-09-28' timestamp='2012-08-14'
# This file is free software; you can redistribute it and/or modify it # This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by # under the terms of the GNU General Public License as published by
@ -17,9 +17,7 @@ timestamp='2008-09-28'
# General Public License for more details. # General Public License for more details.
# #
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software # along with this program; if not, see <http://www.gnu.org/licenses/>.
# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
# 02110-1301, USA.
# #
# As a special exception to the GNU General Public License, if you # As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a # distribute this file as part of a program that contains a
@ -27,16 +25,16 @@ timestamp='2008-09-28'
# the same distribution terms that you use for the rest of that program. # the same distribution terms that you use for the rest of that program.
# Originally written by Per Bothner <per@bothner.com>. # Originally written by Per Bothner. Please send patches (context
# Please send patches to <config-patches@gnu.org>. Submit a context # diff format) to <config-patches@gnu.org> and include a ChangeLog
# diff and a properly formatted ChangeLog entry. # entry.
# #
# This script attempts to guess a canonical system name similar to # This script attempts to guess a canonical system name similar to
# config.sub. If it succeeds, it prints the system name on stdout, and # config.sub. If it succeeds, it prints the system name on stdout, and
# exits with 0. Otherwise, it exits with 1. # exits with 0. Otherwise, it exits with 1.
# #
# The plan is that this can be called by configure scripts if you # You can get the latest version of this script from:
# don't specify an explicit build system type. # http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
me=`echo "$0" | sed -e 's,.*/,,'` me=`echo "$0" | sed -e 's,.*/,,'`
@ -56,8 +54,9 @@ version="\
GNU config.guess ($timestamp) GNU config.guess ($timestamp)
Originally written by Per Bothner. Originally written by Per Bothner.
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@ -139,9 +138,10 @@ UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
if [ "${UNAME_SYSTEM}" = "Linux" ] ; then case "${UNAME_SYSTEM}" in
Linux|GNU/*)
eval $set_cc_for_build eval $set_cc_for_build
cat << EOF > $dummy.c cat <<-EOF > $dummy.c
#include <features.h> #include <features.h>
#ifdef __UCLIBC__ #ifdef __UCLIBC__
# ifdef __UCLIBC_CONFIG_VERSION__ # ifdef __UCLIBC_CONFIG_VERSION__
@ -150,18 +150,23 @@ if [ "${UNAME_SYSTEM}" = "Linux" ] ; then
LIBC=uclibc LIBC=uclibc
# endif # endif
#else #else
# ifdef __dietlibc__
LIBC=dietlibc
# else
LIBC=gnu LIBC=gnu
# endif
#endif #endif
EOF EOF
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep LIBC= | sed -e 's: ::g'` eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
fi ;;
esac
# Note: order is significant - the case branches are not exclusive. # Note: order is significant - the case branches are not exclusive.
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
*:NetBSD:*:*) *:NetBSD:*:*)
# NetBSD (nbsd) targets should (where applicable) match one or # NetBSD (nbsd) targets should (where applicable) match one or
# more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
# *-*-netbsdecoff* and *-*-netbsd*. For targets that recently # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
# switched to ELF, *-*-netbsd* would select the old # switched to ELF, *-*-netbsd* would select the old
# object file format. This provides both forward # object file format. This provides both forward
@ -187,7 +192,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
arm*|i386|m68k|ns32k|sh3*|sparc|vax) arm*|i386|m68k|ns32k|sh3*|sparc|vax)
eval $set_cc_for_build eval $set_cc_for_build
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep __ELF__ >/dev/null | grep -q __ELF__
then then
# Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
# Return netbsd for either. FIX? # Return netbsd for either. FIX?
@ -197,7 +202,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
fi fi
;; ;;
*) *)
os=netbsd os=netbsd
;; ;;
esac esac
# The OS release # The OS release
@ -218,6 +223,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
echo "${machine}-${os}${release}" echo "${machine}-${os}${release}"
exit ;; exit ;;
*:Bitrig:*:*)
UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE}
exit ;;
*:OpenBSD:*:*) *:OpenBSD:*:*)
UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
@ -240,7 +249,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
;; ;;
*5.*) *5.*)
UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
;; ;;
esac esac
# According to Compaq, /usr/sbin/psrinfo has been available on # According to Compaq, /usr/sbin/psrinfo has been available on
@ -286,7 +295,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# A Xn.n version is an unreleased experimental baselevel. # A Xn.n version is an unreleased experimental baselevel.
# 1.2 uses "1.2" for uname -r. # 1.2 uses "1.2" for uname -r.
echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
exit ;; # Reset EXIT trap before exiting to avoid spurious non-zero exit code.
exitcode=$?
trap '' 0
exit $exitcode ;;
Alpha\ *:Windows_NT*:*) Alpha\ *:Windows_NT*:*)
# How do we know it's Interix rather than the generic POSIX subsystem? # How do we know it's Interix rather than the generic POSIX subsystem?
# Should we change UNAME_MACHINE based on the output of uname instead # Should we change UNAME_MACHINE based on the output of uname instead
@ -312,7 +324,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
echo s390-ibm-zvmoe echo s390-ibm-zvmoe
exit ;; exit ;;
*:OS400:*:*) *:OS400:*:*)
echo powerpc-ibm-os400 echo powerpc-ibm-os400
exit ;; exit ;;
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
echo arm-acorn-riscix${UNAME_RELEASE} echo arm-acorn-riscix${UNAME_RELEASE}
@ -341,14 +353,33 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
case `/usr/bin/uname -p` in case `/usr/bin/uname -p` in
sparc) echo sparc-icl-nx7; exit ;; sparc) echo sparc-icl-nx7; exit ;;
esac ;; esac ;;
s390x:SunOS:*:*)
echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;;
sun4H:SunOS:5.*:*) sun4H:SunOS:5.*:*)
echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;; exit ;;
sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;; exit ;;
i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
echo i386-pc-auroraux${UNAME_RELEASE}
exit ;;
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` eval $set_cc_for_build
SUN_ARCH="i386"
# If there is a compiler, see if it is configured for 64-bit objects.
# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
# This test works for both compilers.
if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
then
SUN_ARCH="x86_64"
fi
fi
echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
exit ;; exit ;;
sun4*:SunOS:6*:*) sun4*:SunOS:6*:*)
# According to config.sub, this is the proper way to canonicalize # According to config.sub, this is the proper way to canonicalize
@ -392,23 +423,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# MiNT. But MiNT is downward compatible to TOS, so this should # MiNT. But MiNT is downward compatible to TOS, so this should
# be no problem. # be no problem.
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
echo m68k-atari-mint${UNAME_RELEASE} echo m68k-atari-mint${UNAME_RELEASE}
exit ;; exit ;;
atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
echo m68k-atari-mint${UNAME_RELEASE} echo m68k-atari-mint${UNAME_RELEASE}
exit ;; exit ;;
*falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
echo m68k-atari-mint${UNAME_RELEASE} echo m68k-atari-mint${UNAME_RELEASE}
exit ;; exit ;;
milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
echo m68k-milan-mint${UNAME_RELEASE} echo m68k-milan-mint${UNAME_RELEASE}
exit ;; exit ;;
hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
echo m68k-hades-mint${UNAME_RELEASE} echo m68k-hades-mint${UNAME_RELEASE}
exit ;; exit ;;
*:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
echo m68k-unknown-mint${UNAME_RELEASE} echo m68k-unknown-mint${UNAME_RELEASE}
exit ;; exit ;;
m68k:machten:*:*) m68k:machten:*:*)
echo m68k-apple-machten${UNAME_RELEASE} echo m68k-apple-machten${UNAME_RELEASE}
exit ;; exit ;;
@ -478,8 +509,8 @@ EOF
echo m88k-motorola-sysv3 echo m88k-motorola-sysv3
exit ;; exit ;;
AViiON:dgux:*:*) AViiON:dgux:*:*)
# DG/UX returns AViiON for all architectures # DG/UX returns AViiON for all architectures
UNAME_PROCESSOR=`/usr/bin/uname -p` UNAME_PROCESSOR=`/usr/bin/uname -p`
if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
then then
if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
@ -492,7 +523,7 @@ EOF
else else
echo i586-dg-dgux${UNAME_RELEASE} echo i586-dg-dgux${UNAME_RELEASE}
fi fi
exit ;; exit ;;
M88*:DolphinOS:*:*) # DolphinOS (SVR3) M88*:DolphinOS:*:*) # DolphinOS (SVR3)
echo m88k-dolphin-sysv3 echo m88k-dolphin-sysv3
exit ;; exit ;;
@ -549,7 +580,7 @@ EOF
echo rs6000-ibm-aix3.2 echo rs6000-ibm-aix3.2
fi fi
exit ;; exit ;;
*:AIX:*:[456]) *:AIX:*:[4567])
IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
IBM_ARCH=rs6000 IBM_ARCH=rs6000
@ -592,52 +623,52 @@ EOF
9000/[678][0-9][0-9]) 9000/[678][0-9][0-9])
if [ -x /usr/bin/getconf ]; then if [ -x /usr/bin/getconf ]; then
sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
case "${sc_cpu_version}" in case "${sc_cpu_version}" in
523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
532) # CPU_PA_RISC2_0 532) # CPU_PA_RISC2_0
case "${sc_kernel_bits}" in case "${sc_kernel_bits}" in
32) HP_ARCH="hppa2.0n" ;; 32) HP_ARCH="hppa2.0n" ;;
64) HP_ARCH="hppa2.0w" ;; 64) HP_ARCH="hppa2.0w" ;;
'') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20
esac ;; esac ;;
esac esac
fi fi
if [ "${HP_ARCH}" = "" ]; then if [ "${HP_ARCH}" = "" ]; then
eval $set_cc_for_build eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c sed 's/^ //' << EOF >$dummy.c
#define _HPUX_SOURCE #define _HPUX_SOURCE
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h> #include <unistd.h>
int main () int main ()
{ {
#if defined(_SC_KERNEL_BITS) #if defined(_SC_KERNEL_BITS)
long bits = sysconf(_SC_KERNEL_BITS); long bits = sysconf(_SC_KERNEL_BITS);
#endif #endif
long cpu = sysconf (_SC_CPU_VERSION); long cpu = sysconf (_SC_CPU_VERSION);
switch (cpu) switch (cpu)
{ {
case CPU_PA_RISC1_0: puts ("hppa1.0"); break; case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
case CPU_PA_RISC1_1: puts ("hppa1.1"); break; case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
case CPU_PA_RISC2_0: case CPU_PA_RISC2_0:
#if defined(_SC_KERNEL_BITS) #if defined(_SC_KERNEL_BITS)
switch (bits) switch (bits)
{ {
case 64: puts ("hppa2.0w"); break; case 64: puts ("hppa2.0w"); break;
case 32: puts ("hppa2.0n"); break; case 32: puts ("hppa2.0n"); break;
default: puts ("hppa2.0"); break; default: puts ("hppa2.0"); break;
} break; } break;
#else /* !defined(_SC_KERNEL_BITS) */ #else /* !defined(_SC_KERNEL_BITS) */
puts ("hppa2.0"); break; puts ("hppa2.0"); break;
#endif #endif
default: puts ("hppa1.0"); break; default: puts ("hppa1.0"); break;
} }
exit (0); exit (0);
} }
EOF EOF
(CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
test -z "$HP_ARCH" && HP_ARCH=hppa test -z "$HP_ARCH" && HP_ARCH=hppa
@ -657,7 +688,7 @@ EOF
# => hppa64-hp-hpux11.23 # => hppa64-hp-hpux11.23
if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
grep __LP64__ >/dev/null grep -q __LP64__
then then
HP_ARCH="hppa2.0w" HP_ARCH="hppa2.0w"
else else
@ -728,22 +759,22 @@ EOF
exit ;; exit ;;
C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
echo c1-convex-bsd echo c1-convex-bsd
exit ;; exit ;;
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
if getsysinfo -f scalar_acc if getsysinfo -f scalar_acc
then echo c32-convex-bsd then echo c32-convex-bsd
else echo c2-convex-bsd else echo c2-convex-bsd
fi fi
exit ;; exit ;;
C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
echo c34-convex-bsd echo c34-convex-bsd
exit ;; exit ;;
C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
echo c38-convex-bsd echo c38-convex-bsd
exit ;; exit ;;
C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
echo c4-convex-bsd echo c4-convex-bsd
exit ;; exit ;;
CRAY*Y-MP:*:*:*) CRAY*Y-MP:*:*:*)
echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
exit ;; exit ;;
@ -767,14 +798,14 @@ EOF
exit ;; exit ;;
F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;; exit ;;
5000:UNIX_System_V:4.*:*) 5000:UNIX_System_V:4.*:*)
FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;; exit ;;
i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
@ -786,34 +817,39 @@ EOF
echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
exit ;; exit ;;
*:FreeBSD:*:*) *:FreeBSD:*:*)
case ${UNAME_MACHINE} in UNAME_PROCESSOR=`/usr/bin/uname -p`
pc98) case ${UNAME_PROCESSOR} in
echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
amd64) amd64)
echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
*) *)
echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
esac esac
exit ;; exit ;;
i*:CYGWIN*:*) i*:CYGWIN*:*)
echo ${UNAME_MACHINE}-pc-cygwin echo ${UNAME_MACHINE}-pc-cygwin
exit ;; exit ;;
*:MINGW64*:*)
echo ${UNAME_MACHINE}-pc-mingw64
exit ;;
*:MINGW*:*) *:MINGW*:*)
echo ${UNAME_MACHINE}-pc-mingw32 echo ${UNAME_MACHINE}-pc-mingw32
exit ;; exit ;;
i*:MSYS*:*)
echo ${UNAME_MACHINE}-pc-msys
exit ;;
i*:windows32*:*) i*:windows32*:*)
# uname -m includes "-pc" on this system. # uname -m includes "-pc" on this system.
echo ${UNAME_MACHINE}-mingw32 echo ${UNAME_MACHINE}-mingw32
exit ;; exit ;;
i*:PW*:*) i*:PW*:*)
echo ${UNAME_MACHINE}-pc-pw32 echo ${UNAME_MACHINE}-pc-pw32
exit ;; exit ;;
*:Interix*:[3456]*) *:Interix*:*)
case ${UNAME_MACHINE} in case ${UNAME_MACHINE} in
x86) x86)
echo i586-pc-interix${UNAME_RELEASE} echo i586-pc-interix${UNAME_RELEASE}
exit ;; exit ;;
EM64T | authenticamd | genuineintel) authenticamd | genuineintel | EM64T)
echo x86_64-unknown-interix${UNAME_RELEASE} echo x86_64-unknown-interix${UNAME_RELEASE}
exit ;; exit ;;
IA64) IA64)
@ -823,6 +859,9 @@ EOF
[345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
echo i${UNAME_MACHINE}-pc-mks echo i${UNAME_MACHINE}-pc-mks
exit ;; exit ;;
8664:Windows_NT:*)
echo x86_64-pc-mks
exit ;;
i*:Windows_NT*:* | Pentium*:Windows_NT*:*) i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
# How do we know it's Interix rather than the generic POSIX subsystem? # How do we know it's Interix rather than the generic POSIX subsystem?
# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
@ -843,101 +882,22 @@ EOF
exit ;; exit ;;
*:GNU:*:*) *:GNU:*:*)
# the GNU system # the GNU system
echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
exit ;; exit ;;
*:GNU/*:*:*) *:GNU/*:*:*)
# other systems with GNU libc and userland # other systems with GNU libc and userland
echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
exit ;; exit ;;
i*86:Minix:*:*) i*86:Minix:*:*)
echo ${UNAME_MACHINE}-pc-minix echo ${UNAME_MACHINE}-pc-minix
exit ;; exit ;;
arm*:Linux:*:*) aarch64:Linux:*:*)
eval $set_cc_for_build
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_EABI__
then
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
else
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
fi
exit ;;
avr32*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC} echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;; exit ;;
cris:Linux:*:*) aarch64_be:Linux:*:*)
echo cris-axis-linux-${LIBC} UNAME_MACHINE=aarch64_be
exit ;;
crisv32:Linux:*:*)
echo crisv32-axis-linux-${LIBC}
exit ;;
frv:Linux:*:*)
echo frv-unknown-linux-${LIBC}
exit ;;
ia64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC} echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;; exit ;;
m32r*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
m68*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
mips:Linux:*:*)
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
#undef CPU
#undef mips
#undef mipsel
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
CPU=mipsel
#else
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
CPU=mips
#else
CPU=
#endif
#endif
EOF
eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
/^CPU/{
s: ::g
p
}'`"
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
;;
mips64:Linux:*:*)
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
#undef CPU
#undef mips64
#undef mips64el
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
CPU=mips64el
#else
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
CPU=mips64
#else
CPU=
#endif
#endif
EOF
eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
/^CPU/{
s: ::g
p
}'`"
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
;;
or32:Linux:*:*)
echo or32-unknown-linux-${LIBC}
exit ;;
ppc:Linux:*:*)
echo powerpc-unknown-linux-${LIBC}
exit ;;
ppc64:Linux:*:*)
echo powerpc64-unknown-linux-${LIBC}
exit ;;
alpha:Linux:*:*) alpha:Linux:*:*)
case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
EV5) UNAME_MACHINE=alphaev5 ;; EV5) UNAME_MACHINE=alphaev5 ;;
@ -947,13 +907,81 @@ EOF
EV6) UNAME_MACHINE=alphaev6 ;; EV6) UNAME_MACHINE=alphaev6 ;;
EV67) UNAME_MACHINE=alphaev67 ;; EV67) UNAME_MACHINE=alphaev67 ;;
EV68*) UNAME_MACHINE=alphaev68 ;; EV68*) UNAME_MACHINE=alphaev68 ;;
esac esac
objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null objdump --private-headers /bin/sh | grep -q ld.so.1
if test "$?" = 0 ; then LIBC="gnulibc1" ; fi if test "$?" = 0 ; then LIBC="gnulibc1" ; fi
echo ${UNAME_MACHINE}-unknown-linux-${LIBC} echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;; exit ;;
arm*:Linux:*:*)
eval $set_cc_for_build
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_EABI__
then
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
else
if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_PCS_VFP
then
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
else
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf
fi
fi
exit ;;
avr32*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
cris:Linux:*:*)
echo ${UNAME_MACHINE}-axis-linux-${LIBC}
exit ;;
crisv32:Linux:*:*)
echo ${UNAME_MACHINE}-axis-linux-${LIBC}
exit ;;
frv:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
hexagon:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
i*86:Linux:*:*)
echo ${UNAME_MACHINE}-pc-linux-${LIBC}
exit ;;
ia64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
m32r*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
m68*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
mips:Linux:*:* | mips64:Linux:*:*)
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
#undef CPU
#undef ${UNAME_MACHINE}
#undef ${UNAME_MACHINE}el
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
CPU=${UNAME_MACHINE}el
#else
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
CPU=${UNAME_MACHINE}
#else
CPU=
#endif
#endif
EOF
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
;;
or32:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
padre:Linux:*:*) padre:Linux:*:*)
echo sparc-unknown-linux-gnu echo sparc-unknown-linux-${LIBC}
exit ;;
parisc64:Linux:*:* | hppa64:Linux:*:*)
echo hppa64-unknown-linux-${LIBC}
exit ;; exit ;;
parisc:Linux:*:* | hppa:Linux:*:*) parisc:Linux:*:* | hppa:Linux:*:*)
# Look for CPU level # Look for CPU level
@ -963,14 +991,17 @@ EOF
*) echo hppa-unknown-linux-${LIBC} ;; *) echo hppa-unknown-linux-${LIBC} ;;
esac esac
exit ;; exit ;;
parisc64:Linux:*:* | hppa64:Linux:*:*) ppc64:Linux:*:*)
echo hppa64-unknown-linux-${LIBC} echo powerpc64-unknown-linux-${LIBC}
exit ;;
ppc:Linux:*:*)
echo powerpc-unknown-linux-${LIBC}
exit ;; exit ;;
s390:Linux:*:* | s390x:Linux:*:*) s390:Linux:*:* | s390x:Linux:*:*)
echo ${UNAME_MACHINE}-ibm-linux echo ${UNAME_MACHINE}-ibm-linux
exit ;; exit ;;
sh64*:Linux:*:*) sh64*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC} echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;; exit ;;
sh*:Linux:*:*) sh*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC} echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
@ -978,77 +1009,18 @@ EOF
sparc:Linux:*:* | sparc64:Linux:*:*) sparc:Linux:*:* | sparc64:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC} echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;; exit ;;
tile*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;;
vax:Linux:*:*) vax:Linux:*:*)
echo ${UNAME_MACHINE}-dec-linux-${LIBC} echo ${UNAME_MACHINE}-dec-linux-${LIBC}
exit ;; exit ;;
x86_64:Linux:*:*) x86_64:Linux:*:*)
echo x86_64-unknown-linux-${LIBC} echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;; exit ;;
xtensa*:Linux:*:*) xtensa*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-${LIBC} echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
exit ;; exit ;;
i*86:Linux:*:*)
# The BFD linker knows what the default object file format is, so
# first see if it will tell us. cd to the root directory to prevent
# problems with other programs or directories called `ld' in the path.
# Set LC_ALL=C to ensure ld outputs messages in English.
ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
| sed -ne '/supported targets:/!d
s/[ ][ ]*/ /g
s/.*supported targets: *//
s/ .*//
p'`
case "$ld_supported_targets" in
elf32-i386)
TENTATIVE="${UNAME_MACHINE}-pc-linux-${LIBC}"
;;
a.out-i386-linux)
echo "${UNAME_MACHINE}-pc-linux-${LIBC}aout"
exit ;;
"")
# Either a pre-BFD a.out linker (linux-gnuoldld) or
# one that does not give us useful --help.
echo "${UNAME_MACHINE}-pc-linux-${LIBC}oldld"
exit ;;
esac
# This should get integrated into the C code below, but now we hack
if [ "$LIBC" != "gnu" ] ; then echo "$TENTATIVE" && exit 0 ; fi
# Determine whether the default compiler is a.out or elf
eval $set_cc_for_build
sed 's/^ //' << EOF >$dummy.c
#include <features.h>
#ifdef __ELF__
# ifdef __GLIBC__
# if __GLIBC__ >= 2
LIBC=gnu
# else
LIBC=gnulibc1
# endif
# else
LIBC=gnulibc1
# endif
#else
#if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
LIBC=gnu
#else
LIBC=gnuaout
#endif
#endif
#ifdef __dietlibc__
LIBC=dietlibc
#endif
EOF
eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n '
/^LIBC/{
s: ::g
p
}'`"
test x"${LIBC}" != x && {
echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
exit
}
test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; }
;;
i*86:DYNIX/ptx:4*:*) i*86:DYNIX/ptx:4*:*)
# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
# earlier versions are messed up and put the nodename in both # earlier versions are messed up and put the nodename in both
@ -1056,11 +1028,11 @@ EOF
echo i386-sequent-sysv4 echo i386-sequent-sysv4
exit ;; exit ;;
i*86:UNIX_SV:4.2MP:2.*) i*86:UNIX_SV:4.2MP:2.*)
# Unixware is an offshoot of SVR4, but it has its own version # Unixware is an offshoot of SVR4, but it has its own version
# number series starting with 2... # number series starting with 2...
# I am not positive that other SVR4 systems won't match this, # I am not positive that other SVR4 systems won't match this,
# I just have to hope. -- rms. # I just have to hope. -- rms.
# Use sysv4.2uw... so that sysv4* matches it. # Use sysv4.2uw... so that sysv4* matches it.
echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
exit ;; exit ;;
i*86:OS/2:*:*) i*86:OS/2:*:*)
@ -1077,7 +1049,7 @@ EOF
i*86:syllable:*:*) i*86:syllable:*:*)
echo ${UNAME_MACHINE}-pc-syllable echo ${UNAME_MACHINE}-pc-syllable
exit ;; exit ;;
i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
echo i386-unknown-lynxos${UNAME_RELEASE} echo i386-unknown-lynxos${UNAME_RELEASE}
exit ;; exit ;;
i*86:*DOS:*:*) i*86:*DOS:*:*)
@ -1092,7 +1064,7 @@ EOF
fi fi
exit ;; exit ;;
i*86:*:5:[678]*) i*86:*:5:[678]*)
# UnixWare 7.x, OpenUNIX and OpenServer 6. # UnixWare 7.x, OpenUNIX and OpenServer 6.
case `/bin/uname -X | grep "^Machine"` in case `/bin/uname -X | grep "^Machine"` in
*486*) UNAME_MACHINE=i486 ;; *486*) UNAME_MACHINE=i486 ;;
*Pentium) UNAME_MACHINE=i586 ;; *Pentium) UNAME_MACHINE=i586 ;;
@ -1120,10 +1092,13 @@ EOF
exit ;; exit ;;
pc:*:*:*) pc:*:*:*)
# Left here for compatibility: # Left here for compatibility:
# uname -m prints for DJGPP always 'pc', but it prints nothing about # uname -m prints for DJGPP always 'pc', but it prints nothing about
# the processor, so we play safe by assuming i386. # the processor, so we play safe by assuming i586.
echo i386-pc-msdosdjgpp # Note: whatever this is, it MUST be the same as what config.sub
exit ;; # prints for the "djgpp" host, or else GDB configury will decide that
# this is a cross-build.
echo i586-pc-msdosdjgpp
exit ;;
Intel:Mach:3*:*) Intel:Mach:3*:*)
echo i386-pc-mach3 echo i386-pc-mach3
exit ;; exit ;;
@ -1158,8 +1133,18 @@ EOF
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
&& { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \ /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
&& { echo i486-ncr-sysv4; exit; } ;; && { echo i486-ncr-sysv4; exit; } ;;
NCR*:*:4.2:* | MPRAS*:*:4.2:*)
OS_REL='.3'
test -r /etc/.relid \
&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
&& { echo i486-ncr-sysv4.3${OS_REL}; exit; }
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
&& { echo i586-ncr-sysv4.3${OS_REL}; exit; }
/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
&& { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
echo m68k-unknown-lynxos${UNAME_RELEASE} echo m68k-unknown-lynxos${UNAME_RELEASE}
exit ;; exit ;;
@ -1172,7 +1157,7 @@ EOF
rs6000:LynxOS:2.*:*) rs6000:LynxOS:2.*:*)
echo rs6000-unknown-lynxos${UNAME_RELEASE} echo rs6000-unknown-lynxos${UNAME_RELEASE}
exit ;; exit ;;
PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
echo powerpc-unknown-lynxos${UNAME_RELEASE} echo powerpc-unknown-lynxos${UNAME_RELEASE}
exit ;; exit ;;
SM[BE]S:UNIX_SV:*:*) SM[BE]S:UNIX_SV:*:*)
@ -1192,10 +1177,10 @@ EOF
echo ns32k-sni-sysv echo ns32k-sni-sysv
fi fi
exit ;; exit ;;
PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
# says <Richard.M.Bartel@ccMail.Census.GOV> # says <Richard.M.Bartel@ccMail.Census.GOV>
echo i586-unisys-sysv4 echo i586-unisys-sysv4
exit ;; exit ;;
*:UNIX_System_V:4*:FTX*) *:UNIX_System_V:4*:FTX*)
# From Gerald Hewes <hewes@openmarket.com>. # From Gerald Hewes <hewes@openmarket.com>.
# How about differentiating between stratus architectures? -djm # How about differentiating between stratus architectures? -djm
@ -1221,11 +1206,11 @@ EOF
exit ;; exit ;;
R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
if [ -d /usr/nec ]; then if [ -d /usr/nec ]; then
echo mips-nec-sysv${UNAME_RELEASE} echo mips-nec-sysv${UNAME_RELEASE}
else else
echo mips-unknown-sysv${UNAME_RELEASE} echo mips-unknown-sysv${UNAME_RELEASE}
fi fi
exit ;; exit ;;
BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
echo powerpc-be-beos echo powerpc-be-beos
exit ;; exit ;;
@ -1238,6 +1223,9 @@ EOF
BePC:Haiku:*:*) # Haiku running on Intel PC compatible. BePC:Haiku:*:*) # Haiku running on Intel PC compatible.
echo i586-pc-haiku echo i586-pc-haiku
exit ;; exit ;;
x86_64:Haiku:*:*)
echo x86_64-unknown-haiku
exit ;;
SX-4:SUPER-UX:*:*) SX-4:SUPER-UX:*:*)
echo sx4-nec-superux${UNAME_RELEASE} echo sx4-nec-superux${UNAME_RELEASE}
exit ;; exit ;;
@ -1265,6 +1253,16 @@ EOF
*:Darwin:*:*) *:Darwin:*:*)
UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
case $UNAME_PROCESSOR in case $UNAME_PROCESSOR in
i386)
eval $set_cc_for_build
if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
then
UNAME_PROCESSOR="x86_64"
fi
fi ;;
unknown) UNAME_PROCESSOR=powerpc ;; unknown) UNAME_PROCESSOR=powerpc ;;
esac esac
echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
@ -1280,7 +1278,10 @@ EOF
*:QNX:*:4*) *:QNX:*:4*)
echo i386-pc-qnx echo i386-pc-qnx
exit ;; exit ;;
NSE-?:NONSTOP_KERNEL:*:*) NEO-?:NONSTOP_KERNEL:*:*)
echo neo-tandem-nsk${UNAME_RELEASE}
exit ;;
NSE-*:NONSTOP_KERNEL:*:*)
echo nse-tandem-nsk${UNAME_RELEASE} echo nse-tandem-nsk${UNAME_RELEASE}
exit ;; exit ;;
NSR-?:NONSTOP_KERNEL:*:*) NSR-?:NONSTOP_KERNEL:*:*)
@ -1325,13 +1326,13 @@ EOF
echo pdp10-unknown-its echo pdp10-unknown-its
exit ;; exit ;;
SEI:*:*:SEIUX) SEI:*:*:SEIUX)
echo mips-sei-seiux${UNAME_RELEASE} echo mips-sei-seiux${UNAME_RELEASE}
exit ;; exit ;;
*:DragonFly:*:*) *:DragonFly:*:*)
echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
exit ;; exit ;;
*:*VMS:*:*) *:*VMS:*:*)
UNAME_MACHINE=`(uname -p) 2>/dev/null` UNAME_MACHINE=`(uname -p) 2>/dev/null`
case "${UNAME_MACHINE}" in case "${UNAME_MACHINE}" in
A*) echo alpha-dec-vms ; exit ;; A*) echo alpha-dec-vms ; exit ;;
I*) echo ia64-dec-vms ; exit ;; I*) echo ia64-dec-vms ; exit ;;
@ -1346,11 +1347,14 @@ EOF
i*86:rdos:*:*) i*86:rdos:*:*)
echo ${UNAME_MACHINE}-pc-rdos echo ${UNAME_MACHINE}-pc-rdos
exit ;; exit ;;
i*86:AROS:*:*)
echo ${UNAME_MACHINE}-pc-aros
exit ;;
x86_64:VMkernel:*:*)
echo ${UNAME_MACHINE}-unknown-esx
exit ;;
esac esac
#echo '(No uname command or uname output not recognized.)' 1>&2
#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
eval $set_cc_for_build eval $set_cc_for_build
cat >$dummy.c <<EOF cat >$dummy.c <<EOF
#ifdef _SEQUENT_ #ifdef _SEQUENT_
@ -1368,11 +1372,11 @@ main ()
#include <sys/param.h> #include <sys/param.h>
printf ("m68k-sony-newsos%s\n", printf ("m68k-sony-newsos%s\n",
#ifdef NEWSOS4 #ifdef NEWSOS4
"4" "4"
#else #else
"" ""
#endif #endif
); exit (0); ); exit (0);
#endif #endif
#endif #endif

View File

@ -1,17 +1,17 @@
/* config.h. Generated from config.h.in by configure. */ /* config.h. Generated from config.h.in by configure. */
/* config.h.in. Generated from configure.ac by autoheader. */ /* config.h.in. Generated from configure.ac by autoheader. */
/* PCRE is written in Standard C, but there are a few non-standard things it
can cope with, allowing it to run on SunOS4 and other "close to standard"
systems.
/* On Unix-like systems config.h.in is converted by "configure" into config.h. In environments that support the facilities, config.h.in is converted by
Some other environments also support the use of "configure". PCRE is written in "configure", or config-cmake.h.in is converted by CMake, into config.h. If you
Standard C, but there are a few non-standard things it can cope with, allowing are going to build PCRE "by hand" without using "configure" or CMake, you
it to run on SunOS4 and other "close to standard" systems. should copy the distributed config.h.generic to config.h, and then edit the
macro definitions to be the way you need them. You must then add
If you are going to build PCRE "by hand" on a system without "configure" you -DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
should copy the distributed config.h.generic to config.h, and then set up the at the start of every source.
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
all of your compile commands, so that config.h is included at the start of
every source.
Alternatively, you can avoid editing by using -D on the compiler command line Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H. to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
@ -21,20 +21,28 @@ HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
them both to 0; an emulation function will be used. */ them both to 0; an emulation function will be used. */
/* By default, the \R escape sequence matches any Unicode line ending /* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined, this is character or sequence of characters. If BSR_ANYCRLF is defined (to any
changed so that backslash-R matches only CR, LF, or CRLF. The build- time value), this is changed so that backslash-R matches only CR, LF, or CRLF.
default can be overridden by the user of PCRE at runtime. On systems that The build-time default can be overridden by the user of PCRE at runtime. */
support it, "configure" can be used to override the default. */
/* #undef BSR_ANYCRLF */ /* #undef BSR_ANYCRLF */
/* If you are compiling for a system that uses EBCDIC instead of ASCII /* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro as 1. On systems that can use character codes, define this macro to any value. You must also edit the
"configure", this can be done via --enable-ebcdic. PCRE will then assume NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
that all input strings are in EBCDIC. If you do not define this macro, PCRE On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
will assume input strings are ASCII or UTF-8 Unicode. It is not possible to automatically adjusted. When EBCDIC is set, PCRE assumes that all input
build a version of PCRE that supports both EBCDIC and UTF-8. */ strings are in EBCDIC. If you do not define this macro, PCRE will assume
input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
/* #undef EBCDIC */ /* #undef EBCDIC */
/* In an EBCDIC environment, define this macro to any value to arrange for the
NL character to be 0x25 instead of the default 0x15. NL plays the role that
LF does in an ASCII/Unicode environment. The value must also be set in the
NEWLINE macro below. On systems that can use "configure" or CMake to set
EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
/* #undef EBCDIC_NL25 */
/* Define to 1 if you have the `bcopy' function. */ /* Define to 1 if you have the `bcopy' function. */
#ifndef HAVE_BCOPY #ifndef HAVE_BCOPY
#define HAVE_BCOPY 1 #define HAVE_BCOPY 1
@ -58,6 +66,12 @@ them both to 0; an emulation function will be used. */
#define HAVE_DLFCN_H 1 #define HAVE_DLFCN_H 1
#endif #endif
/* Define to 1 if you have the <editline/readline.h> header file. */
/* #undef HAVE_EDITLINE_READLINE_H */
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
/* #undef HAVE_EDIT_READLINE_READLINE_H */
/* Define to 1 if you have the <inttypes.h> header file. */ /* Define to 1 if you have the <inttypes.h> header file. */
#ifndef HAVE_INTTYPES_H #ifndef HAVE_INTTYPES_H
#define HAVE_INTTYPES_H 1 #define HAVE_INTTYPES_H 1
@ -83,15 +97,17 @@ them both to 0; an emulation function will be used. */
#define HAVE_MEMORY_H 1 #define HAVE_MEMORY_H 1
#endif #endif
/* Define if you have POSIX threads libraries and header files. */
/* #undef HAVE_PTHREAD */
/* Have PTHREAD_PRIO_INHERIT. */
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
/* Define to 1 if you have the <readline/history.h> header file. */ /* Define to 1 if you have the <readline/history.h> header file. */
#ifndef HAVE_READLINE_HISTORY_H /* #undef HAVE_READLINE_HISTORY_H */
#define HAVE_READLINE_HISTORY_H 1
#endif
/* Define to 1 if you have the <readline/readline.h> header file. */ /* Define to 1 if you have the <readline/readline.h> header file. */
#ifndef HAVE_READLINE_READLINE_H /* #undef HAVE_READLINE_READLINE_H */
#define HAVE_READLINE_READLINE_H 1
#endif
/* Define to 1 if you have the <stdint.h> header file. */ /* Define to 1 if you have the <stdint.h> header file. */
#ifndef HAVE_STDINT_H #ifndef HAVE_STDINT_H
@ -123,10 +139,13 @@ them both to 0; an emulation function will be used. */
#define HAVE_STRING_H 1 #define HAVE_STRING_H 1
#endif #endif
/* Define to 1 if you have the `strtoll' function. */ /* Define to 1 if you have `strtoimax'. */
/* #undef HAVE_STRTOIMAX */
/* Define to 1 if you have `strtoll'. */
/* #undef HAVE_STRTOLL */ /* #undef HAVE_STRTOLL */
/* Define to 1 if you have the `strtoq' function. */ /* Define to 1 if you have `strtoq'. */
#ifndef HAVE_STRTOQ #ifndef HAVE_STRTOQ
#define HAVE_STRTOQ 1 #define HAVE_STRTOQ 1
#endif #endif
@ -154,6 +173,12 @@ them both to 0; an emulation function will be used. */
#define HAVE_UNSIGNED_LONG_LONG 1 #define HAVE_UNSIGNED_LONG_LONG 1
#endif #endif
/* Define to 1 or 0, depending whether the compiler supports simple visibility
declarations. */
#ifndef HAVE_VISIBILITY
#define HAVE_VISIBILITY 1
#endif
/* Define to 1 if you have the <windows.h> header file. */ /* Define to 1 if you have the <windows.h> header file. */
/* #undef HAVE_WINDOWS_H */ /* #undef HAVE_WINDOWS_H */
@ -162,26 +187,30 @@ them both to 0; an emulation function will be used. */
#define HAVE_ZLIB_H 1 #define HAVE_ZLIB_H 1
#endif #endif
/* Define to 1 if you have the `_strtoi64' function. */ /* Define to 1 if you have `_strtoi64'. */
/* #undef HAVE__STRTOI64 */ /* #undef HAVE__STRTOI64 */
/* The value of LINK_SIZE determines the number of bytes used to store links /* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases. compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
for longer patterns in extreme cases. On systems that support it, for longer patterns in extreme cases. */
"configure" can be used to override this default. */
#ifndef LINK_SIZE #ifndef LINK_SIZE
#define LINK_SIZE 2 #define LINK_SIZE 2
#endif #endif
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#ifndef LT_OBJDIR
#define LT_OBJDIR ".libs/"
#endif
/* The value of MATCH_LIMIT determines the default number of times the /* The value of MATCH_LIMIT determines the default number of times the
internal match() function can be called during a single execution of internal match() function can be called during a single execution of
pcre_exec(). There is a runtime interface for setting a different limit. pcre_exec(). There is a runtime interface for setting a different limit.
The limit exists in order to catch runaway regular expressions that take The limit exists in order to catch runaway regular expressions that take
for ever to determine that they do not match. The default is set very large for ever to determine that they do not match. The default is set very large
so that it does not accidentally catch legitimate cases. On systems that so that it does not accidentally catch legitimate cases. */
support it, "configure" can be used to override this default default. */
#ifndef MATCH_LIMIT #ifndef MATCH_LIMIT
#define MATCH_LIMIT 10000000 #define MATCH_LIMIT 10000000
#endif #endif
@ -193,8 +222,7 @@ them both to 0; an emulation function will be used. */
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
match(). To have any useful effect, it must be less than the value of match(). To have any useful effect, it must be less than the value of
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
a runtime method for setting a different limit. On systems that support it, a runtime method for setting a different limit. */
"configure" can be used to override the default. */
#ifndef MATCH_LIMIT_RECURSION #ifndef MATCH_LIMIT_RECURSION
#define MATCH_LIMIT_RECURSION MATCH_LIMIT #define MATCH_LIMIT_RECURSION MATCH_LIMIT
#endif #endif
@ -213,22 +241,28 @@ them both to 0; an emulation function will be used. */
#define MAX_NAME_SIZE 32 #define MAX_NAME_SIZE 32
#endif #endif
/* The value of NEWLINE determines the newline character sequence. On systems /* The value of NEWLINE determines the default newline character sequence.
that support it, "configure" can be used to override the default, which is PCRE client programs can override this by selecting other values at run
10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or -2 time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
(ANYCRLF). */ (CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or
3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and
0x25) that are used as the NL line terminator that is equivalent to ASCII
LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
or -2 (ANYCRLF). */
#ifndef NEWLINE #ifndef NEWLINE
#define NEWLINE 10 #define NEWLINE 10
#endif #endif
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
/* #undef NO_MINUS_C_MINUS_O */
/* PCRE uses recursive function calls to handle backtracking while matching. /* PCRE uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited This can sometimes be a problem on systems that have stacks of limited
size. Define NO_RECURSE to get a version that doesn't use recursion in the size. Define NO_RECURSE to any value to get a version that doesn't use
match() function; instead it creates its own stack by steam using recursion in the match() function; instead it creates its own stack by
pcre_recurse_malloc() to obtain memory from the heap. For more detail, see steam using pcre_recurse_malloc() to obtain memory from the heap. For more
the comments and other stuff just above the match() function. On systems detail, see the comments and other stuff just above the match() function.
that support it, "configure" can be used to set this in the Makefile (use */
--disable-stack-for-recursion). */
/* #undef NO_RECURSE */ /* #undef NO_RECURSE */
/* Name of package */ /* Name of package */
@ -241,27 +275,38 @@ them both to 0; an emulation function will be used. */
#define PACKAGE_NAME "PCRE" #define PACKAGE_NAME "PCRE"
/* Define to the full name and version of this package. */ /* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE 7.9" #define PACKAGE_STRING "PCRE 8.32"
/* Define to the one symbol short name of this package. */ /* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre" #define PACKAGE_TARNAME "pcre"
/* Define to the version of this package. */ /* Define to the home page for this package. */
#define PACKAGE_VERSION "7.9" #define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "8.32"
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
pcregrep to hold parts of the file it is searching. This is also the
minimum value. The actual amount of memory used by pcregrep is three times
this number, because it allows for the buffering of "before" and "after"
lines. */
#ifndef PCREGREP_BUFSIZE
#define PCREGREP_BUFSIZE 20480
#endif
/* If you are compiling for a system other than a Unix-like system or /* If you are compiling for a system other than a Unix-like system or
Win32, and it needs some magic to be inserted before the definition Win32, and it needs some magic to be inserted before the definition
of a function that is exported by the library, define this macro to of a function that is exported by the library, define this macro to
contain the relevant magic. If you do not define this macro, it contain the relevant magic. If you do not define this macro, a suitable
defaults to "extern" for a C compiler and "extern C" for a C++ __declspec value is used for Windows systems; in other environments
compiler on non-Win32 systems. This macro appears at the start of "extern" is used for a C compiler and "extern C" for a C++ compiler.
every exported function that is part of the external API. It does This macro appears at the start of every exported function that is part
not appear on functions that are "external" in the C sense, but of the external API. It does not appear on functions that are "external"
which are internal to the library. */ in the C sense, but which are internal to the library. */
/* #undef PCRE_EXP_DEFN */ /* #undef PCRE_EXP_DEFN */
/* Define if linking statically (TODO: make nice with Libtool) */ /* Define to any value if linking statically (TODO: make nice with Libtool) */
/* #undef PCRE_STATIC */ /* #undef PCRE_STATIC */
/* When calling PCRE via the POSIX interface, additional working storage is /* When calling PCRE via the POSIX interface, additional working storage is
@ -270,44 +315,78 @@ them both to 0; an emulation function will be used. */
only two. If the number of expected substrings is small, the wrapper only two. If the number of expected substrings is small, the wrapper
function uses space on the stack, because this is faster than using function uses space on the stack, because this is faster than using
malloc() for each call. The threshold above which the stack is no longer malloc() for each call. The threshold above which the stack is no longer
used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it, used is defined by POSIX_MALLOC_THRESHOLD. */
"configure" can be used to override this default. */
#ifndef POSIX_MALLOC_THRESHOLD #ifndef POSIX_MALLOC_THRESHOLD
#define POSIX_MALLOC_THRESHOLD 10 #define POSIX_MALLOC_THRESHOLD 10
#endif #endif
/* Define to necessary symbol if this constant uses a non-standard name on
your system. */
/* #undef PTHREAD_CREATE_JOINABLE */
/* Define to 1 if you have the ANSI C header files. */ /* Define to 1 if you have the ANSI C header files. */
#ifndef STDC_HEADERS #ifndef STDC_HEADERS
#define STDC_HEADERS 1 #define STDC_HEADERS 1
#endif #endif
/* Define to allow pcregrep to be linked with libbz2, so that it is able to /* Define to allow pcretest and pcregrep to be linked with gcov, so that they
handle .bz2 files. */ are able to generate code coverage reports. */
/* #undef SUPPORT_GCOV */
/* Define to any value to enable support for Just-In-Time compiling. */
/* #undef SUPPORT_JIT */
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
is able to handle .bz2 files. */
/* #undef SUPPORT_LIBBZ2 */ /* #undef SUPPORT_LIBBZ2 */
/* Define to allow pcretest to be linked with libreadline. */ /* Define to any value to allow pcretest to be linked with libedit. */
/* #undef SUPPORT_LIBEDIT */
/* Define to any value to allow pcretest to be linked with libreadline. */
/* #undef SUPPORT_LIBREADLINE */ /* #undef SUPPORT_LIBREADLINE */
/* Define to allow pcregrep to be linked with libz, so that it is able to /* Define to any value to allow pcregrep to be linked with libz, so that it is
handle .gz files. */ able to handle .gz files. */
/* #undef SUPPORT_LIBZ */ /* #undef SUPPORT_LIBZ */
/* Define to enable support for Unicode properties */ /* Define to any value to enable the 16 bit PCRE library. */
/* #undef SUPPORT_PCRE16 */
/* Define to any value to enable the 32 bit PCRE library. */
/* #undef SUPPORT_PCRE32 */
/* Define to any value to enable the 8 bit PCRE library. */
#ifndef SUPPORT_PCRE8
#define SUPPORT_PCRE8 /**/
#endif
/* Define to any value to enable JIT support in pcregrep. */
/* #undef SUPPORT_PCREGREP_JIT */
/* Define to any value to enable support for Unicode properties. */
/* #undef SUPPORT_UCP */ /* #undef SUPPORT_UCP */
/* Define to enable support for the UTF-8 Unicode encoding. This will work /* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
even in an EBCDIC environment, but it is incompatible with the EBCDIC This will work even in an EBCDIC environment, but it is incompatible with
macro. That is, PCRE can support *either* EBCDIC code *or* ASCII/UTF-8, but the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
not both at once. */ ASCII/UTF-8/16/32, but not both at once. */
/* #undef SUPPORT_UTF8 */ /* #undef SUPPORT_UTF */
/* Valgrind support to find invalid memory reads. */
/* #undef SUPPORT_VALGRIND */
/* Version number of package */ /* Version number of package */
#ifndef VERSION #ifndef VERSION
#define VERSION "7.9" #define VERSION "8.32"
#endif #endif
/* Define to empty if `const' does not conform to ANSI C. */ /* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */ /* #undef const */
/* Define to the type of a signed integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
/* #undef int64_t */
/* Define to `unsigned int' if <sys/types.h> does not define. */ /* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */ /* #undef size_t */

View File

@ -1,16 +1,17 @@
/* config.h.in. Generated from configure.ac by autoheader. */ /* config.h.in. Generated from configure.ac by autoheader. */
/* On Unix-like systems config.h.in is converted by "configure" into config.h. /* PCRE is written in Standard C, but there are a few non-standard things it
Some other environments also support the use of "configure". PCRE is written in can cope with, allowing it to run on SunOS4 and other "close to standard"
Standard C, but there are a few non-standard things it can cope with, allowing systems.
it to run on SunOS4 and other "close to standard" systems.
If you are going to build PCRE "by hand" on a system without "configure" you In environments that support the facilities, config.h.in is converted by
should copy the distributed config.h.generic to config.h, and then set up the "configure", or config-cmake.h.in is converted by CMake, into config.h. If you
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to are going to build PCRE "by hand" without using "configure" or CMake, you
all of your compile commands, so that config.h is included at the start of should copy the distributed config.h.generic to config.h, and then edit the
every source. macro definitions to be the way you need them. You must then add
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
at the start of every source.
Alternatively, you can avoid editing by using -D on the compiler command line Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H. to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
@ -20,20 +21,28 @@ HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
them both to 0; an emulation function will be used. */ them both to 0; an emulation function will be used. */
/* By default, the \R escape sequence matches any Unicode line ending /* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined, this is character or sequence of characters. If BSR_ANYCRLF is defined (to any
changed so that backslash-R matches only CR, LF, or CRLF. The build-time value), this is changed so that backslash-R matches only CR, LF, or CRLF.
default can be overridden by the user of PCRE at runtime. On systems that The build-time default can be overridden by the user of PCRE at runtime. */
support it, "configure" can be used to override the default. */
#undef BSR_ANYCRLF #undef BSR_ANYCRLF
/* If you are compiling for a system that uses EBCDIC instead of ASCII /* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro as 1. On systems that can use character codes, define this macro to any value. You must also edit the
"configure", this can be done via --enable-ebcdic. PCRE will then assume NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
that all input strings are in EBCDIC. If you do not define this macro, PCRE On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
will assume input strings are ASCII or UTF-8 Unicode. It is not possible to automatically adjusted. When EBCDIC is set, PCRE assumes that all input
build a version of PCRE that supports both EBCDIC and UTF-8. */ strings are in EBCDIC. If you do not define this macro, PCRE will assume
input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
#undef EBCDIC #undef EBCDIC
/* In an EBCDIC environment, define this macro to any value to arrange for the
NL character to be 0x25 instead of the default 0x15. NL plays the role that
LF does in an ASCII/Unicode environment. The value must also be set in the
NEWLINE macro below. On systems that can use "configure" or CMake to set
EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
#undef EBCDIC_NL25
/* Define to 1 if you have the `bcopy' function. */ /* Define to 1 if you have the `bcopy' function. */
#undef HAVE_BCOPY #undef HAVE_BCOPY
@ -49,6 +58,12 @@ them both to 0; an emulation function will be used. */
/* Define to 1 if you have the <dlfcn.h> header file. */ /* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H #undef HAVE_DLFCN_H
/* Define to 1 if you have the <editline/readline.h> header file. */
#undef HAVE_EDITLINE_READLINE_H
/* Define to 1 if you have the <edit/readline/readline.h> header file. */
#undef HAVE_EDIT_READLINE_READLINE_H
/* Define to 1 if you have the <inttypes.h> header file. */ /* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H #undef HAVE_INTTYPES_H
@ -64,6 +79,12 @@ them both to 0; an emulation function will be used. */
/* Define to 1 if you have the <memory.h> header file. */ /* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H #undef HAVE_MEMORY_H
/* Define if you have POSIX threads libraries and header files. */
#undef HAVE_PTHREAD
/* Have PTHREAD_PRIO_INHERIT. */
#undef HAVE_PTHREAD_PRIO_INHERIT
/* Define to 1 if you have the <readline/history.h> header file. */ /* Define to 1 if you have the <readline/history.h> header file. */
#undef HAVE_READLINE_HISTORY_H #undef HAVE_READLINE_HISTORY_H
@ -88,10 +109,13 @@ them both to 0; an emulation function will be used. */
/* Define to 1 if you have the <string.h> header file. */ /* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H #undef HAVE_STRING_H
/* Define to 1 if you have the `strtoll' function. */ /* Define to 1 if you have `strtoimax'. */
#undef HAVE_STRTOIMAX
/* Define to 1 if you have `strtoll'. */
#undef HAVE_STRTOLL #undef HAVE_STRTOLL
/* Define to 1 if you have the `strtoq' function. */ /* Define to 1 if you have `strtoq'. */
#undef HAVE_STRTOQ #undef HAVE_STRTOQ
/* Define to 1 if you have the <sys/stat.h> header file. */ /* Define to 1 if you have the <sys/stat.h> header file. */
@ -109,30 +133,36 @@ them both to 0; an emulation function will be used. */
/* Define to 1 if the system has the type `unsigned long long'. */ /* Define to 1 if the system has the type `unsigned long long'. */
#undef HAVE_UNSIGNED_LONG_LONG #undef HAVE_UNSIGNED_LONG_LONG
/* Define to 1 or 0, depending whether the compiler supports simple visibility
declarations. */
#undef HAVE_VISIBILITY
/* Define to 1 if you have the <windows.h> header file. */ /* Define to 1 if you have the <windows.h> header file. */
#undef HAVE_WINDOWS_H #undef HAVE_WINDOWS_H
/* Define to 1 if you have the <zlib.h> header file. */ /* Define to 1 if you have the <zlib.h> header file. */
#undef HAVE_ZLIB_H #undef HAVE_ZLIB_H
/* Define to 1 if you have the `_strtoi64' function. */ /* Define to 1 if you have `_strtoi64'. */
#undef HAVE__STRTOI64 #undef HAVE__STRTOI64
/* The value of LINK_SIZE determines the number of bytes used to store links /* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases. compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
for longer patterns in extreme cases. On systems that support it, for longer patterns in extreme cases. */
"configure" can be used to override this default. */
#undef LINK_SIZE #undef LINK_SIZE
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
/* The value of MATCH_LIMIT determines the default number of times the /* The value of MATCH_LIMIT determines the default number of times the
internal match() function can be called during a single execution of internal match() function can be called during a single execution of
pcre_exec(). There is a runtime interface for setting a different limit. pcre_exec(). There is a runtime interface for setting a different limit.
The limit exists in order to catch runaway regular expressions that take The limit exists in order to catch runaway regular expressions that take
for ever to determine that they do not match. The default is set very large for ever to determine that they do not match. The default is set very large
so that it does not accidentally catch legitimate cases. On systems that so that it does not accidentally catch legitimate cases. */
support it, "configure" can be used to override this default. */
#undef MATCH_LIMIT #undef MATCH_LIMIT
/* The above limit applies to all calls of match(), whether or not they /* The above limit applies to all calls of match(), whether or not they
@ -142,8 +172,7 @@ them both to 0; an emulation function will be used. */
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
match(). To have any useful effect, it must be less than the value of match(). To have any useful effect, it must be less than the value of
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
a runtime method for setting a different limit. On systems that support it, a runtime method for setting a different limit. */
"configure" can be used to override the default. */
#undef MATCH_LIMIT_RECURSION #undef MATCH_LIMIT_RECURSION
/* This limit is parameterized just in case anybody ever wants to change it. /* This limit is parameterized just in case anybody ever wants to change it.
@ -156,20 +185,26 @@ them both to 0; an emulation function will be used. */
overflow caused by enormously large patterns. */ overflow caused by enormously large patterns. */
#undef MAX_NAME_SIZE #undef MAX_NAME_SIZE
/* The value of NEWLINE determines the newline character sequence. On systems /* The value of NEWLINE determines the default newline character sequence.
that support it, "configure" can be used to override the default, which is PCRE client programs can override this by selecting other values at run
10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or -2 time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
(ANYCRLF). */ (CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or
3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and
0x25) that are used as the NL line terminator that is equivalent to ASCII
LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
or -2 (ANYCRLF). */
#undef NEWLINE #undef NEWLINE
/* Define to 1 if your C compiler doesn't accept -c and -o together. */
#undef NO_MINUS_C_MINUS_O
/* PCRE uses recursive function calls to handle backtracking while matching. /* PCRE uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited This can sometimes be a problem on systems that have stacks of limited
size. Define NO_RECURSE to get a version that doesn't use recursion in the size. Define NO_RECURSE to any value to get a version that doesn't use
match() function; instead it creates its own stack by steam using recursion in the match() function; instead it creates its own stack by
pcre_recurse_malloc() to obtain memory from the heap. For more detail, see steam using pcre_recurse_malloc() to obtain memory from the heap. For more
the comments and other stuff just above the match() function. On systems detail, see the comments and other stuff just above the match() function.
that support it, "configure" can be used to set this in the Makefile (use */
--disable-stack-for-recursion). */
#undef NO_RECURSE #undef NO_RECURSE
/* Name of package */ /* Name of package */
@ -187,22 +222,50 @@ them both to 0; an emulation function will be used. */
/* Define to the one symbol short name of this package. */ /* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME #undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */ /* Define to the version of this package. */
#undef PACKAGE_VERSION #undef PACKAGE_VERSION
/* to make a symbol visible */
#undef PCRECPP_EXP_DECL
/* to make a symbol visible */
#undef PCRECPP_EXP_DEFN
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
pcregrep to hold parts of the file it is searching. This is also the
minimum value. The actual amount of memory used by pcregrep is three times
this number, because it allows for the buffering of "before" and "after"
lines. */
#undef PCREGREP_BUFSIZE
/* to make a symbol visible */
#undef PCREPOSIX_EXP_DECL
/* to make a symbol visible */
#undef PCREPOSIX_EXP_DEFN
/* to make a symbol visible */
#undef PCRE_EXP_DATA_DEFN
/* to make a symbol visible */
#undef PCRE_EXP_DECL
/* If you are compiling for a system other than a Unix-like system or /* If you are compiling for a system other than a Unix-like system or
Win32, and it needs some magic to be inserted before the definition Win32, and it needs some magic to be inserted before the definition
of a function that is exported by the library, define this macro to of a function that is exported by the library, define this macro to
contain the relevant magic. If you do not define this macro, it contain the relevant magic. If you do not define this macro, a suitable
defaults to "extern" for a C compiler and "extern C" for a C++ __declspec value is used for Windows systems; in other environments
compiler on non-Win32 systems. This macro appears at the start of "extern" is used for a C compiler and "extern C" for a C++ compiler.
every exported function that is part of the external API. It does This macro appears at the start of every exported function that is part
not appear on functions that are "external" in the C sense, but of the external API. It does not appear on functions that are "external"
which are internal to the library. */ in the C sense, but which are internal to the library. */
#undef PCRE_EXP_DEFN #undef PCRE_EXP_DEFN
/* Define if linking statically (TODO: make nice with Libtool) */ /* Define to any value if linking statically (TODO: make nice with Libtool) */
#undef PCRE_STATIC #undef PCRE_STATIC
/* When calling PCRE via the POSIX interface, additional working storage is /* When calling PCRE via the POSIX interface, additional working storage is
@ -211,32 +274,60 @@ them both to 0; an emulation function will be used. */
only two. If the number of expected substrings is small, the wrapper only two. If the number of expected substrings is small, the wrapper
function uses space on the stack, because this is faster than using function uses space on the stack, because this is faster than using
malloc() for each call. The threshold above which the stack is no longer malloc() for each call. The threshold above which the stack is no longer
used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it, used is defined by POSIX_MALLOC_THRESHOLD. */
"configure" can be used to override this default. */
#undef POSIX_MALLOC_THRESHOLD #undef POSIX_MALLOC_THRESHOLD
/* Define to necessary symbol if this constant uses a non-standard name on
your system. */
#undef PTHREAD_CREATE_JOINABLE
/* Define to 1 if you have the ANSI C header files. */ /* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS #undef STDC_HEADERS
/* Define to allow pcregrep to be linked with libbz2, so that it is able to /* Define to allow pcretest and pcregrep to be linked with gcov, so that they
handle .bz2 files. */ are able to generate code coverage reports. */
#undef SUPPORT_GCOV
/* Define to any value to enable support for Just-In-Time compiling. */
#undef SUPPORT_JIT
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
is able to handle .bz2 files. */
#undef SUPPORT_LIBBZ2 #undef SUPPORT_LIBBZ2
/* Define to allow pcretest to be linked with libreadline. */ /* Define to any value to allow pcretest to be linked with libedit. */
#undef SUPPORT_LIBEDIT
/* Define to any value to allow pcretest to be linked with libreadline. */
#undef SUPPORT_LIBREADLINE #undef SUPPORT_LIBREADLINE
/* Define to allow pcregrep to be linked with libz, so that it is able to /* Define to any value to allow pcregrep to be linked with libz, so that it is
handle .gz files. */ able to handle .gz files. */
#undef SUPPORT_LIBZ #undef SUPPORT_LIBZ
/* Define to enable support for Unicode properties */ /* Define to any value to enable the 16 bit PCRE library. */
#undef SUPPORT_PCRE16
/* Define to any value to enable the 32 bit PCRE library. */
#undef SUPPORT_PCRE32
/* Define to any value to enable the 8 bit PCRE library. */
#undef SUPPORT_PCRE8
/* Define to any value to enable JIT support in pcregrep. */
#undef SUPPORT_PCREGREP_JIT
/* Define to any value to enable support for Unicode properties. */
#undef SUPPORT_UCP #undef SUPPORT_UCP
/* Define to enable support for the UTF-8 Unicode encoding. This will work /* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
even in an EBCDIC environment, but it is incompatible with the EBCDIC This will work even in an EBCDIC environment, but it is incompatible with
macro. That is, PCRE can support *either* EBCDIC code *or* ASCII/UTF-8, but the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
not both at once. */ ASCII/UTF-8/16/32, but not both at once. */
#undef SUPPORT_UTF8 #undef SUPPORT_UTF
/* Valgrind support to find invalid memory reads. */
#undef SUPPORT_VALGRIND
/* Version number of package */ /* Version number of package */
#undef VERSION #undef VERSION
@ -244,5 +335,9 @@ them both to 0; an emulation function will be used. */
/* Define to empty if `const' does not conform to ANSI C. */ /* Define to empty if `const' does not conform to ANSI C. */
#undef const #undef const
/* Define to the type of a signed integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
#undef int64_t
/* Define to `unsigned int' if <sys/types.h> does not define. */ /* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t #undef size_t

259
tools/pcre/config.sub vendored
View File

@ -1,10 +1,10 @@
#! /bin/sh #! /bin/sh
# Configuration validation subroutine script. # Configuration validation subroutine script.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# Free Software Foundation, Inc. # 2011, 2012 Free Software Foundation, Inc.
timestamp='2008-09-08' timestamp='2012-08-18'
# This file is (in principle) common to ALL GNU software. # This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software # The presence of a machine in this file suggests that SOME GNU software
@ -21,9 +21,7 @@ timestamp='2008-09-08'
# GNU General Public License for more details. # GNU General Public License for more details.
# #
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software # along with this program; if not, see <http://www.gnu.org/licenses/>.
# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
# 02110-1301, USA.
# #
# As a special exception to the GNU General Public License, if you # As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a # distribute this file as part of a program that contains a
@ -32,13 +30,16 @@ timestamp='2008-09-08'
# Please send patches to <config-patches@gnu.org>. Submit a context # Please send patches to <config-patches@gnu.org>. Submit a context
# diff and a properly formatted ChangeLog entry. # diff and a properly formatted GNU ChangeLog entry.
# #
# Configuration subroutine to validate and canonicalize a configuration type. # Configuration subroutine to validate and canonicalize a configuration type.
# Supply the specified configuration type as an argument. # Supply the specified configuration type as an argument.
# If it is invalid, we print an error message on stderr and exit with code 1. # If it is invalid, we print an error message on stderr and exit with code 1.
# Otherwise, we print the canonical config type on stdout and succeed. # Otherwise, we print the canonical config type on stdout and succeed.
# You can get the latest version of this script from:
# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
# This file is supposed to be the same for all GNU packages # This file is supposed to be the same for all GNU packages
# and recognize all the CPU types, system types and aliases # and recognize all the CPU types, system types and aliases
# that are meaningful with *any* GNU software. # that are meaningful with *any* GNU software.
@ -72,8 +73,9 @@ Report bugs and patches to <config-patches@gnu.org>."
version="\ version="\
GNU config.sub ($timestamp) GNU config.sub ($timestamp)
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@ -120,12 +122,18 @@ esac
# Here we must recognize all the valid KERNEL-OS combinations. # Here we must recognize all the valid KERNEL-OS combinations.
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
case $maybe_os in case $maybe_os in
nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \ nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
knetbsd*-gnu* | netbsd*-gnu* | \
kopensolaris*-gnu* | \
storm-chaos* | os2-emx* | rtmk-nova*) storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os os=-$maybe_os
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
;; ;;
android-linux)
os=-linux-android
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
;;
*) *)
basic_machine=`echo $1 | sed 's/-[^-]*$//'` basic_machine=`echo $1 | sed 's/-[^-]*$//'`
if [ $basic_machine != $1 ] if [ $basic_machine != $1 ]
@ -148,10 +156,13 @@ case $os in
-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
-apple | -axis | -knuth | -cray) -apple | -axis | -knuth | -cray | -microblaze)
os= os=
basic_machine=$1 basic_machine=$1
;; ;;
-bluegene*)
os=-cnk
;;
-sim | -cisco | -oki | -wec | -winbond) -sim | -cisco | -oki | -wec | -winbond)
os= os=
basic_machine=$1 basic_machine=$1
@ -166,10 +177,10 @@ case $os in
os=-chorusos os=-chorusos
basic_machine=$1 basic_machine=$1
;; ;;
-chorusrdb) -chorusrdb)
os=-chorusrdb os=-chorusrdb
basic_machine=$1 basic_machine=$1
;; ;;
-hiux*) -hiux*)
os=-hiuxwe2 os=-hiuxwe2
;; ;;
@ -214,6 +225,12 @@ case $os in
-isc*) -isc*)
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;; ;;
-lynx*178)
os=-lynxos178
;;
-lynx*5)
os=-lynxos5
;;
-lynx*) -lynx*)
os=-lynxos os=-lynxos
;; ;;
@ -238,17 +255,23 @@ case $basic_machine in
# Some are omitted here because they have special meanings below. # Some are omitted here because they have special meanings below.
1750a | 580 \ 1750a | 580 \
| a29k \ | a29k \
| aarch64 | aarch64_be \
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
| am33_2.0 \ | am33_2.0 \
| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
| be32 | be64 \
| bfin \ | bfin \
| c4x | clipper \ | c4x | clipper \
| d10v | d30v | dlx | dsp16xx | dvp \ | d10v | d30v | dlx | dsp16xx | dvp \
| epiphany \
| fido | fr30 | frv \ | fido | fr30 | frv \
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
| hexagon \
| i370 | i860 | i960 | ia64 \ | i370 | i860 | i960 | ia64 \
| ip2k | iq2000 \ | ip2k | iq2000 \
| le32 | le64 \
| lm32 \
| m32c | m32r | m32rle | m68000 | m68k | m88k \ | m32c | m32r | m32rle | m68000 | m68k | m88k \
| maxq | mb | microblaze | mcore | mep | metag \ | maxq | mb | microblaze | mcore | mep | metag \
| mips | mipsbe | mipseb | mipsel | mipsle \ | mips | mipsbe | mipseb | mipsel | mipsle \
@ -270,29 +293,42 @@ case $basic_machine in
| mipsisa64sr71k | mipsisa64sr71kel \ | mipsisa64sr71k | mipsisa64sr71kel \
| mipstx39 | mipstx39el \ | mipstx39 | mipstx39el \
| mn10200 | mn10300 \ | mn10200 | mn10300 \
| moxie \
| mt \ | mt \
| msp430 \ | msp430 \
| nds32 | nds32le | nds32be \
| nios | nios2 \ | nios | nios2 \
| ns16k | ns32k \ | ns16k | ns32k \
| open8 \
| or32 \ | or32 \
| pdp10 | pdp11 | pj | pjl \ | pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ | powerpc | powerpc64 | powerpc64le | powerpcle \
| pyramid \ | pyramid \
| rl78 | rx \
| score \ | score \
| sh | sh[1234] | sh[24]a | sh[24]a*eb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
| sh64 | sh64le \ | sh64 | sh64le \
| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
| sparcv8 | sparcv9 | sparcv9b | sparcv9v \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \
| spu | strongarm \ | spu \
| tahoe | thumb | tic4x | tic80 | tron \ | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
| v850 | v850e \ | ubicom32 \
| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
| we32k \ | we32k \
| x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ | x86 | xc16x | xstormy16 | xtensa \
| z8k | z80) | z8k | z80)
basic_machine=$basic_machine-unknown basic_machine=$basic_machine-unknown
;; ;;
m6811 | m68hc11 | m6812 | m68hc12) c54x)
# Motorola 68HC11/12. basic_machine=tic54x-unknown
;;
c55x)
basic_machine=tic55x-unknown
;;
c6x)
basic_machine=tic6x-unknown
;;
m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
basic_machine=$basic_machine-unknown basic_machine=$basic_machine-unknown
os=-none os=-none
;; ;;
@ -302,6 +338,21 @@ case $basic_machine in
basic_machine=mt-unknown basic_machine=mt-unknown
;; ;;
strongarm | thumb | xscale)
basic_machine=arm-unknown
;;
xgate)
basic_machine=$basic_machine-unknown
os=-none
;;
xscaleeb)
basic_machine=armeb-unknown
;;
xscaleel)
basic_machine=armel-unknown
;;
# We use `pc' rather than `unknown' # We use `pc' rather than `unknown'
# because (1) that's what they normally are, and # because (1) that's what they normally are, and
# (2) the word "unknown" tends to confuse beginning users. # (2) the word "unknown" tends to confuse beginning users.
@ -316,24 +367,29 @@ case $basic_machine in
# Recognize the basic CPU types with company name. # Recognize the basic CPU types with company name.
580-* \ 580-* \
| a29k-* \ | a29k-* \
| aarch64-* | aarch64_be-* \
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \
| avr-* | avr32-* \ | avr-* | avr32-* \
| be32-* | be64-* \
| bfin-* | bs2000-* \ | bfin-* | bs2000-* \
| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \
| clipper-* | craynv-* | cydra-* \ | clipper-* | craynv-* | cydra-* \
| d10v-* | d30v-* | dlx-* \ | d10v-* | d30v-* | dlx-* \
| elxsi-* \ | elxsi-* \
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
| h8300-* | h8500-* \ | h8300-* | h8500-* \
| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
| hexagon-* \
| i*86-* | i860-* | i960-* | ia64-* \ | i*86-* | i860-* | i960-* | ia64-* \
| ip2k-* | iq2000-* \ | ip2k-* | iq2000-* \
| le32-* | le64-* \
| lm32-* \
| m32c-* | m32r-* | m32rle-* \ | m32c-* | m32r-* | m32rle-* \
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
| m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
| mips16-* \ | mips16-* \
| mips64-* | mips64el-* \ | mips64-* | mips64el-* \
@ -355,24 +411,29 @@ case $basic_machine in
| mmix-* \ | mmix-* \
| mt-* \ | mt-* \
| msp430-* \ | msp430-* \
| nds32-* | nds32le-* | nds32be-* \
| nios-* | nios2-* \ | nios-* | nios2-* \
| none-* | np1-* | ns16k-* | ns32k-* \ | none-* | np1-* | ns16k-* | ns32k-* \
| open8-* \
| orion-* \ | orion-* \
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
| pyramid-* \ | pyramid-* \
| romp-* | rs6000-* \ | rl78-* | romp-* | rs6000-* | rx-* \
| sh-* | sh[1234]-* | sh[24]a-* | sh[24]a*eb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
| sparclite-* \ | sparclite-* \
| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
| tahoe-* | thumb-* \ | tahoe-* \
| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* | tile-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
| tile*-* \
| tron-* \ | tron-* \
| v850-* | v850e-* | vax-* \ | ubicom32-* \
| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
| vax-* \
| we32k-* \ | we32k-* \
| x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \
| xstormy16-* | xtensa*-* \ | xstormy16-* | xtensa*-* \
| ymp-* \ | ymp-* \
| z8k-* | z80-*) | z8k-* | z80-*)
@ -397,7 +458,7 @@ case $basic_machine in
basic_machine=a29k-amd basic_machine=a29k-amd
os=-udi os=-udi
;; ;;
abacus) abacus)
basic_machine=abacus-unknown basic_machine=abacus-unknown
;; ;;
adobe68k) adobe68k)
@ -443,6 +504,10 @@ case $basic_machine in
basic_machine=m68k-apollo basic_machine=m68k-apollo
os=-bsd os=-bsd
;; ;;
aros)
basic_machine=i386-pc
os=-aros
;;
aux) aux)
basic_machine=m68k-apple basic_machine=m68k-apple
os=-aux os=-aux
@ -459,11 +524,24 @@ case $basic_machine in
basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
os=-linux os=-linux
;; ;;
bluegene*)
basic_machine=powerpc-ibm
os=-cnk
;;
c54x-*)
basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c55x-*)
basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c6x-*)
basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
c90) c90)
basic_machine=c90-cray basic_machine=c90-cray
os=-unicos os=-unicos
;; ;;
cegcc) cegcc)
basic_machine=arm-unknown basic_machine=arm-unknown
os=-cegcc os=-cegcc
;; ;;
@ -495,7 +573,7 @@ case $basic_machine in
basic_machine=craynv-cray basic_machine=craynv-cray
os=-unicosmp os=-unicosmp
;; ;;
cr16) cr16 | cr16-*)
basic_machine=cr16-unknown basic_machine=cr16-unknown
os=-elf os=-elf
;; ;;
@ -653,7 +731,6 @@ case $basic_machine in
i370-ibm* | ibm*) i370-ibm* | ibm*)
basic_machine=i370-ibm basic_machine=i370-ibm
;; ;;
# I'm not sure what "Sysv32" means. Should this be sysv3.2?
i*86v32) i*86v32)
basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
os=-sysv32 os=-sysv32
@ -711,6 +788,13 @@ case $basic_machine in
basic_machine=ns32k-utek basic_machine=ns32k-utek
os=-sysv os=-sysv
;; ;;
microblaze)
basic_machine=microblaze-xilinx
;;
mingw64)
basic_machine=x86_64-pc
os=-mingw64
;;
mingw32) mingw32)
basic_machine=i386-pc basic_machine=i386-pc
os=-mingw32 os=-mingw32
@ -765,10 +849,18 @@ case $basic_machine in
ms1-*) ms1-*)
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
;; ;;
msys)
basic_machine=i386-pc
os=-msys
;;
mvs) mvs)
basic_machine=i370-ibm basic_machine=i370-ibm
os=-mvs os=-mvs
;; ;;
nacl)
basic_machine=le32-unknown
os=-nacl
;;
ncr3000) ncr3000)
basic_machine=i486-ncr basic_machine=i486-ncr
os=-sysv4 os=-sysv4
@ -833,6 +925,12 @@ case $basic_machine in
np1) np1)
basic_machine=np1-gould basic_machine=np1-gould
;; ;;
neo-tandem)
basic_machine=neo-tandem
;;
nse-tandem)
basic_machine=nse-tandem
;;
nsr-tandem) nsr-tandem)
basic_machine=nsr-tandem basic_machine=nsr-tandem
;; ;;
@ -915,9 +1013,10 @@ case $basic_machine in
;; ;;
power) basic_machine=power-ibm power) basic_machine=power-ibm
;; ;;
ppc) basic_machine=powerpc-unknown ppc | ppcbe) basic_machine=powerpc-unknown
;; ;;
ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ppc-* | ppcbe-*)
basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
;; ;;
ppcle | powerpclittle | ppc-le | powerpc-little) ppcle | powerpclittle | ppc-le | powerpc-little)
basic_machine=powerpcle-unknown basic_machine=powerpcle-unknown
@ -1011,6 +1110,9 @@ case $basic_machine in
basic_machine=i860-stratus basic_machine=i860-stratus
os=-sysv4 os=-sysv4
;; ;;
strongarm-* | thumb-*)
basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
sun2) sun2)
basic_machine=m68000-sun basic_machine=m68000-sun
;; ;;
@ -1067,20 +1169,8 @@ case $basic_machine in
basic_machine=t90-cray basic_machine=t90-cray
os=-unicos os=-unicos
;; ;;
tic54x | c54x*)
basic_machine=tic54x-unknown
os=-coff
;;
tic55x | c55x*)
basic_machine=tic55x-unknown
os=-coff
;;
tic6x | c6x*)
basic_machine=tic6x-unknown
os=-coff
;;
tile*) tile*)
basic_machine=tile-unknown basic_machine=$basic_machine-unknown
os=-linux-gnu os=-linux-gnu
;; ;;
tx39) tx39)
@ -1150,6 +1240,9 @@ case $basic_machine in
xps | xps100) xps | xps100)
basic_machine=xps100-honeywell basic_machine=xps100-honeywell
;; ;;
xscale-* | xscalee[bl]-*)
basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
;;
ymp) ymp)
basic_machine=ymp-cray basic_machine=ymp-cray
os=-unicos os=-unicos
@ -1200,7 +1293,7 @@ case $basic_machine in
we32k) we32k)
basic_machine=we32k-att basic_machine=we32k-att
;; ;;
sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele) sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
basic_machine=sh-unknown basic_machine=sh-unknown
;; ;;
sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
@ -1247,9 +1340,12 @@ esac
if [ x"$os" != x"" ] if [ x"$os" != x"" ]
then then
case $os in case $os in
# First match some system type aliases # First match some system type aliases
# that might get confused with valid system types. # that might get confused with valid system types.
# -solaris* is a basic system type, with this one exception. # -solaris* is a basic system type, with this one exception.
-auroraux)
os=-auroraux
;;
-solaris1 | -solaris1.*) -solaris1 | -solaris1.*)
os=`echo $os | sed -e 's|solaris1|sunos4|'` os=`echo $os | sed -e 's|solaris1|sunos4|'`
;; ;;
@ -1270,29 +1366,31 @@ case $os in
# Each alternative MUST END IN A *, to match a version number. # Each alternative MUST END IN A *, to match a version number.
# -sysv* is not here because it comes later, after sysvr4. # -sysv* is not here because it comes later, after sysvr4.
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
| -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
| -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
| -sym* | -kopensolaris* \
| -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
| -aos* \ | -aos* | -aros* \
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
| -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
| -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
| -openbsd* | -solidbsd* \ | -bitrig* | -openbsd* | -solidbsd* \
| -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
| -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
| -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* | -cegcc* \ | -chorusos* | -chorusrdb* | -cegcc* \
| -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
| -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \ | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
| -linux-newlib* | -linux-musl* | -linux-uclibc* \
| -uxpv* | -beos* | -mpeix* | -udk* \ | -uxpv* | -beos* | -mpeix* | -udk* \
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
| -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* | -irx* \
| -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
| -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -irx*) | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
# Remember, each alternative MUST END IN *, to match a version number. # Remember, each alternative MUST END IN *, to match a version number.
;; ;;
-qnx*) -qnx*)
@ -1331,7 +1429,7 @@ case $os in
-opened*) -opened*)
os=-openedition os=-openedition
;; ;;
-os400*) -os400*)
os=-os400 os=-os400
;; ;;
-wince*) -wince*)
@ -1380,7 +1478,7 @@ case $os in
-sinix*) -sinix*)
os=-sysv4 os=-sysv4
;; ;;
-tpf*) -tpf*)
os=-tpf os=-tpf
;; ;;
-triton*) -triton*)
@ -1425,6 +1523,8 @@ case $os in
-dicos*) -dicos*)
os=-dicos os=-dicos
;; ;;
-nacl*)
;;
-none) -none)
;; ;;
*) *)
@ -1447,10 +1547,10 @@ else
# system, and we'll never get to this point. # system, and we'll never get to this point.
case $basic_machine in case $basic_machine in
score-*) score-*)
os=-elf os=-elf
;; ;;
spu-*) spu-*)
os=-elf os=-elf
;; ;;
*-acorn) *-acorn)
@ -1462,8 +1562,20 @@ case $basic_machine in
arm*-semi) arm*-semi)
os=-aout os=-aout
;; ;;
c4x-* | tic4x-*) c4x-* | tic4x-*)
os=-coff os=-coff
;;
hexagon-*)
os=-elf
;;
tic54x-*)
os=-coff
;;
tic55x-*)
os=-coff
;;
tic6x-*)
os=-coff
;; ;;
# This must come before the *-dec entry. # This must come before the *-dec entry.
pdp10-*) pdp10-*)
@ -1483,14 +1595,11 @@ case $basic_machine in
;; ;;
m68000-sun) m68000-sun)
os=-sunos3 os=-sunos3
# This also exists in the configure program, but was not the
# default.
# os=-sunos4
;; ;;
m68*-cisco) m68*-cisco)
os=-aout os=-aout
;; ;;
mep-*) mep-*)
os=-elf os=-elf
;; ;;
mips*-cisco) mips*-cisco)
@ -1517,7 +1626,7 @@ case $basic_machine in
*-ibm) *-ibm)
os=-aix os=-aix
;; ;;
*-knuth) *-knuth)
os=-mmixware os=-mmixware
;; ;;
*-wec) *-wec)
@ -1622,7 +1731,7 @@ case $basic_machine in
-sunos*) -sunos*)
vendor=sun vendor=sun
;; ;;
-aix*) -cnk*|-aix*)
vendor=ibm vendor=ibm
;; ;;
-beos*) -beos*)

27885
tools/pcre/configure vendored

File diff suppressed because it is too large Load Diff

View File

@ -1,27 +1,38 @@
dnl Process this file with autoconf to produce a configure script. dnl Process this file with autoconf to produce a configure script.
dnl NOTE FOR MAINTAINERS: Do not use major or minor version numbers with dnl NOTE FOR MAINTAINERS: Do not use minor version numbers 08 or 09 because
dnl leading zeros, because they may be treated as octal constants. The dnl the leading zeros may cause them to be treated as invalid octal constants
dnl PCRE_PRERELEASE feature is for identifying release candidates. It might dnl if a PCRE user writes code that uses PCRE_MINOR as a number. There is now
dnl be defined as -RC2, for example. For real releases, it should be defined dnl a check further down that throws an error if 08 or 09 are used.
dnl empty.
m4_define(pcre_major, [7]) dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
m4_define(pcre_minor, [9]) dnl be defined as -RC2, for example. For real releases, it should be empty.
m4_define(pcre_major, [8])
m4_define(pcre_minor, [32])
m4_define(pcre_prerelease, []) m4_define(pcre_prerelease, [])
m4_define(pcre_date, [2009-04-11]) m4_define(pcre_date, [2012-11-30])
# NOTE: The CMakeLists.txt file searches for the above variables in the first
# 50 lines of this file. Please update that if the variables above are moved.
# Libtool shared library interface versions (current:revision:age) # Libtool shared library interface versions (current:revision:age)
m4_define(libpcre_version, [0:1:0]) m4_define(libpcre_version, [3:0:2])
m4_define(libpcreposix_version, [0:0:0]) m4_define(libpcre16_version, [2:0:2])
m4_define(libpcre32_version, [0:0:0])
m4_define(libpcreposix_version, [0:1:0])
m4_define(libpcrecpp_version, [0:0:0]) m4_define(libpcrecpp_version, [0:0:0])
AC_PREREQ(2.57) AC_PREREQ(2.57)
AC_INIT(PCRE, pcre_major.pcre_minor[]pcre_prerelease, , pcre) AC_INIT(PCRE, pcre_major.pcre_minor[]pcre_prerelease, , pcre)
AC_CONFIG_SRCDIR([pcre.h.in]) AC_CONFIG_SRCDIR([pcre.h.in])
AM_INIT_AUTOMAKE([dist-bzip2 dist-zip]) AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AC_CONFIG_HEADERS(config.h) AC_CONFIG_HEADERS(config.h)
# This was added at the suggestion of libtoolize (03-Jan-10)
AC_CONFIG_MACRO_DIR([m4])
# The default CFLAGS and CXXFLAGS in Autoconf are "-g -O2" for gcc and just # The default CFLAGS and CXXFLAGS in Autoconf are "-g -O2" for gcc and just
# "-g" for any other compiler. There doesn't seem to be a standard way of # "-g" for any other compiler. There doesn't seem to be a standard way of
# getting rid of the -g (which I don't think is needed for a production # getting rid of the -g (which I don't think is needed for a production
@ -37,6 +48,7 @@ remember_set_CXXFLAGS="$CXXFLAGS"
AC_PROG_CC AC_PROG_CC
AC_PROG_CXX AC_PROG_CXX
AM_PROG_CC_C_O
if test "x$remember_set_CFLAGS" = "x" if test "x$remember_set_CFLAGS" = "x"
then then
@ -63,19 +75,37 @@ fi
# AC_PROG_CXX will return "g++" even if no c++ compiler is installed. # AC_PROG_CXX will return "g++" even if no c++ compiler is installed.
# Check for that case, and just disable c++ code if g++ doesn't run. # Check for that case, and just disable c++ code if g++ doesn't run.
AC_LANG_PUSH(C++) AC_LANG_PUSH(C++)
AC_COMPILE_IFELSE(AC_LANG_PROGRAM([],[]),, CXX=""; CXXCP=""; CXXFLAGS="") AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],, CXX=""; CXXCP=""; CXXFLAGS="")
AC_LANG_POP AC_LANG_POP
# Check for a 64-bit integer type
AC_TYPE_INT64_T
AC_PROG_INSTALL AC_PROG_INSTALL
AC_LIBTOOL_WIN32_DLL AC_LIBTOOL_WIN32_DLL
AC_PROG_LIBTOOL LT_INIT
AC_PROG_LN_S AC_PROG_LN_S
# Check for GCC visibility feature
PCRE_VISIBILITY
# Versioning
PCRE_MAJOR="pcre_major" PCRE_MAJOR="pcre_major"
PCRE_MINOR="pcre_minor" PCRE_MINOR="pcre_minor"
PCRE_PRERELEASE="pcre_prerelease" PCRE_PRERELEASE="pcre_prerelease"
PCRE_DATE="pcre_date" PCRE_DATE="pcre_date"
if test "$PCRE_MINOR" = "08" -o "$PCRE_MINOR" = "09"
then
echo "***"
echo "*** Minor version number $PCRE_MINOR must not be used. ***"
echo "*** Use only 01 to 07 or 10 onwards, to avoid octal issues. ***"
echo "***"
exit 1
fi
AC_SUBST(PCRE_MAJOR) AC_SUBST(PCRE_MAJOR)
AC_SUBST(PCRE_MINOR) AC_SUBST(PCRE_MINOR)
AC_SUBST(PCRE_PRERELEASE) AC_SUBST(PCRE_PRERELEASE)
@ -87,11 +117,46 @@ then
htmldir='${docdir}/html' htmldir='${docdir}/html'
fi fi
# Handle --disable-cpp # Handle --disable-pcre8 (enabled by default)
AC_ARG_ENABLE(pcre8,
AS_HELP_STRING([--disable-pcre8],
[disable 8 bit character support]),
, enable_pcre8=unset)
AC_SUBST(enable_pcre8)
# Handle --enable-pcre16 (disabled by default)
AC_ARG_ENABLE(pcre16,
AS_HELP_STRING([--enable-pcre16],
[enable 16 bit character support]),
, enable_pcre16=unset)
AC_SUBST(enable_pcre16)
# Handle --enable-pcre32 (disabled by default)
AC_ARG_ENABLE(pcre32,
AS_HELP_STRING([--enable-pcre32],
[enable 32 bit character support]),
, enable_pcre32=unset)
AC_SUBST(enable_pcre32)
# Handle --disable-cpp. The substitution of enable_cpp is needed for use in
# pcre-config.
AC_ARG_ENABLE(cpp, AC_ARG_ENABLE(cpp,
AS_HELP_STRING([--disable-cpp], AS_HELP_STRING([--disable-cpp],
[disable C++ support]), [disable C++ support]),
, enable_cpp=yes) , enable_cpp=unset)
AC_SUBST(enable_cpp)
# Handle --enable-jit (disabled by default)
AC_ARG_ENABLE(jit,
AS_HELP_STRING([--enable-jit],
[enable Just-In-Time compiling support]),
, enable_jit=no)
# Handle --disable-pcregrep-jit (enabled by default)
AC_ARG_ENABLE(pcregrep-jit,
AS_HELP_STRING([--disable-pcregrep-jit],
[disable JIT support in pcregrep]),
, enable_pcregrep_jit=yes)
# Handle --enable-rebuild-chartables # Handle --enable-rebuild-chartables
AC_ARG_ENABLE(rebuild-chartables, AC_ARG_ENABLE(rebuild-chartables,
@ -102,22 +167,22 @@ AC_ARG_ENABLE(rebuild-chartables,
# Handle --enable-utf8 (disabled by default) # Handle --enable-utf8 (disabled by default)
AC_ARG_ENABLE(utf8, AC_ARG_ENABLE(utf8,
AS_HELP_STRING([--enable-utf8], AS_HELP_STRING([--enable-utf8],
[enable UTF-8 support (incompatible with --enable-ebcdic)]), [another name for --enable-utf. Kept only for compatibility reasons]),
, enable_utf8=unset) , enable_utf8=unset)
# Handle --enable-utf (disabled by default)
AC_ARG_ENABLE(utf,
AS_HELP_STRING([--enable-utf],
[enable UTF-8/16/32 support (incompatible with --enable-ebcdic)]),
, enable_utf=unset)
# Handle --enable-unicode-properties # Handle --enable-unicode-properties
AC_ARG_ENABLE(unicode-properties, AC_ARG_ENABLE(unicode-properties,
AS_HELP_STRING([--enable-unicode-properties], AS_HELP_STRING([--enable-unicode-properties],
[enable Unicode properties support (implies --enable-utf8)]), [enable Unicode properties support (implies --enable-utf)]),
, enable_unicode_properties=no) , enable_unicode_properties=no)
# Handle --enable-newline=NL # Handle newline options
dnl AC_ARG_ENABLE(newline,
dnl AS_HELP_STRING([--enable-newline=NL],
dnl [use NL as newline (lf, cr, crlf, anycrlf, any; default=lf)]),
dnl , enable_newline=lf)
# Separate newline options
ac_pcre_newline=lf ac_pcre_newline=lf
AC_ARG_ENABLE(newline-is-cr, AC_ARG_ENABLE(newline-is-cr,
AS_HELP_STRING([--enable-newline-is-cr], AS_HELP_STRING([--enable-newline-is-cr],
@ -150,9 +215,15 @@ AC_ARG_ENABLE(bsr-anycrlf,
# Handle --enable-ebcdic # Handle --enable-ebcdic
AC_ARG_ENABLE(ebcdic, AC_ARG_ENABLE(ebcdic,
AS_HELP_STRING([--enable-ebcdic], AS_HELP_STRING([--enable-ebcdic],
[assume EBCDIC coding rather than ASCII; incompatible with --enable-utf8; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]), [assume EBCDIC coding rather than ASCII; incompatible with --enable-utf; use only in (uncommon) EBCDIC environments; it implies --enable-rebuild-chartables]),
, enable_ebcdic=no) , enable_ebcdic=no)
# Handle --enable-ebcdic-nl25
AC_ARG_ENABLE(ebcdic-nl25,
AS_HELP_STRING([--enable-ebcdic-nl25],
[set EBCDIC code for NL to 0x25 instead of 0x15; it implies --enable-ebcdic]),
, enable_ebcdic_nl25=no)
# Handle --disable-stack-for-recursion # Handle --disable-stack-for-recursion
AC_ARG_ENABLE(stack-for-recursion, AC_ARG_ENABLE(stack-for-recursion,
AS_HELP_STRING([--disable-stack-for-recursion], AS_HELP_STRING([--disable-stack-for-recursion],
@ -171,6 +242,18 @@ AC_ARG_ENABLE(pcregrep-libbz2,
[link pcregrep with libbz2 to handle .bz2 files]), [link pcregrep with libbz2 to handle .bz2 files]),
, enable_pcregrep_libbz2=no) , enable_pcregrep_libbz2=no)
# Handle --with-pcregrep-bufsize=N
AC_ARG_WITH(pcregrep-bufsize,
AS_HELP_STRING([--with-pcregrep-bufsize=N],
[pcregrep buffer size (default=20480)]),
, with_pcregrep_bufsize=20480)
# Handle --enable-pcretest-libedit
AC_ARG_ENABLE(pcretest-libedit,
AS_HELP_STRING([--enable-pcretest-libedit],
[link pcretest with libedit]),
, enable_pcretest_libedit=no)
# Handle --enable-pcretest-libreadline # Handle --enable-pcretest-libreadline
AC_ARG_ENABLE(pcretest-libreadline, AC_ARG_ENABLE(pcretest-libreadline,
AS_HELP_STRING([--enable-pcretest-libreadline], AS_HELP_STRING([--enable-pcretest-libreadline],
@ -208,38 +291,87 @@ AC_ARG_WITH(match-limit-recursion,
[default limit on internal recursion (default=MATCH_LIMIT)]), [default limit on internal recursion (default=MATCH_LIMIT)]),
, with_match_limit_recursion=MATCH_LIMIT) , with_match_limit_recursion=MATCH_LIMIT)
# Make sure that if enable_unicode_properties was set, that UTF-8 support # Handle --enable-valgrind
# is enabled. AC_ARG_ENABLE(valgrind,
# AS_HELP_STRING([--enable-valgrind],
[valgrind support]),
, enable_valgrind=no)
# Enable code coverage reports using gcov
AC_ARG_ENABLE(coverage,
AS_HELP_STRING([--enable-coverage],
[enable code coverage reports using gcov]),
, enable_coverage=no)
# Copy enable_utf8 value to enable_utf for compatibility reasons
if test "x$enable_utf8" != "xunset"
then
if test "x$enable_utf" != "xunset"
then
AC_MSG_ERROR([--enable/disable-utf8 is kept only for compatibility reasons and its value is copied to --enable/disable-utf. Newer code must use --enable/disable-utf alone.])
fi
enable_utf=$enable_utf8
fi
# Set the default value for pcre8
if test "x$enable_pcre8" = "xunset"
then
enable_pcre8=yes
fi
# Set the default value for pcre16
if test "x$enable_pcre16" = "xunset"
then
enable_pcre16=no
fi
# Set the default value for pcre32
if test "x$enable_pcre32" = "xunset"
then
enable_pcre32=no
fi
# Make sure at least one of enable_pcre8, enable_pcre16 or enable_pcre32 was set
if test "x$enable_pcre8$enable_pcre16$enable_pcre32" = "xnonono"
then
AC_MSG_ERROR([At least one of 8, 16 or 32 bit pcre library must be enabled])
fi
# Make sure that if enable_unicode_properties was set, UTF support is enabled.
if test "x$enable_unicode_properties" = "xyes" if test "x$enable_unicode_properties" = "xyes"
then then
if test "x$enable_utf8" = "xno" if test "x$enable_utf" = "xno"
then then
AC_MSG_ERROR([support for Unicode properties requires UTF-8 support]) AC_MSG_ERROR([support for Unicode properties requires UTF-8/16/32 support])
fi fi
enable_utf8=yes enable_utf=yes
fi fi
if test "x$enable_utf8" = "xunset" # enable_utf is disabled by default.
if test "x$enable_utf" = "xunset"
then then
enable_utf8=no enable_utf=no
fi fi
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled. # enable_cpp copies the value of enable_pcre8 by default
# Also check that UTF-8 support is not requested, because PCRE cannot handle if test "x$enable_cpp" = "xunset"
# EBCDIC and UTF-8 in the same build. To do so it would need to use different
# character constants depending on the mode.
#
if test "x$enable_ebcdic" = "xyes"
then then
enable_rebuild_chartables=yes enable_cpp=$enable_pcre8
if test "x$enable_utf8" = "xyes" fi
# Make sure that if enable_cpp was set, enable_pcre8 support is also enabled
if test "x$enable_cpp" = "xyes"
then
if test "x$enable_pcre8" = "xno"
then then
AC_MSG_ERROR([support for EBCDIC and UTF-8 cannot be enabled at the same time]) AC_MSG_ERROR([C++ library requires pcre library with 8 bit characters])
fi fi
fi fi
# Convert the newline identifier into the appropriate integer value. # Convert the newline identifier into the appropriate integer value. The first
# three are ASCII values 0x0a, 0x0d, and 0x0d0a, but if EBCDIC is enabled, they
# are changed below.
case "$enable_newline" in case "$enable_newline" in
lf) ac_pcre_newline_value=10 ;; lf) ac_pcre_newline_value=10 ;;
cr) ac_pcre_newline_value=13 ;; cr) ac_pcre_newline_value=13 ;;
@ -251,6 +383,37 @@ case "$enable_newline" in
;; ;;
esac esac
# --enable-ebcdic-nl25 implies --enable-ebcdic
if test "x$enable_ebcdic_nl25" = "xyes"; then
enable_ebcdic=yes
fi
# Make sure that if enable_ebcdic is set, rebuild_chartables is also enabled,
# and the newline value is adjusted appropriately (CR is still 13, but LF is
# 21 or 37). Also check that UTF support is not requested, because PCRE cannot
# handle EBCDIC and UTF in the same build. To do so it would need to use
# different character constants depending on the mode.
#
if test "x$enable_ebcdic" = "xyes"; then
enable_rebuild_chartables=yes
if test "x$enable_utf" = "xyes"; then
AC_MSG_ERROR([support for EBCDIC and UTF-8/16/32 cannot be enabled at the same time])
fi
if test "x$enable_ebcdic_nl25" = "xno"; then
case "$ac_pcre_newline_value" in
10) ac_pcre_newline_value=21 ;;
3338) ac_pcre_newline_value=3349 ;;
esac
else
case "$ac_pcre_newline_value" in
10) ac_pcre_newline_value=37 ;;
3338) ac_pcre_newline_value=3365 ;;
esac
fi
fi
# Check argument to --with-link-size # Check argument to --with-link-size
case "$with_link_size" in case "$with_link_size" in
2|3|4) ;; 2|3|4) ;;
@ -260,16 +423,17 @@ case "$with_link_size" in
esac esac
AH_TOP([ AH_TOP([
/* On Unix-like systems config.h.in is converted by "configure" into config.h. /* PCRE is written in Standard C, but there are a few non-standard things it
Some other environments also support the use of "configure". PCRE is written in can cope with, allowing it to run on SunOS4 and other "close to standard"
Standard C, but there are a few non-standard things it can cope with, allowing systems.
it to run on SunOS4 and other "close to standard" systems.
If you are going to build PCRE "by hand" on a system without "configure" you In environments that support the facilities, config.h.in is converted by
should copy the distributed config.h.generic to config.h, and then set up the "configure", or config-cmake.h.in is converted by CMake, into config.h. If you
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to are going to build PCRE "by hand" without using "configure" or CMake, you
all of your compile commands, so that config.h is included at the start of should copy the distributed config.h.generic to config.h, and then edit the
every source. macro definitions to be the way you need them. You must then add
-DHAVE_CONFIG_H to all of your compile commands, so that config.h is included
at the start of every source.
Alternatively, you can avoid editing by using -D on the compiler command line Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H. to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
@ -285,6 +449,11 @@ AC_CHECK_HEADERS(limits.h sys/types.h sys/stat.h dirent.h windows.h)
# The files below are C++ header files. # The files below are C++ header files.
pcre_have_type_traits="0" pcre_have_type_traits="0"
pcre_have_bits_type_traits="0" pcre_have_bits_type_traits="0"
if test "x$enable_cpp" = "xyes" -a -z "$CXX"; then
AC_MSG_ERROR([You need a C++ compiler for C++ support.])
fi
if test "x$enable_cpp" = "xyes" -a -n "$CXX" if test "x$enable_cpp" = "xyes" -a -n "$CXX"
then then
AC_LANG_PUSH(C++) AC_LANG_PUSH(C++)
@ -301,11 +470,11 @@ for flag in "-alias,__ZN7pcrecpp2RE6no_argE,__ZN7pcrecpp6no_argE" \
LDFLAGS="$OLD_LDFLAGS -Wl,$flag" LDFLAGS="$OLD_LDFLAGS -Wl,$flag"
# We try to run the linker with this new ld flag. If the link fails, # We try to run the linker with this new ld flag. If the link fails,
# we give up and remove the new flag from LDFLAGS. # we give up and remove the new flag from LDFLAGS.
AC_LINK_IFELSE(AC_LANG_PROGRAM([namespace pcrecpp { AC_LINK_IFELSE([AC_LANG_PROGRAM([namespace pcrecpp {
class RE { static int no_arg; }; class RE { static int no_arg; };
int RE::no_arg; int RE::no_arg;
}], }],
[]), [])],
[AC_MSG_RESULT([yes]); [AC_MSG_RESULT([yes]);
EXTRA_LIBPCRECPP_LDFLAGS="$EXTRA_LIBPCRECPP_LDFLAGS -Wl,$flag"; EXTRA_LIBPCRECPP_LDFLAGS="$EXTRA_LIBPCRECPP_LDFLAGS -Wl,$flag";
break;], break;],
@ -323,6 +492,49 @@ AC_CHECK_HEADERS(bits/type_traits.h, [pcre_have_bits_type_traits="1"],
AC_CHECK_HEADERS(type_traits.h, [pcre_have_type_traits="1"], AC_CHECK_HEADERS(type_traits.h, [pcre_have_type_traits="1"],
[pcre_have_type_traits="0"]) [pcre_have_type_traits="0"])
# (This isn't c++-specific, but is only used in pcrecpp.cc, so try this
# in a c++ context. This matters because strtoimax is C99 and may not
# be supported by the C++ compiler.)
# Figure out how to create a longlong from a string: strtoll and
# equiv. It's not enough to call AC_CHECK_FUNCS: hpux has a
# strtoll, for instance, but it only takes 2 args instead of 3!
# We have to call AH_TEMPLATE since AC_DEFINE_UNQUOTED below is complex.
AH_TEMPLATE(HAVE_STRTOQ, [Define to 1 if you have `strtoq'.])
AH_TEMPLATE(HAVE_STRTOLL, [Define to 1 if you have `strtoll'.])
AH_TEMPLATE(HAVE__STRTOI64, [Define to 1 if you have `_strtoi64'.])
AH_TEMPLATE(HAVE_STRTOIMAX, [Define to 1 if you have `strtoimax'.])
# Try each candidate in order with a three-argument call and stop at the
# first one that compiles; have_strto_fn records whether any was accepted.
have_strto_fn=0
for fn in strtoq strtoll _strtoi64 strtoimax; do
AC_MSG_CHECKING([for $fn])
# NOTE(review): C99 declares strtoimax in <inttypes.h>, not <stdint.h>;
# confirm that probing with stdint.h is intentional.
if test "$fn" = strtoimax; then
include=stdint.h
else
include=stdlib.h
fi
# The macro name is derived by upper-casing the function name with tr, which
# is why the templates above are declared by hand.
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include <$include>],
[char* e; return $fn("100", &e, 10)])],
[AC_MSG_RESULT(yes)
AC_DEFINE_UNQUOTED(HAVE_`echo $fn | tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ`, 1,
[Define to 1 if you have `$fn'.])
have_strto_fn=1
break],
[AC_MSG_RESULT(no)])
done

# If no string-to-long-long conversion function was accepted, report the
# long long types as unavailable without probing for them.
if test "$have_strto_fn" = 1; then
AC_CHECK_TYPES([long long],
[pcre_have_long_long="1"],
[pcre_have_long_long="0"])
AC_CHECK_TYPES([unsigned long long],
[pcre_have_ulong_long="1"],
[pcre_have_ulong_long="0"])
else
pcre_have_long_long="0"
pcre_have_ulong_long="0"
fi
AC_SUBST(pcre_have_long_long)
AC_SUBST(pcre_have_ulong_long)
AC_LANG_POP AC_LANG_POP
fi fi
# Using AC_SUBST eliminates the need to include config.h in a public .h file # Using AC_SUBST eliminates the need to include config.h in a public .h file
@ -330,32 +542,20 @@ AC_SUBST(pcre_have_type_traits)
AC_SUBST(pcre_have_bits_type_traits) AC_SUBST(pcre_have_bits_type_traits)
# Conditional compilation # Conditional compilation
AM_CONDITIONAL(WITH_PCRE8, test "x$enable_pcre8" = "xyes")
AM_CONDITIONAL(WITH_PCRE16, test "x$enable_pcre16" = "xyes")
AM_CONDITIONAL(WITH_PCRE32, test "x$enable_pcre32" = "xyes")
AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes") AM_CONDITIONAL(WITH_PCRE_CPP, test "x$enable_cpp" = "xyes")
AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes") AM_CONDITIONAL(WITH_REBUILD_CHARTABLES, test "x$enable_rebuild_chartables" = "xyes")
AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
AM_CONDITIONAL(WITH_UTF, test "x$enable_utf" = "xyes")
AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
# Checks for typedefs, structures, and compiler characteristics. # Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST AC_C_CONST
AC_TYPE_SIZE_T AC_TYPE_SIZE_T
pcre_have_strotolonglong=0
AC_CHECK_FUNCS(strtoq strtoll _strtoi64, [pcre_have_strotolonglong="1"; break])
# If we can't convert a string to a long long, pretend we don't even
# have a long long.
if test $pcre_have_strotolonglong = "0"; then
pcre_have_long_long="0"
pcre_have_ulong_long="0"
else
AC_CHECK_TYPES([long long],
[pcre_have_long_long="1"],
[pcre_have_long_long="0"])
AC_CHECK_TYPES([unsigned long long],
[pcre_have_ulong_long="1"],
[pcre_have_ulong_long="0"])
fi
AC_SUBST(pcre_have_long_long)
AC_SUBST(pcre_have_ulong_long)
# Checks for library functions. # Checks for library functions.
AC_CHECK_FUNCS(bcopy memmove strerror) AC_CHECK_FUNCS(bcopy memmove strerror)
@ -365,84 +565,196 @@ AC_CHECK_FUNCS(bcopy memmove strerror)
AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1]) AC_CHECK_HEADERS([zlib.h], [HAVE_ZLIB_H=1])
AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1]) AC_CHECK_LIB([z], [gzopen], [HAVE_LIBZ=1])
# Check for the availability of libbz2 # Check for the availability of libbz2. Originally we just used AC_CHECK_LIB,
# as for libz. However, this had the following problem, diagnosed and fixed by
# a user:
#
# - libbz2 uses the Pascal calling convention (WINAPI) for the functions
# under Win32.
# - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h",
# therefore missing the function definition.
# - The compiler thus generates a "C" signature for the test function.
# - The linker fails to find the "C" function.
# - PCRE fails to configure if asked to do so against libbz2.
#
# Solution:
#
# - Replace the AC_CHECK_LIB test with a custom test.
AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1]) AC_CHECK_HEADERS([bzlib.h], [HAVE_BZLIB_H=1])
AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1]) # Original test
# AC_CHECK_LIB([bz2], [BZ2_bzopen], [HAVE_LIBBZ2=1])
#
# Custom test follows
# Probe for libbz2 by linking a tiny program that includes bzlib.h (when the
# header check defined HAVE_BZLIB_H), so that BZ2_bzopen is seen with its real
# declaration. LIBS is extended only for the duration of the probe and is
# restored afterwards; a successful link is recorded in HAVE_LIBBZ2.
# This code is not inside a shell loop, so the success branch must not use
# "break".
AC_MSG_CHECKING([for libbz2])
OLD_LIBS="$LIBS"
LIBS="$LIBS -lbz2"
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
#ifdef HAVE_BZLIB_H
#include <bzlib.h>
#endif]],
[[return (int)BZ2_bzopen("conftest", "rb");]])],
[AC_MSG_RESULT([yes]); HAVE_LIBBZ2=1],
[AC_MSG_RESULT([no])])
LIBS="$OLD_LIBS"
# Check for the availabiity of libreadline # Check for the availabiity of libreadline
AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1]) if test "$enable_pcretest_libreadline" = "yes"; then
AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1]) AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_H=1])
AC_CHECK_LIB([readline], [readline], [HAVE_LIB_READLINE=1]) AC_CHECK_HEADERS([readline/history.h], [HAVE_HISTORY_H=1])
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lreadline"],
[unset ac_cv_lib_readline_readline;
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltinfo"],
[unset ac_cv_lib_readline_readline;
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lcurses"],
[unset ac_cv_lib_readline_readline;
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncurses"],
[unset ac_cv_lib_readline_readline;
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-lncursesw"],
[unset ac_cv_lib_readline_readline;
AC_CHECK_LIB([readline], [readline], [LIBREADLINE="-ltermcap"],
[LIBREADLINE=""],
[-ltermcap])],
[-lncursesw])],
[-lncurses])],
[-lcurses])],
[-ltinfo])])
AC_SUBST(LIBREADLINE)
if test -n "$LIBREADLINE"; then
if test "$LIBREADLINE" != "-lreadline"; then
echo "-lreadline needs $LIBREADLINE"
LIBREADLINE="-lreadline $LIBREADLINE"
fi
fi
fi
# Check for the availability of libedit. Different distributions put its
# headers in different places. Try to cover the most common ones.
# The header names are tried in order, each one only if the previous one was
# not found. Only editline/readline.h sets HAVE_EDITLINE_READLINE_H; both
# fallback locations record their result in HAVE_READLINE_READLINE_H.
# LIBEDIT is set to the linker flag when the library provides readline().

if test "$enable_pcretest_libedit" = "yes"; then
AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1],
[AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1],
[AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])])
AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
fi
# This facilitates -ansi builds under Linux # This facilitates -ansi builds under Linux
dnl AC_DEFINE([_GNU_SOURCE], [], [Enable GNU extensions in glibc]) dnl AC_DEFINE([_GNU_SOURCE], [], [Enable GNU extensions in glibc])
PCRE_STATIC_CFLAG=""
if test "x$enable_shared" = "xno" ; then if test "x$enable_shared" = "xno" ; then
AC_DEFINE([PCRE_STATIC], [1], [ AC_DEFINE([PCRE_STATIC], [1], [
Define if linking statically (TODO: make nice with Libtool)]) Define to any value if linking statically (TODO: make nice with Libtool)])
PCRE_STATIC_CFLAG="-DPCRE_STATIC"
fi fi
AC_SUBST(PCRE_STATIC_CFLAG)
# Here is where pcre specific defines are handled # Here is where pcre specific defines are handled
if test "$enable_utf8" = "yes"; then if test "$enable_pcre8" = "yes"; then
AC_DEFINE([SUPPORT_UTF8], [], [ AC_DEFINE([SUPPORT_PCRE8], [], [
Define to enable support for the UTF-8 Unicode encoding. This will Define to any value to enable the 8 bit PCRE library.])
work even in an EBCDIC environment, but it is incompatible with fi
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code
*or* ASCII/UTF-8, but not both at once.]) if test "$enable_pcre16" = "yes"; then
AC_DEFINE([SUPPORT_PCRE16], [], [
Define to any value to enable the 16 bit PCRE library.])
fi
if test "$enable_pcre32" = "yes"; then
AC_DEFINE([SUPPORT_PCRE32], [], [
Define to any value to enable the 32 bit PCRE library.])
fi
# JIT support needs pthreads: configuration is aborted when the pthread probe
# fails. On success the compiler, compiler flags and libraries are switched to
# the PTHREAD_* values (presumably set by AX_PTHREAD; confirm against the
# macro shipped in m4/) before SUPPORT_JIT is defined.
if test "$enable_jit" = "yes"; then
AX_PTHREAD([], [AC_MSG_ERROR([JIT support requires pthreads])])
CC="$PTHREAD_CC"
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
LIBS="$PTHREAD_LIBS $LIBS"
AC_DEFINE([SUPPORT_JIT], [], [
Define to any value to enable support for Just-In-Time compiling.])
else
# Without JIT in the library, JIT use in pcregrep is forced off as well.
enable_pcregrep_jit="no"
fi

if test "$enable_pcregrep_jit" = "yes"; then
AC_DEFINE([SUPPORT_PCREGREP_JIT], [], [
Define to any value to enable JIT support in pcregrep.])
fi
if test "$enable_utf" = "yes"; then
AC_DEFINE([SUPPORT_UTF], [], [
Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
This will work even in an EBCDIC environment, but it is incompatible
with the EBCDIC macro. That is, PCRE can support *either* EBCDIC
code *or* ASCII/UTF-8/16/32, but not both at once.])
fi fi
if test "$enable_unicode_properties" = "yes"; then if test "$enable_unicode_properties" = "yes"; then
AC_DEFINE([SUPPORT_UCP], [], [ AC_DEFINE([SUPPORT_UCP], [], [
Define to enable support for Unicode properties]) Define to any value to enable support for Unicode properties.])
fi fi
if test "$enable_stack_for_recursion" = "no"; then if test "$enable_stack_for_recursion" = "no"; then
AC_DEFINE([NO_RECURSE], [], [ AC_DEFINE([NO_RECURSE], [], [
PCRE uses recursive function calls to handle backtracking while PCRE uses recursive function calls to handle backtracking while
matching. This can sometimes be a problem on systems that have matching. This can sometimes be a problem on systems that have
stacks of limited size. Define NO_RECURSE to get a version that stacks of limited size. Define NO_RECURSE to any value to get a
doesn't use recursion in the match() function; instead it creates version that doesn't use recursion in the match() function; instead
its own stack by steam using pcre_recurse_malloc() to obtain memory it creates its own stack by steam using pcre_recurse_malloc() to obtain
from the heap. For more detail, see the comments and other stuff memory from the heap. For more detail, see the comments and other stuff
just above the match() function. On systems that support it, just above the match() function.])
"configure" can be used to set this in the Makefile
(use --disable-stack-for-recursion).])
fi fi
if test "$enable_pcregrep_libz" = "yes"; then if test "$enable_pcregrep_libz" = "yes"; then
AC_DEFINE([SUPPORT_LIBZ], [], [ AC_DEFINE([SUPPORT_LIBZ], [], [
Define to allow pcregrep to be linked with libz, so that it is Define to any value to allow pcregrep to be linked with libz, so that it is
able to handle .gz files.]) able to handle .gz files.])
fi fi
if test "$enable_pcregrep_libbz2" = "yes"; then if test "$enable_pcregrep_libbz2" = "yes"; then
AC_DEFINE([SUPPORT_LIBBZ2], [], [ AC_DEFINE([SUPPORT_LIBBZ2], [], [
Define to allow pcregrep to be linked with libbz2, so that it is Define to any value to allow pcregrep to be linked with libbz2, so that it
able to handle .bz2 files.]) is able to handle .bz2 files.])
fi fi
if test "$enable_pcretest_libreadline" = "yes"; then if test $with_pcregrep_bufsize -lt 8192 ; then
with_pcregrep_bufsize="8192"
fi
AC_DEFINE_UNQUOTED([PCREGREP_BUFSIZE], [$with_pcregrep_bufsize], [
The value of PCREGREP_BUFSIZE determines the size of buffer used by pcregrep
to hold parts of the file it is searching. This is also the minimum value.
The actual amount of memory used by pcregrep is three times this number,
because it allows for the buffering of "before" and "after" lines.])
if test "$enable_pcretest_libedit" = "yes"; then
AC_DEFINE([SUPPORT_LIBEDIT], [], [
Define to any value to allow pcretest to be linked with libedit.])
LIBREADLINE="$LIBEDIT"
elif test "$enable_pcretest_libreadline" = "yes"; then
AC_DEFINE([SUPPORT_LIBREADLINE], [], [ AC_DEFINE([SUPPORT_LIBREADLINE], [], [
Define to allow pcretest to be linked with libreadline.]) Define to any value to allow pcretest to be linked with libreadline.])
fi fi
AC_DEFINE_UNQUOTED([NEWLINE], [$ac_pcre_newline_value], [ AC_DEFINE_UNQUOTED([NEWLINE], [$ac_pcre_newline_value], [
The value of NEWLINE determines the newline character sequence. On The value of NEWLINE determines the default newline character sequence. PCRE
systems that support it, "configure" can be used to override the client programs can override this by selecting other values at run time. In
default, which is 10. The possible values are 10 (LF), 13 (CR), ASCII environments, the value can be 10 (LF), 13 (CR), or 3338 (CRLF); in
3338 (CRLF), -1 (ANY), or -2 (ANYCRLF).]) EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or 3349 or 3365
(CRLF) because there are two alternative codepoints (0x15 and 0x25) that are
used as the NL line terminator that is equivalent to ASCII LF. In both ASCII
and EBCDIC environments the value can also be -1 (ANY), or -2 (ANYCRLF).])
if test "$enable_bsr_anycrlf" = "yes"; then if test "$enable_bsr_anycrlf" = "yes"; then
AC_DEFINE([BSR_ANYCRLF], [], [ AC_DEFINE([BSR_ANYCRLF], [], [
By default, the \R escape sequence matches any Unicode line ending By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined, this is character or sequence of characters. If BSR_ANYCRLF is defined (to any
changed so that backslash-R matches only CR, LF, or CRLF. The build- value), this is changed so that backslash-R matches only CR, LF, or CRLF.
time default can be overridden by the user of PCRE at runtime. On The build-time default can be overridden by the user of PCRE at runtime.])
systems that support it, "configure" can be used to override the
default.])
fi fi
AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [ AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
@ -450,8 +762,7 @@ AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
links as offsets within the compiled regex. The default is 2, which links as offsets within the compiled regex. The default is 2, which
allows for compiled patterns up to 64K long. This covers the vast allows for compiled patterns up to 64K long. This covers the vast
majority of cases. However, PCRE can also be compiled to use 3 or 4 majority of cases. However, PCRE can also be compiled to use 3 or 4
bytes instead. This allows for longer patterns in extreme cases. On bytes instead. This allows for longer patterns in extreme cases.])
systems that support it, "configure" can be used to override this default.])
AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [ AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
When calling PCRE via the POSIX interface, additional working storage When calling PCRE via the POSIX interface, additional working storage
@ -460,9 +771,7 @@ AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
interface provides only two. If the number of expected substrings is interface provides only two. If the number of expected substrings is
small, the wrapper function uses space on the stack, because this is small, the wrapper function uses space on the stack, because this is
faster than using malloc() for each call. The threshold above which faster than using malloc() for each call. The threshold above which
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD. On the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD.])
systems that support it, "configure" can be used to override this
default.])
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [ AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
The value of MATCH_LIMIT determines the default number of times the The value of MATCH_LIMIT determines the default number of times the
@ -471,8 +780,7 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
limit. The limit exists in order to catch runaway regular limit. The limit exists in order to catch runaway regular
expressions that take for ever to determine that they do not match. expressions that take for ever to determine that they do not match.
The default is set very large so that it does not accidentally catch The default is set very large so that it does not accidentally catch
legitimate cases. On systems that support it, "configure" can be legitimate cases.])
used to override this default default.])
AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [ AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
The above limit applies to all calls of match(), whether or not they The above limit applies to all calls of match(), whether or not they
@ -483,8 +791,7 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
have any useful effect, it must be less than the value of have any useful effect, it must be less than the value of
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT.
There is a runtime method for setting a different limit. On systems There is a runtime method for setting a different limit.])
that support it, "configure" can be used to override the default.])
AC_DEFINE([MAX_NAME_SIZE], [32], [ AC_DEFINE([MAX_NAME_SIZE], [32], [
This limit is parameterized just in case anybody ever wants to This limit is parameterized just in case anybody ever wants to
@ -500,23 +807,38 @@ AH_VERBATIM([PCRE_EXP_DEFN], [
/* If you are compiling for a system other than a Unix-like system or /* If you are compiling for a system other than a Unix-like system or
Win32, and it needs some magic to be inserted before the definition Win32, and it needs some magic to be inserted before the definition
of a function that is exported by the library, define this macro to of a function that is exported by the library, define this macro to
contain the relevant magic. If you do not define this macro, it contain the relevant magic. If you do not define this macro, a suitable
defaults to "extern" for a C compiler and "extern C" for a C++ __declspec value is used for Windows systems; in other environments
compiler on non-Win32 systems. This macro apears at the start of "extern" is used for a C compiler and "extern C" for a C++ compiler.
every exported function that is part of the external API. It does This macro apears at the start of every exported function that is part
not appear on functions that are "external" in the C sense, but of the external API. It does not appear on functions that are "external"
which are internal to the library. */ in the C sense, but which are internal to the library. */
#undef PCRE_EXP_DEFN]) #undef PCRE_EXP_DEFN])
if test "$enable_ebcdic" = "yes"; then if test "$enable_ebcdic" = "yes"; then
AC_DEFINE_UNQUOTED([EBCDIC], [], [ AC_DEFINE_UNQUOTED([EBCDIC], [], [
If you are compiling for a system that uses EBCDIC instead of ASCII If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro as 1. On systems that can use character codes, define this macro to any value. You must also edit the
"configure", this can be done via --enable-ebcdic. PCRE will then NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
assume that all input strings are in EBCDIC. If you do not define On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
this macro, PCRE will assume input strings are ASCII or UTF-8 Unicode. automatically adjusted. When EBCDIC is set, PCRE assumes that all input
It is not possible to build a version of PCRE that supports both strings are in EBCDIC. If you do not define this macro, PCRE will assume
EBCDIC and UTF-8.]) input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
a version of PCRE that supports both EBCDIC and UTF-8/16/32.])
fi
if test "$enable_ebcdic_nl25" = "yes"; then
AC_DEFINE_UNQUOTED([EBCDIC_NL25], [], [
In an EBCDIC environment, define this macro to any value to arrange for
the NL character to be 0x25 instead of the default 0x15. NL plays the role
that LF does in an ASCII/Unicode environment. The value must also be set in
the NEWLINE macro below. On systems that can use "configure" or CMake to
set EBCDIC_NL25, the adjustment of NEWLINE is automatic.])
fi
if test "$enable_valgrind" = "yes"; then
AC_DEFINE_UNQUOTED([SUPPORT_VALGRIND], [], [
Valgrind support to find invalid memory reads.])
fi fi
# Platform specific issues # Platform specific issues
@ -537,6 +859,12 @@ esac
EXTRA_LIBPCRE_LDFLAGS="$EXTRA_LIBPCRE_LDFLAGS \ EXTRA_LIBPCRE_LDFLAGS="$EXTRA_LIBPCRE_LDFLAGS \
$NO_UNDEFINED -version-info libpcre_version" $NO_UNDEFINED -version-info libpcre_version"
EXTRA_LIBPCRE16_LDFLAGS="$EXTRA_LIBPCRE16_LDFLAGS \
$NO_UNDEFINED -version-info libpcre16_version"
EXTRA_LIBPCRE32_LDFLAGS="$EXTRA_LIBPCRE32_LDFLAGS \
$NO_UNDEFINED -version-info libpcre32_version"
EXTRA_LIBPCREPOSIX_LDFLAGS="$EXTRA_LIBPCREPOSIX_LDFLAGS \ EXTRA_LIBPCREPOSIX_LDFLAGS="$EXTRA_LIBPCREPOSIX_LDFLAGS \
$NO_UNDEFINED -version-info libpcreposix_version" $NO_UNDEFINED -version-info libpcreposix_version"
@ -545,11 +873,14 @@ EXTRA_LIBPCRECPP_LDFLAGS="$EXTRA_LIBPCRECPP_LDFLAGS \
$EXPORT_ALL_SYMBOLS" $EXPORT_ALL_SYMBOLS"
AC_SUBST(EXTRA_LIBPCRE_LDFLAGS) AC_SUBST(EXTRA_LIBPCRE_LDFLAGS)
AC_SUBST(EXTRA_LIBPCRE16_LDFLAGS)
AC_SUBST(EXTRA_LIBPCRE32_LDFLAGS)
AC_SUBST(EXTRA_LIBPCREPOSIX_LDFLAGS) AC_SUBST(EXTRA_LIBPCREPOSIX_LDFLAGS)
AC_SUBST(EXTRA_LIBPCRECPP_LDFLAGS) AC_SUBST(EXTRA_LIBPCRECPP_LDFLAGS)
# When we run 'make distcheck', use these arguments. # When we run 'make distcheck', use these arguments. Turning off compiler
DISTCHECK_CONFIGURE_FLAGS="--enable-cpp --enable-unicode-properties" # optimization makes it run faster.
DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre16 --enable-pcre32 --enable-jit --enable-cpp --enable-unicode-properties"
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS) AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
# Check that, if --enable-pcregrep-libz or --enable-pcregrep-libbz2 is # Check that, if --enable-pcregrep-libz or --enable-pcregrep-libbz2 is
@ -583,6 +914,23 @@ AC_SUBST(LIBBZ2)
# Similarly for --enable-pcretest-readline # Similarly for --enable-pcretest-readline
# libedit and libreadline are alternative line editors for pcretest, so only
# one of them may be selected. When libedit is chosen, at least one of the
# readline-compatible headers probed earlier, and the library itself, must
# have been found; otherwise configuration stops with an explanation.
if test "$enable_pcretest_libedit" = "yes"; then
  if test "$enable_pcretest_libreadline" = "yes"; then
    echo "** Cannot use both --enable-pcretest-libedit and --enable-pcretest-libreadline"
    exit 1
  fi
  # Two separate test commands joined by && avoid the obsolescent "-a"
  # operator of test.
  if test "$HAVE_EDITLINE_READLINE_H" != "1" &&
     test "$HAVE_READLINE_READLINE_H" != "1"; then
    echo "** Cannot --enable-pcretest-libedit because none of editline/readline.h,"
    echo "** edit/readline/readline.h or readline/readline.h was found."
    exit 1
  fi
  if test -z "$LIBEDIT"; then
    echo "** Cannot --enable-pcretest-libedit because libedit library was not found."
    exit 1
  fi
fi
if test "$enable_pcretest_libreadline" = "yes"; then if test "$enable_pcretest_libreadline" = "yes"; then
if test "$HAVE_READLINE_H" != "1"; then if test "$HAVE_READLINE_H" != "1"; then
echo "** Cannot --enable-pcretest-readline because readline/readline.h was not found." echo "** Cannot --enable-pcretest-readline because readline/readline.h was not found."
@ -592,14 +940,73 @@ if test "$enable_pcretest_libreadline" = "yes"; then
echo "** Cannot --enable-pcretest-readline because readline/history.h was not found." echo "** Cannot --enable-pcretest-readline because readline/history.h was not found."
exit 1 exit 1
fi fi
LIBREADLINE="-lreadline" if test -z "$LIBREADLINE"; then
echo "** Cannot --enable-pcretest-readline because readline library was not found."
exit 1
fi
fi fi
AC_SUBST(LIBREADLINE)
# Check for valgrind
# When valgrind support is enabled, locate valgrind through pkg-config. The
# m4_ifdef guard is evaluated when configure is generated: if the
# PKG_CHECK_MODULES macro was not available at that time, the generated
# script reports "pkg-config not supported" instead.
if test "$enable_valgrind" = "yes"; then
m4_ifdef([PKG_CHECK_MODULES],
[PKG_CHECK_MODULES([VALGRIND],[valgrind])],
[AC_MSG_ERROR([pkg-config not supported])])
fi
# Code coverage reporting. When coverage is enabled this requires GCC,
# refuses to run through ccache unless CCACHE_DISABLE=1 is exported, needs
# both lcov and genhtml, defines SUPPORT_GCOV and exports the gcov compile
# and link flags. The WITH_GCOV conditional is defined in every case.
if test "$enable_coverage" = "yes"; then
  if test "x$GCC" != "xyes"; then
    AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
  fi

  # ccache is incompatible with gcov. shtool is used to resolve the compiler
  # path; when shtool is missing SHTOOL is "false", which prints nothing, so
  # the compiler is treated as not being ccache.
  AC_PATH_PROG([SHTOOL],[shtool],[false])
  case `$SHTOOL path $CC` in
    *ccache*) cc_ccache=yes;;
    *) cc_ccache=no;;
  esac

  if test "$cc_ccache" = "yes"; then
    # An empty or unset CCACHE_DISABLE also differs from "1", so a single
    # comparison covers both cases without the obsolescent "-o" operator.
    if test "x$CCACHE_DISABLE" != "x1"; then
      AC_MSG_ERROR([must export CCACHE_DISABLE=1 to disable ccache for code coverage])
    fi
  fi

  AC_ARG_VAR([LCOV],[the ltp lcov program])
  AC_PATH_PROG([LCOV],[lcov],[false])
  if test "x$LCOV" = "xfalse"; then
    AC_MSG_ERROR([lcov not found])
  fi

  AC_ARG_VAR([GENHTML],[the ltp genhtml program])
  AC_PATH_PROG([GENHTML],[genhtml],[false])
  if test "x$GENHTML" = "xfalse"; then
    AC_MSG_ERROR([genhtml not found])
  fi

  AC_DEFINE([SUPPORT_GCOV],[1], [
Define to allow pcretest and pcregrep to be linked with gcov, so that they
are able to generate code coverage reports.])

  # And add flags needed for gcov
  GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
  GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
  GCOV_LIBS="-lgcov"
  AC_SUBST([GCOV_CFLAGS])
  AC_SUBST([GCOV_CXXFLAGS])
  AC_SUBST([GCOV_LIBS])
fi # enable_coverage

AM_CONDITIONAL([WITH_GCOV],[test "x$enable_coverage" = "xyes"])
# Produce these files, in addition to config.h. # Produce these files, in addition to config.h.
AC_CONFIG_FILES( AC_CONFIG_FILES(
Makefile Makefile
libpcre.pc libpcre.pc
libpcre16.pc
libpcre32.pc
libpcreposix.pc
libpcrecpp.pc libpcrecpp.pc
pcre-config pcre-config
pcre.h pcre.h
@ -616,9 +1023,16 @@ AC_CONFIG_COMMANDS([delete-old-chartables], [rm -f pcre_chartables.c])
AC_OUTPUT AC_OUTPUT
# Print out a nice little message after configure is run displaying your # Print out a nice little message after configure is run displaying the
# chosen options. # chosen options.
# Value shown on the "EBCDIC code for NL" line of the summary: 0x25 when the
# nl25 variant is selected, 0x15 for any other EBCDIC build, and n/a when
# EBCDIC is not enabled.
ebcdic_nl_code=n/a
case "$enable_ebcdic_nl25" in
  yes)
    ebcdic_nl_code=0x25
    ;;
  *)
    case "$enable_ebcdic" in
      yes) ebcdic_nl_code=0x15 ;;
    esac
    ;;
esac
cat <<EOF cat <<EOF
$PACKAGE-$VERSION configuration summary: $PACKAGE-$VERSION configuration summary:
@ -630,17 +1044,22 @@ $PACKAGE-$VERSION configuration summary:
C++ compiler .................... : ${CXX} C++ compiler .................... : ${CXX}
Linker .......................... : ${LD} Linker .......................... : ${LD}
C preprocessor flags ............ : ${CPPFLAGS} C preprocessor flags ............ : ${CPPFLAGS}
C compiler flags ................ : ${CFLAGS} C compiler flags ................ : ${CFLAGS} ${VISIBILITY_CFLAGS}
C++ compiler flags .............. : ${CXXFLAGS} C++ compiler flags .............. : ${CXXFLAGS} ${VISIBILITY_CXXFLAGS}
Linker flags .................... : ${LDFLAGS} Linker flags .................... : ${LDFLAGS}
Extra libraries ................. : ${LIBS} Extra libraries ................. : ${LIBS}
Build 8 bit pcre library ........ : ${enable_pcre8}
Build 16 bit pcre library ....... : ${enable_pcre16}
Build 32 bit pcre library ....... : ${enable_pcre32}
Build C++ library ............... : ${enable_cpp} Build C++ library ............... : ${enable_cpp}
Enable UTF-8 support ............ : ${enable_utf8} Enable JIT compiling support .... : ${enable_jit}
Enable UTF-8/16/32 support ...... : ${enable_utf}
Unicode properties .............. : ${enable_unicode_properties} Unicode properties .............. : ${enable_unicode_properties}
Newline char/sequence ........... : ${enable_newline} Newline char/sequence ........... : ${enable_newline}
\R matches only ANYCRLF ......... : ${enable_bsr_anycrlf} \R matches only ANYCRLF ......... : ${enable_bsr_anycrlf}
EBCDIC coding ................... : ${enable_ebcdic} EBCDIC coding ................... : ${enable_ebcdic}
EBCDIC code for NL .............. : ${ebcdic_nl_code}
Rebuild char tables ............. : ${enable_rebuild_chartables} Rebuild char tables ............. : ${enable_rebuild_chartables}
Use stack recursion ............. : ${enable_stack_for_recursion} Use stack recursion ............. : ${enable_stack_for_recursion}
POSIX mem threshold ............. : ${with_posix_malloc_threshold} POSIX mem threshold ............. : ${with_posix_malloc_threshold}
@ -649,9 +1068,14 @@ $PACKAGE-$VERSION configuration summary:
Match limit recursion ........... : ${with_match_limit_recursion} Match limit recursion ........... : ${with_match_limit_recursion}
Build shared libs ............... : ${enable_shared} Build shared libs ............... : ${enable_shared}
Build static libs ............... : ${enable_static} Build static libs ............... : ${enable_static}
Use JIT in pcregrep ............. : ${enable_pcregrep_jit}
Buffer size for pcregrep ........ : ${with_pcregrep_bufsize}
Link pcregrep with libz ......... : ${enable_pcregrep_libz} Link pcregrep with libz ......... : ${enable_pcregrep_libz}
Link pcregrep with libbz2 ....... : ${enable_pcregrep_libbz2} Link pcregrep with libbz2 ....... : ${enable_pcregrep_libbz2}
Link pcretest with libedit ...... : ${enable_pcretest_libedit}
Link pcretest with libreadline .. : ${enable_pcretest_libreadline} Link pcretest with libreadline .. : ${enable_pcretest_libreadline}
Valgrind support ................ : ${enable_valgrind}
Code coverage ................... : ${enable_coverage}
EOF EOF

View File

@ -1,10 +1,10 @@
#! /bin/sh #! /bin/sh
# depcomp - compile a program generating dependencies as side-effects # depcomp - compile a program generating dependencies as side-effects
scriptversion=2007-03-29.01 scriptversion=2012-03-27.16; # UTC
# Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007 Free Software # Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009, 2010,
# Foundation, Inc. # 2011, 2012 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
@ -17,9 +17,7 @@ scriptversion=2007-03-29.01
# GNU General Public License for more details. # GNU General Public License for more details.
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software # along with this program. If not, see <http://www.gnu.org/licenses/>.
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
# As a special exception to the GNU General Public License, if you # As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a # distribute this file as part of a program that contains a
@ -30,7 +28,7 @@ scriptversion=2007-03-29.01
case $1 in case $1 in
'') '')
echo "$0: No command. Try \`$0 --help' for more information." 1>&2 echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1; exit 1;
;; ;;
-h | --h*) -h | --h*)
@ -42,11 +40,11 @@ as side-effects.
Environment variables: Environment variables:
depmode Dependency tracking mode. depmode Dependency tracking mode.
source Source file read by `PROGRAMS ARGS'. source Source file read by 'PROGRAMS ARGS'.
object Object file output by `PROGRAMS ARGS'. object Object file output by 'PROGRAMS ARGS'.
DEPDIR directory where to store dependencies. DEPDIR directory where to store dependencies.
depfile Dependency file to output. depfile Dependency file to output.
tmpdepfile Temporary file to use when outputing dependencies. tmpdepfile Temporary file to use when outputting dependencies.
libtool Whether libtool is used (yes/no). libtool Whether libtool is used (yes/no).
Report bugs to <bug-automake@gnu.org>. Report bugs to <bug-automake@gnu.org>.
@ -59,6 +57,12 @@ EOF
;; ;;
esac esac
# A tabulation character.
tab=' '
# A newline character.
nl='
'
if test -z "$depmode" || test -z "$source" || test -z "$object"; then if test -z "$depmode" || test -z "$source" || test -z "$object"; then
echo "depcomp: Variables source, object and depmode must be set" 1>&2 echo "depcomp: Variables source, object and depmode must be set" 1>&2
exit 1 exit 1
@ -87,6 +91,29 @@ if test "$depmode" = dashXmstdout; then
depmode=dashmstdout depmode=dashmstdout
fi fi
cygpath_u="cygpath -u -f -"
if test "$depmode" = msvcmsys; then
# This is just like msvisualcpp but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvisualcpp
fi
if test "$depmode" = msvc7msys; then
# This is just like msvc7 but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvc7
fi
if test "$depmode" = xlc; then
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
gccflag=-qmakedep=gcc,-MF
depmode=gcc
fi
case "$depmode" in case "$depmode" in
gcc3) gcc3)
## gcc 3 implements dependency tracking that does exactly what ## gcc 3 implements dependency tracking that does exactly what
@ -141,20 +168,21 @@ gcc)
## The second -e expression handles DOS-style file names with drive letters. ## The second -e expression handles DOS-style file names with drive letters.
sed -e 's/^[^:]*: / /' \ sed -e 's/^[^:]*: / /' \
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
## This next piece of magic avoids the `deleted header file' problem. ## This next piece of magic avoids the "deleted header file" problem.
## The problem is that when a header file which appears in a .P file ## The problem is that when a header file which appears in a .P file
## is deleted, the dependency causes make to die (because there is ## is deleted, the dependency causes make to die (because there is
## typically no way to rebuild the header). We avoid this by adding ## typically no way to rebuild the header). We avoid this by adding
## dummy dependencies for each header file. Too bad gcc doesn't do ## dummy dependencies for each header file. Too bad gcc doesn't do
## this for us directly. ## this for us directly.
tr ' ' ' tr ' ' "$nl" < "$tmpdepfile" |
' < "$tmpdepfile" | ## Some versions of gcc put a space before the ':'. On the theory
## Some versions of gcc put a space before the `:'. On the theory
## that the space means something, we add a space to the output as ## that the space means something, we add a space to the output as
## well. ## well. hp depmode also adds that space, but also prefixes the VPATH
## to the object. Take care to not repeat it in the output.
## Some versions of the HPUX 10.20 sed can't process this invocation ## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround. ## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile" rm -f "$tmpdepfile"
;; ;;
@ -186,20 +214,17 @@ sgi)
# clever and replace this with sed code, as IRIX sed won't handle # clever and replace this with sed code, as IRIX sed won't handle
# lines with more than a fixed number of characters (4096 in # lines with more than a fixed number of characters (4096 in
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
# the IRIX cc adds comments like `#:fec' to the end of the # the IRIX cc adds comments like '#:fec' to the end of the
# dependency line. # dependency line.
tr ' ' ' tr ' ' "$nl" < "$tmpdepfile" \
' < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
tr ' tr "$nl" ' ' >> "$depfile"
' ' ' >> $depfile echo >> "$depfile"
echo >> $depfile
# The second pass generates a dummy entry for each header file. # The second pass generates a dummy entry for each header file.
tr ' ' ' tr ' ' "$nl" < "$tmpdepfile" \
' < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
>> $depfile >> "$depfile"
else else
# The sourcefile does not contain any dependencies, so just # The sourcefile does not contain any dependencies, so just
# store a dummy comment line, to avoid errors with the Makefile # store a dummy comment line, to avoid errors with the Makefile
@ -209,10 +234,17 @@ sgi)
rm -f "$tmpdepfile" rm -f "$tmpdepfile"
;; ;;
xlc)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
aix) aix)
# The C for AIX Compiler uses -M and outputs the dependencies # The C for AIX Compiler uses -M and outputs the dependencies
# in a .u file. In older versions, this file always lives in the # in a .u file. In older versions, this file always lives in the
# current directory. Also, the AIX compiler puts `$object:' at the # current directory. Also, the AIX compiler puts '$object:' at the
# start of each line; $object doesn't have directory information. # start of each line; $object doesn't have directory information.
# Version 6 uses the directory in both cases. # Version 6 uses the directory in both cases.
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
@ -242,12 +274,11 @@ aix)
test -f "$tmpdepfile" && break test -f "$tmpdepfile" && break
done done
if test -f "$tmpdepfile"; then if test -f "$tmpdepfile"; then
# Each line is of the form `foo.o: dependent.h'. # Each line is of the form 'foo.o: dependent.h'.
# Do two passes, one to just change these to # Do two passes, one to just change these to
# `$object: dependent.h' and one to simply `dependent.h:'. # '$object: dependent.h' and one to simply 'dependent.h:'.
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
# That's a tab and a space in the []. sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
else else
# The sourcefile does not contain any dependencies, so just # The sourcefile does not contain any dependencies, so just
# store a dummy comment line, to avoid errors with the Makefile # store a dummy comment line, to avoid errors with the Makefile
@ -258,23 +289,26 @@ aix)
;; ;;
icc) icc)
# Intel's C compiler understands `-MD -MF file'. However on # Intel's C compiler and tcc (Tiny C Compiler) understand '-MD -MF file'.
# icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c # However on
# $CC -MD -MF foo.d -c -o sub/foo.o sub/foo.c
# ICC 7.0 will fill foo.d with something like # ICC 7.0 will fill foo.d with something like
# foo.o: sub/foo.c # foo.o: sub/foo.c
# foo.o: sub/foo.h # foo.o: sub/foo.h
# which is wrong. We want: # which is wrong. We want
# sub/foo.o: sub/foo.c # sub/foo.o: sub/foo.c
# sub/foo.o: sub/foo.h # sub/foo.o: sub/foo.h
# sub/foo.c: # sub/foo.c:
# sub/foo.h: # sub/foo.h:
# ICC 7.1 will output # ICC 7.1 will output
# foo.o: sub/foo.c sub/foo.h # foo.o: sub/foo.c sub/foo.h
# and will wrap long lines using \ : # and will wrap long lines using '\':
# foo.o: sub/foo.c ... \ # foo.o: sub/foo.c ... \
# sub/foo.h ... \ # sub/foo.h ... \
# ... # ...
# tcc 0.9.26 (FIXME still under development at the moment of writing)
# will emit a similar output, but also prepend the continuation lines
# with horizontal tabulation characters.
"$@" -MD -MF "$tmpdepfile" "$@" -MD -MF "$tmpdepfile"
stat=$? stat=$?
if test $stat -eq 0; then : if test $stat -eq 0; then :
@ -283,15 +317,21 @@ icc)
exit $stat exit $stat
fi fi
rm -f "$depfile" rm -f "$depfile"
# Each line is of the form `foo.o: dependent.h', # Each line is of the form 'foo.o: dependent.h',
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. # or 'foo.o: dep1.h dep2.h \', or ' dep3.h dep4.h \'.
# Do two passes, one to just change these to # Do two passes, one to just change these to
# `$object: dependent.h' and one to simply `dependent.h:'. # '$object: dependent.h' and one to simply 'dependent.h:'.
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" sed -e "s/^[ $tab][ $tab]*/ /" -e "s,^[^:]*:,$object :," \
# Some versions of the HPUX 10.20 sed can't process this invocation < "$tmpdepfile" > "$depfile"
# correctly. Breaking it into two sed invocations is a workaround. sed '
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | s/[ '"$tab"'][ '"$tab"']*/ /g
sed -e 's/$/ :/' >> "$depfile" s/^ *//
s/ *\\*$//
s/^[^:]*: *//
/^$/d
/:$/d
s/$/ :/
' < "$tmpdepfile" >> "$depfile"
rm -f "$tmpdepfile" rm -f "$tmpdepfile"
;; ;;
@ -327,8 +367,13 @@ hp2)
done done
if test -f "$tmpdepfile"; then if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile" sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
# Add `dependent.h:' lines. # Add 'dependent.h:' lines.
sed -ne '2,${; s/^ *//; s/ \\*$//; s/$/:/; p;}' "$tmpdepfile" >> "$depfile" sed -ne '2,${
s/^ *//
s/ \\*$//
s/$/:/
p
}' "$tmpdepfile" >> "$depfile"
else else
echo "#dummy" > "$depfile" echo "#dummy" > "$depfile"
fi fi
@ -337,9 +382,9 @@ hp2)
tru64) tru64)
# The Tru64 compiler uses -MD to generate dependencies as a side # The Tru64 compiler uses -MD to generate dependencies as a side
# effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. # effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
# dependencies in `foo.d' instead, so we check for that too. # dependencies in 'foo.d' instead, so we check for that too.
# Subdirectories are respected. # Subdirectories are respected.
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
test "x$dir" = "x$object" && dir= test "x$dir" = "x$object" && dir=
@ -385,14 +430,59 @@ tru64)
done done
if test -f "$tmpdepfile"; then if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
# That's a tab and a space in the []. sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
else else
echo "#dummy" > "$depfile" echo "#dummy" > "$depfile"
fi fi
rm -f "$tmpdepfile" rm -f "$tmpdepfile"
;; ;;
msvc7)
if test "$libtool" = yes; then
showIncludes=-Wc,-showIncludes
else
showIncludes=-showIncludes
fi
"$@" $showIncludes > "$tmpdepfile"
stat=$?
grep -v '^Note: including file: ' "$tmpdepfile"
if test "$stat" = 0; then :
else
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
# The first sed program below extracts the file names and escapes
# backslashes for cygpath. The second sed program outputs the file
# name when reading, but also accumulates all include files in the
# hold buffer in order to output them again at the end. This only
# works with sed implementations that can handle large buffers.
sed < "$tmpdepfile" -n '
/^Note: including file: *\(.*\)/ {
s//\1/
s/\\/\\\\/g
p
}' | $cygpath_u | sort -u | sed -n '
s/ /\\ /g
s/\(.*\)/'"$tab"'\1 \\/p
s/.\(.*\) \\/\1:/
H
$ {
s/.*/'"$tab"'/
G
p
}' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvc7msys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
#nosideeffect) #nosideeffect)
# This comment above is used by automake to tell side-effect # This comment above is used by automake to tell side-effect
# dependency tracking mechanisms from slower ones. # dependency tracking mechanisms from slower ones.
@ -404,13 +494,13 @@ dashmstdout)
# Remove the call to Libtool. # Remove the call to Libtool.
if test "$libtool" = yes; then if test "$libtool" = yes; then
while test $1 != '--mode=compile'; do while test "X$1" != 'X--mode=compile'; do
shift shift
done done
shift shift
fi fi
# Remove `-o $object'. # Remove '-o $object'.
IFS=" " IFS=" "
for arg for arg
do do
@ -430,15 +520,14 @@ dashmstdout)
done done
test -z "$dashmflag" && dashmflag=-M test -z "$dashmflag" && dashmflag=-M
# Require at least two characters before searching for `:' # Require at least two characters before searching for ':'
# in the target name. This is to cope with DOS-style filenames: # in the target name. This is to cope with DOS-style filenames:
# a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. # a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
"$@" $dashmflag | "$@" $dashmflag |
sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" sed 's:^['"$tab"' ]*[^:'"$tab"' ][^:][^:]*\:['"$tab"' ]*:'"$object"'\: :' > "$tmpdepfile"
rm -f "$depfile" rm -f "$depfile"
cat < "$tmpdepfile" > "$depfile" cat < "$tmpdepfile" > "$depfile"
tr ' ' ' tr ' ' "$nl" < "$tmpdepfile" | \
' < "$tmpdepfile" | \
## Some versions of the HPUX 10.20 sed can't process this invocation ## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround. ## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
@ -455,38 +544,46 @@ makedepend)
"$@" || exit $? "$@" || exit $?
# Remove any Libtool call # Remove any Libtool call
if test "$libtool" = yes; then if test "$libtool" = yes; then
while test $1 != '--mode=compile'; do while test "X$1" != 'X--mode=compile'; do
shift shift
done done
shift shift
fi fi
# X makedepend # X makedepend
shift shift
cleared=no cleared=no eat=no
for arg in "$@"; do for arg
do
case $cleared in case $cleared in
no) no)
set ""; shift set ""; shift
cleared=yes ;; cleared=yes ;;
esac esac
if test $eat = yes; then
eat=no
continue
fi
case "$arg" in case "$arg" in
-D*|-I*) -D*|-I*)
set fnord "$@" "$arg"; shift ;; set fnord "$@" "$arg"; shift ;;
# Strip any option that makedepend may not understand. Remove # Strip any option that makedepend may not understand. Remove
# the object too, otherwise makedepend will parse it as a source file. # the object too, otherwise makedepend will parse it as a source file.
-arch)
eat=yes ;;
-*|$object) -*|$object)
;; ;;
*) *)
set fnord "$@" "$arg"; shift ;; set fnord "$@" "$arg"; shift ;;
esac esac
done done
obj_suffix="`echo $object | sed 's/^.*\././'`" obj_suffix=`echo "$object" | sed 's/^.*\././'`
touch "$tmpdepfile" touch "$tmpdepfile"
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
rm -f "$depfile" rm -f "$depfile"
cat < "$tmpdepfile" > "$depfile" # makedepend may prepend the VPATH from the source file name to the object.
sed '1,2d' "$tmpdepfile" | tr ' ' ' # No need to regex-escape $object, excess matching of '.' is harmless.
' | \ sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
sed '1,2d' "$tmpdepfile" | tr ' ' "$nl" | \
## Some versions of the HPUX 10.20 sed can't process this invocation ## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround. ## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
@ -500,13 +597,13 @@ cpp)
# Remove the call to Libtool. # Remove the call to Libtool.
if test "$libtool" = yes; then if test "$libtool" = yes; then
while test $1 != '--mode=compile'; do while test "X$1" != 'X--mode=compile'; do
shift shift
done done
shift shift
fi fi
# Remove `-o $object'. # Remove '-o $object'.
IFS=" " IFS=" "
for arg for arg
do do
@ -538,13 +635,27 @@ cpp)
msvisualcpp) msvisualcpp)
# Important note: in order to support this mode, a compiler *must* # Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout, regardless of -o, # always write the preprocessed file to stdout.
# because we must use -o when running libtool.
"$@" || exit $? "$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
IFS=" " IFS=" "
for arg for arg
do do
case "$arg" in case "$arg" in
-o)
shift
;;
$object)
shift
;;
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
set fnord "$@" set fnord "$@"
shift shift
@ -557,16 +668,23 @@ msvisualcpp)
;; ;;
esac esac
done done
"$@" -E | "$@" -E 2>/dev/null |
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile" sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
rm -f "$depfile" rm -f "$depfile"
echo "$object : \\" > "$depfile" echo "$object : \\" > "$depfile"
. "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
echo " " >> "$depfile" echo "$tab" >> "$depfile"
. "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
rm -f "$tmpdepfile" rm -f "$tmpdepfile"
;; ;;
msvcmsys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
none) none)
exec "$@" exec "$@"
;; ;;
@ -585,5 +703,6 @@ exit 0
# eval: (add-hook 'write-file-hooks 'time-stamp) # eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion=" # time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-end: "$" # time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End: # End:

View File

@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language. and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge Copyright (c) 1997-2012 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -108,13 +108,26 @@ fprintf(f,
"library and dead code stripping is activated. This leads to link errors.\n" "library and dead code stripping is activated. This leads to link errors.\n"
"Pulling in the header ensures that the array gets flagged as \"someone\n" "Pulling in the header ensures that the array gets flagged as \"someone\n"
"outside this compilation unit might reference this\" and so it will always\n" "outside this compilation unit might reference this\" and so it will always\n"
"be supplied to the linker. */\n\n" "be supplied to the linker. */\n\n");
/* Force config.h in z/OS */
#if defined NATIVE_ZOS
fprintf(f,
"/* For z/OS, config.h is forced */\n"
"#ifndef HAVE_CONFIG_H\n"
"#define HAVE_CONFIG_H 1\n"
"#endif\n\n");
#endif
fprintf(f,
"#ifdef HAVE_CONFIG_H\n" "#ifdef HAVE_CONFIG_H\n"
"#include \"config.h\"\n" "#include \"config.h\"\n"
"#endif\n\n" "#endif\n\n"
"#include \"pcre_internal.h\"\n\n"); "#include \"pcre_internal.h\"\n\n");
fprintf(f, fprintf(f,
"const unsigned char _pcre_default_tables[] = {\n\n" "const pcre_uint8 PRIV(default_tables)[] = {\n\n"
"/* This table is a lower casing table. */\n\n"); "/* This table is a lower casing table. */\n\n");
fprintf(f, " "); fprintf(f, " ");

View File

@ -18,6 +18,12 @@ The HTML documentation for PCRE comprises the following pages:
<tr><td><a href="pcre.html">pcre</a></td> <tr><td><a href="pcre.html">pcre</a></td>
<td>&nbsp;&nbsp;Introductory page</td></tr> <td>&nbsp;&nbsp;Introductory page</td></tr>
<tr><td><a href="pcre16.html">pcre16</a></td>
<td>&nbsp;&nbsp;Discussion of the 16-bit PCRE library</td></tr>
<tr><td><a href="pcre32.html">pcre32</a></td>
<td>&nbsp;&nbsp;Discussion of the 32-bit PCRE library</td></tr>
<tr><td><a href="pcre-config.html">pcre-config</a></td> <tr><td><a href="pcre-config.html">pcre-config</a></td>
<td>&nbsp;&nbsp;Information about the installation configuration</td></tr> <td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
@ -36,9 +42,18 @@ The HTML documentation for PCRE comprises the following pages:
<tr><td><a href="pcrecpp.html">pcrecpp</a></td> <tr><td><a href="pcrecpp.html">pcrecpp</a></td>
<td>&nbsp;&nbsp;The C++ wrapper for the PCRE library</td></tr> <td>&nbsp;&nbsp;The C++ wrapper for the PCRE library</td></tr>
<tr><td><a href="pcredemo.html">pcredemo</a></td>
<td>&nbsp;&nbsp;A demonstration C program that uses the PCRE library</td></tr>
<tr><td><a href="pcregrep.html">pcregrep</a></td> <tr><td><a href="pcregrep.html">pcregrep</a></td>
<td>&nbsp;&nbsp;The <b>pcregrep</b> command</td></tr> <td>&nbsp;&nbsp;The <b>pcregrep</b> command</td></tr>
<tr><td><a href="pcrejit.html">pcrejit</a></td>
<td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
<tr><td><a href="pcrelimits.html">pcrelimits</a></td>
<td>&nbsp;&nbsp;Details of size and other limits</td></tr>
<tr><td><a href="pcrematching.html">pcrematching</a></td> <tr><td><a href="pcrematching.html">pcrematching</a></td>
<td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr> <td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>
@ -58,7 +73,7 @@ The HTML documentation for PCRE comprises the following pages:
<td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr> <td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr>
<tr><td><a href="pcresample.html">pcresample</a></td> <tr><td><a href="pcresample.html">pcresample</a></td>
<td>&nbsp;&nbsp;Description of the sample program</td></tr> <td>&nbsp;&nbsp;Discussion of the pcredemo program</td></tr>
<tr><td><a href="pcrestack.html">pcrestack</a></td> <tr><td><a href="pcrestack.html">pcrestack</a></td>
<td>&nbsp;&nbsp;Discussion of PCRE's stack usage</td></tr> <td>&nbsp;&nbsp;Discussion of PCRE's stack usage</td></tr>
@ -68,15 +83,22 @@ The HTML documentation for PCRE comprises the following pages:
<tr><td><a href="pcretest.html">pcretest</a></td> <tr><td><a href="pcretest.html">pcretest</a></td>
<td>&nbsp;&nbsp;The <b>pcretest</b> command for testing PCRE</td></tr> <td>&nbsp;&nbsp;The <b>pcretest</b> command for testing PCRE</td></tr>
<tr><td><a href="pcreunicode.html">pcreunicode</a></td>
<td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
</table> </table>
<p> <p>
There are also individual pages that summarize the interface for each function There are also individual pages that summarize the interface for each function
in the library: in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
functions.
</p> </p>
<table> <table>
<tr><td><a href="pcre_assign_jit_stack.html">pcre_assign_jit_stack</a></td>
<td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
<tr><td><a href="pcre_compile.html">pcre_compile</a></td> <tr><td><a href="pcre_compile.html">pcre_compile</a></td>
<td>&nbsp;&nbsp;Compile a regular expression</td></tr> <td>&nbsp;&nbsp;Compile a regular expression</td></tr>
@ -96,6 +118,9 @@ in the library:
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(DFA algorithm; <i>not</i> Perl compatible)</td></tr> (DFA algorithm; <i>not</i> Perl compatible)</td></tr>
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
<td>&nbsp;&nbsp;Free study data</td></tr>
<tr><td><a href="pcre_exec.html">pcre_exec</a></td> <tr><td><a href="pcre_exec.html">pcre_exec</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(Perl compatible)</td></tr> (Perl compatible)</td></tr>
@ -124,15 +149,30 @@ in the library:
<tr><td><a href="pcre_info.html">pcre_info</a></td> <tr><td><a href="pcre_info.html">pcre_info</a></td>
<td>&nbsp;&nbsp;Obsolete information extraction function</td></tr> <td>&nbsp;&nbsp;Obsolete information extraction function</td></tr>
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
<td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>
<tr><td><a href="pcre_jit_stack_free.html">pcre_jit_stack_free</a></td>
<td>&nbsp;&nbsp;Free a JIT matching stack</td></tr>
<tr><td><a href="pcre_maketables.html">pcre_maketables</a></td> <tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
<td>&nbsp;&nbsp;Build character tables in current locale</td></tr> <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
<tr><td><a href="pcre_pattern_to_host_byte_order.html">pcre_pattern_to_host_byte_order</a></td>
<td>&nbsp;&nbsp;Convert compiled pattern to host byte order if necessary</td></tr>
<tr><td><a href="pcre_refcount.html">pcre_refcount</a></td> <tr><td><a href="pcre_refcount.html">pcre_refcount</a></td>
<td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr> <td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr>
<tr><td><a href="pcre_study.html">pcre_study</a></td> <tr><td><a href="pcre_study.html">pcre_study</a></td>
<td>&nbsp;&nbsp;Study a compiled pattern</td></tr> <td>&nbsp;&nbsp;Study a compiled pattern</td></tr>
<tr><td><a href="pcre_utf16_to_host_byte_order.html">pcre_utf16_to_host_byte_order</a></td>
<td>&nbsp;&nbsp;Convert UTF-16 string to host byte order if necessary</td></tr>
<tr><td><a href="pcre_utf32_to_host_byte_order.html">pcre_utf32_to_host_byte_order</a></td>
<td>&nbsp;&nbsp;Convert UTF-32 string to host byte order if necessary</td></tr>
<tr><td><a href="pcre_version.html">pcre_version</a></td> <tr><td><a href="pcre_version.html">pcre_version</a></td>
<td>&nbsp;&nbsp;Return PCRE version and release date</td></tr> <td>&nbsp;&nbsp;Return PCRE version and release date</td></tr>
</table> </table>

View File

@ -23,12 +23,17 @@ man page, in case the conversion went wrong.
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br> <br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P> <P>
<b>pcre-config [--prefix] [--exec-prefix] [--version] [--libs]</b> <b>pcre-config [--prefix] [--exec-prefix] [--version] [--libs]</b>
<b>[--libs-posix] [--cflags] [--cflags-posix]</b> <b>[--libs16] [--libs32] [--libs-cpp] [--libs-posix]</b>
<b>[--cflags] [--cflags-posix]</b>
</P> </P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br> <br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P> <P>
<b>pcre-config</b> returns the configuration of the installed PCRE <b>pcre-config</b> returns the configuration of the installed PCRE
libraries and the options required to compile a program to use them. libraries and the options required to compile a program to use them. Some of
the options apply only to the 8-bit, or 16-bit, or 32-bit libraries,
respectively, and are
not available if only one of those libraries has been built. If an unavailable
option is encountered, the "usage" information is output.
</P> </P>
<br><a name="SEC3" href="#TOC1">OPTIONS</a><br> <br><a name="SEC3" href="#TOC1">OPTIONS</a><br>
<P> <P>
@ -50,12 +55,28 @@ output.
<P> <P>
<b>--libs</b> <b>--libs</b>
Writes to the standard output the command line options required to link Writes to the standard output the command line options required to link
with PCRE (<b>-lpcre</b> on many systems). with the 8-bit PCRE library (<b>-lpcre</b> on many systems).
</P>
<P>
<b>--libs16</b>
Writes to the standard output the command line options required to link
with the 16-bit PCRE library (<b>-lpcre16</b> on many systems).
</P>
<P>
<b>--libs32</b>
Writes to the standard output the command line options required to link
with the 32-bit PCRE library (<b>-lpcre32</b> on many systems).
</P>
<P>
<b>--libs-cpp</b>
Writes to the standard output the command line options required to link with
PCRE's C++ wrapper library (<b>-lpcrecpp</b> <b>-lpcre</b> on many
systems).
</P> </P>
<P> <P>
<b>--libs-posix</b> <b>--libs-posix</b>
Writes to the standard output the command line options required to link with Writes to the standard output the command line options required to link with
the PCRE posix emulation library (<b>-lpcreposix</b> <b>-lpcre</b> on many PCRE's POSIX API wrapper library (<b>-lpcreposix</b> <b>-lpcre</b> on many
systems). systems).
</P> </P>
<P> <P>
@ -67,7 +88,7 @@ many systems).
<P> <P>
<b>--cflags-posix</b> <b>--cflags-posix</b>
Writes to the standard output the command line options required to compile Writes to the standard output the command line options required to compile
files that use the PCRE posix emulation library (this may include some <b>-I</b> files that use PCRE's POSIX API wrapper library (this may include some <b>-I</b>
options, but is blank on many systems). options, but is blank on many systems).
</P> </P>
<br><a name="SEC4" href="#TOC1">SEE ALSO</a><br> <br><a name="SEC4" href="#TOC1">SEE ALSO</a><br>
@ -77,11 +98,11 @@ options, but is blank on many systems).
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br> <br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
<P> <P>
This manual page was originally written by Mark Baker for the Debian GNU/Linux This manual page was originally written by Mark Baker for the Debian GNU/Linux
system. It has been slightly revised as a generic PCRE man page. system. It has been subsequently revised as a generic PCRE man page.
</P> </P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br> <br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 18 April 2007 Last updated: 24 June 2012
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -14,41 +14,69 @@ man page, in case the conversion went wrong.
<br> <br>
<ul> <ul>
<li><a name="TOC1" href="#SEC1">INTRODUCTION</a> <li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
<li><a name="TOC2" href="#SEC2">USER DOCUMENTATION</a> <li><a name="TOC2" href="#SEC2">SECURITY CONSIDERATIONS</a>
<li><a name="TOC3" href="#SEC3">LIMITATIONS</a> <li><a name="TOC3" href="#SEC3">USER DOCUMENTATION</a>
<li><a name="TOC4" href="#SEC4">UTF-8 AND UNICODE PROPERTY SUPPORT</a> <li><a name="TOC4" href="#SEC4">AUTHOR</a>
<li><a name="TOC5" href="#SEC5">AUTHOR</a> <li><a name="TOC5" href="#SEC5">REVISION</a>
<li><a name="TOC6" href="#SEC6">REVISION</a>
</ul> </ul>
<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br> <br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
<P> <P>
The PCRE library is a set of functions that implement regular expression The PCRE library is a set of functions that implement regular expression
pattern matching using the same syntax and semantics as Perl, with just a few pattern matching using the same syntax and semantics as Perl, with just a few
differences. Certain features that appeared in Python and PCRE before they differences. Some features that appeared in Python and PCRE before they
appeared in Perl are also available using the Python syntax. There is also some appeared in Perl are also available using the Python syntax, there is some
support for certain .NET and Oniguruma syntax items, and there is an option for support for one or two .NET and Oniguruma syntax items, and there is an option
requesting some minor changes that give better JavaScript compatibility. for requesting some minor changes that give better JavaScript compatibility.
</P> </P>
<P> <P>
The current implementation of PCRE (release 7.x) corresponds approximately with Starting with release 8.30, it is possible to compile two separate PCRE
Perl 5.10, including support for UTF-8 encoded strings and Unicode general libraries: the original, which supports 8-bit character strings (including
category properties. However, UTF-8 and Unicode support has to be explicitly UTF-8 strings), and a second library that supports 16-bit character strings
(including UTF-16 strings). The build process allows either one or both to be
built. The majority of the work to make this possible was done by Zoltan
Herczeg.
</P>
<P>
Starting with release 8.32 it is possible to compile a third separate PCRE
library, which supports 32-bit character strings (including
UTF-32 strings). The build process allows any set of the 8-, 16- and 32-bit
libraries. The work to make this possible was done by Christian Persch.
</P>
<P>
The three libraries contain identical sets of functions, except that the names
in the 16-bit library start with <b>pcre16_</b> instead of <b>pcre_</b>, and the
names in the 32-bit library start with <b>pcre32_</b> instead of <b>pcre_</b>. To
avoid over-complication and reduce the documentation maintenance load, most of
the documentation describes the 8-bit library, with the differences for the
16-bit and 32-bit libraries described separately in the
<a href="pcre16.html"><b>pcre16</b></a>
and
<a href="pcre32.html"><b>pcre32</b></a>
pages. References to functions or structures of the form <i>pcre[16|32]_xxx</i>
should be read as meaning "<i>pcre_xxx</i> when using the 8-bit library,
<i>pcre16_xxx</i> when using the 16-bit library, or <i>pcre32_xxx</i> when using
the 32-bit library".
</P>
<P>
The current implementation of PCRE corresponds approximately with Perl 5.12,
including support for UTF-8/16/32 encoded strings and Unicode general category
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
enabled; it is not the default. The Unicode tables correspond to Unicode enabled; it is not the default. The Unicode tables correspond to Unicode
release 5.1. release 6.2.0.
</P> </P>
<P> <P>
In addition to the Perl-compatible matching function, PCRE contains an In addition to the Perl-compatible matching function, PCRE contains an
alternative matching function that matches the same compiled patterns in a alternative function that matches the same compiled patterns in a different
different way. In certain circumstances, the alternative function has some way. In certain circumstances, the alternative function has some advantages.
advantages. For a discussion of the two matching algorithms, see the For a discussion of the two matching algorithms, see the
<a href="pcrematching.html"><b>pcrematching</b></a> <a href="pcrematching.html"><b>pcrematching</b></a>
page. page.
</P> </P>
<P> <P>
PCRE is written in C and released as a C library. A number of people have PCRE is written in C and released as a C library. A number of people have
written wrappers and interfaces of various kinds. In particular, Google Inc. written wrappers and interfaces of various kinds. In particular, Google Inc.
have provided a comprehensive C++ wrapper. This is now included as part of the have provided a comprehensive C++ wrapper for the 8-bit library. This is now
PCRE distribution. The included as part of the PCRE distribution. The
<a href="pcrecpp.html"><b>pcrecpp</b></a> <a href="pcrecpp.html"><b>pcrecpp</b></a>
page has details of this interface. Other people's contributions can be found page has details of this interface. Other people's contributions can be found
in the <i>Contrib</i> directory at the primary FTP site, which is: in the <i>Contrib</i> directory at the primary FTP site, which is:
@ -72,216 +100,86 @@ function makes it possible for a client to discover which features are
available. The features themselves are described in the available. The features themselves are described in the
<a href="pcrebuild.html"><b>pcrebuild</b></a> <a href="pcrebuild.html"><b>pcrebuild</b></a>
page. Documentation about building PCRE for various operating systems can be page. Documentation about building PCRE for various operating systems can be
found in the <b>README</b> file in the source distribution. found in the <b>README</b> and <b>NON-AUTOTOOLS_BUILD</b> files in the source
distribution.
</P> </P>
<P> <P>
The library contains a number of undocumented internal functions and data The libraries contain a number of undocumented internal functions and data
tables that are used by more than one of the exported external functions, but tables that are used by more than one of the exported external functions, but
which are not intended for use by external callers. Their names all begin with which are not intended for use by external callers. Their names all begin with
"_pcre_", which hopefully will not provoke any name clashes. In some "_pcre_" or "_pcre16_" or "_pcre32_", which hopefully will not provoke any name
environments, it is possible to control which external symbols are exported clashes. In some environments, it is possible to control which external symbols
when a shared library is built, and in these cases the undocumented symbols are are exported when a shared library is built, and in these cases the
not exported. undocumented symbols are not exported.
</P> </P>
<br><a name="SEC2" href="#TOC1">USER DOCUMENTATION</a><br> <br><a name="SEC2" href="#TOC1">SECURITY CONSIDERATIONS</a><br>
<P>
If you are using PCRE in a non-UTF application that permits users to supply
arbitrary patterns for compilation, you should be aware of a feature that
allows users to turn on UTF support from within a pattern, provided that PCRE
was built with UTF support. For example, an 8-bit pattern that begins with
"(*UTF8)" or "(*UTF)" turns on UTF-8 mode, which interprets patterns and
subjects as strings of UTF-8 characters instead of individual 8-bit characters.
This causes both the pattern and any data against which it is matched to be
checked for UTF-8 validity. If the data string is very long, such a check might
use sufficiently many resources as to cause your application to lose
performance.
</P>
<P>
The best way of guarding against this possibility is to use the
<b>pcre_fullinfo()</b> function to check the compiled pattern's options for UTF.
</P>
<P>
If your application is one that supports UTF, be aware that validity checking
can take time. If the same data string is to be matched many times, you can use
the PCRE_NO_UTF[8|16|32]_CHECK option for the second and subsequent matches to
save redundant checks.
</P>
<P>
Another way that performance can be hit is by running a pattern that has a very
large search tree against a string that will never match. Nested unlimited
repeats in a pattern are a common example. PCRE provides some protection
against this: see the PCRE_EXTRA_MATCH_LIMIT feature in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page.
</P>
<br><a name="SEC3" href="#TOC1">USER DOCUMENTATION</a><br>
<P> <P>
The user documentation for PCRE comprises a number of different sections. In The user documentation for PCRE comprises a number of different sections. In
the "man" format, each of these is a separate "man page". In the HTML format, the "man" format, each of these is a separate "man page". In the HTML format,
each is a separate page, linked from the index page. In the plain text format, each is a separate page, linked from the index page. In the plain text format,
all the sections are concatenated, for ease of searching. The sections are as all the sections, except the <b>pcredemo</b> section, are concatenated, for ease
follows: of searching. The sections are as follows:
<pre> <pre>
pcre this document pcre this document
pcre16 details of the 16-bit library
pcre32 details of the 32-bit library
pcre-config show PCRE installation configuration information pcre-config show PCRE installation configuration information
pcreapi details of PCRE's native C API pcreapi details of PCRE's native C API
pcrebuild options for building PCRE pcrebuild options for building PCRE
pcrecallout details of the callout feature pcrecallout details of the callout feature
pcrecompat discussion of Perl compatibility pcrecompat discussion of Perl compatibility
pcrecpp details of the C++ wrapper pcrecpp details of the C++ wrapper for the 8-bit library
pcregrep description of the <b>pcregrep</b> command pcredemo a demonstration C program that uses PCRE
pcregrep description of the <b>pcregrep</b> command (8-bit only)
pcrejit discussion of the just-in-time optimization support
pcrelimits details of size and other limits
pcrematching discussion of the two matching algorithms pcrematching discussion of the two matching algorithms
pcrepartial details of the partial matching facility pcrepartial details of the partial matching facility
pcrepattern syntax and semantics of supported regular expressions pcrepattern syntax and semantics of supported regular expressions
pcresyntax quick syntax reference
pcreperform discussion of performance issues pcreperform discussion of performance issues
pcreposix the POSIX-compatible C API pcreposix the POSIX-compatible C API for the 8-bit library
pcreprecompile details of saving and re-using precompiled patterns pcreprecompile details of saving and re-using precompiled patterns
pcresample discussion of the sample program pcresample discussion of the pcredemo program
pcrestack discussion of stack usage pcrestack discussion of stack usage
pcresyntax quick syntax reference
pcretest description of the <b>pcretest</b> testing command pcretest description of the <b>pcretest</b> testing command
pcreunicode discussion of Unicode and UTF-8/16/32 support
</pre> </pre>
In addition, in the "man" and HTML formats, there is a short page for each In addition, in the "man" and HTML formats, there is a short page for each
C library function, listing its arguments and results. C library function, listing its arguments and results.
</P> </P>
<br><a name="SEC3" href="#TOC1">LIMITATIONS</a><br> <br><a name="SEC4" href="#TOC1">AUTHOR</a><br>
<P>
There are some size limitations in PCRE but it is hoped that they will never in
practice be relevant.
</P>
<P>
The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
compiled with the default internal linkage size of 2. If you want to process
regular expressions that are truly enormous, you can compile PCRE with an
internal linkage size of 3 or 4 (see the <b>README</b> file in the source
distribution and the
<a href="pcrebuild.html"><b>pcrebuild</b></a>
documentation for details). In these cases the limit is substantially larger.
However, the speed of execution is slower.
</P>
<P>
All values in repeating quantifiers must be less than 65536.
</P>
<P>
There is no limit to the number of parenthesized subpatterns, but there can be
no more than 65535 capturing subpatterns.
</P>
<P>
The maximum length of name for a named subpattern is 32 characters, and the
maximum number of named subpatterns is 10000.
</P>
<P>
The maximum length of a subject string is the largest positive number that an
integer variable can hold. However, when using the traditional matching
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
This means that the available stack space may limit the size of a subject
string that can be processed by certain patterns. For a discussion of stack
issues, see the
<a href="pcrestack.html"><b>pcrestack</b></a>
documentation.
<a name="utf8support"></a></P>
<br><a name="SEC4" href="#TOC1">UTF-8 AND UNICODE PROPERTY SUPPORT</a><br>
<P>
From release 3.3, PCRE has had some support for character strings encoded in
the UTF-8 format. For release 4.0 this was greatly extended to cover most
common requirements, and in release 5.0 additional support for Unicode general
category properties was added.
</P>
<P>
In order process UTF-8 strings, you must build PCRE to include UTF-8 support in
the code, and, in addition, you must call
<a href="pcre_compile.html"><b>pcre_compile()</b></a>
with the PCRE_UTF8 option flag, or the pattern must start with the sequence
(*UTF8). When either of these is the case, both the pattern and any subject
strings that are matched against it are treated as UTF-8 strings instead of
just strings of bytes.
</P>
<P>
If you compile PCRE with UTF-8 support, but do not use it at run time, the
library will be a bit bigger, but the additional run time overhead is limited
to testing the PCRE_UTF8 flag occasionally, so should not be very big.
</P>
<P>
If PCRE is built with Unicode character property support (which implies UTF-8
support), the escape sequences \p{..}, \P{..}, and \X are supported.
The available properties that can be tested are limited to the general
category properties such as Lu for an upper case letter or Nd for a decimal
number, the Unicode script names such as Arabic or Han, and the derived
properties Any and L&. A full list is given in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation. Only the short names for properties are supported. For example,
\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
compatibility with Perl 5.6. PCRE does not support this.
<a name="utf8strings"></a></P>
<br><b>
Validity of UTF-8 strings
</b><br>
<P>
When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
are (by default) checked for validity on entry to the relevant functions. From
release 7.3 of PCRE, the check is according the rules of RFC 3629, which are
themselves derived from the Unicode specification. Earlier releases of PCRE
followed the rules of RFC 2279, which allows the full range of 31-bit values (0
to 0x7FFFFFFF). The current check allows only values in the range U+0 to
U+10FFFF, excluding U+D800 to U+DFFF.
</P>
<P>
The excluded code points are the "Low Surrogate Area" of Unicode, of which the
Unicode Standard says this: "The Low Surrogate Area does not contain any
character assignments, consequently no character code charts or namelists are
provided for this area. Surrogates are reserved for use with UTF-16 and then
must be used in pairs." The code points that are encoded by UTF-16 pairs are
available as independent code points in the UTF-8 encoding. (In other words,
the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
UTF-8.)
</P>
<P>
If an invalid UTF-8 string is passed to PCRE, an error return
(PCRE_ERROR_BADUTF8) is given. In some situations, you may already know that
your strings are valid, and therefore want to skip these checks in order to
improve performance. If you set the PCRE_NO_UTF8_CHECK flag at compile time or
at run time, PCRE assumes that the pattern or subject it is given
(respectively) contains only valid UTF-8 codes. In this case, it does not
diagnose an invalid UTF-8 string.
</P>
<P>
If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
happens depends on why the string is invalid. If the string conforms to the
"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
rules of RFC 2279. However, if the string does not even conform to RFC 2279,
the result is undefined. Your program may crash.
</P>
<P>
If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
encoded in a UTF-8-like manner as per the old RFC, you can set
PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
situation, you will have to apply your own validity check.
</P>
<br><b>
General comments about UTF-8 mode
</b><br>
<P>
1. An unbraced hexadecimal escape sequence (such as \xb3) matches a two-byte
UTF-8 character if the value is greater than 127.
</P>
<P>
2. Octal numbers up to \777 are recognized, and match two-byte UTF-8
characters for values greater than \177.
</P>
<P>
3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
bytes, for example: \x{100}{3}.
</P>
<P>
4. The dot metacharacter matches one UTF-8 character instead of a single byte.
</P>
<P>
5. The escape sequence \C can be used to match a single byte in UTF-8 mode,
but its use can lead to some strange effects. This facility is not available in
the alternative matching function, <b>pcre_dfa_exec()</b>.
</P>
<P>
6. The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
test characters of any code value, but the characters that PCRE recognizes as
digits, spaces, or word characters remain the same set as before, all with
values less than 256. This remains true even when PCRE includes Unicode
property support, because to do otherwise would slow down PCRE in many common
cases. If you really want to test for a wider sense of, say, "digit", you
must use Unicode property tests such as \p{Nd}. Note that this also applies to
\b, because it is defined in terms of \w and \W.
</P>
<P>
7. Similarly, characters that match the POSIX named character classes are all
low-valued characters.
</P>
<P>
8. However, the Perl 5.10 horizontal and vertical whitespace matching escapes
(\h, \H, \v, and \V) do match all the appropriate Unicode characters.
</P>
<P>
9. Case-insensitive matching applies only to characters whose values are less
than 128, unless PCRE is built with Unicode property support. Even when Unicode
property support is available, PCRE still uses its own character tables when
checking the case of low-valued characters, so as not to degrade performance.
The Unicode property information is used only for characters with higher
values. Even when Unicode property support is available, PCRE supports
case-insensitive matching only when there is a one-to-one mapping between a
letter's cases. There are a small number of many-to-one mappings in Unicode;
these are not supported by PCRE.
</P>
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
<P> <P>
Philip Hazel Philip Hazel
<br> <br>
@ -295,11 +193,11 @@ Putting an actual email address here seems to have been a spam magnet, so I've
taken it away. If you want to email me, use my two initials, followed by the taken it away. If you want to email me, use my two initials, followed by the
two digits 10, at the domain cam.ac.uk. two digits 10, at the domain cam.ac.uk.
</P> </P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br> <br><a name="SEC5" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 11 April 2009 Last updated: 11 November 2012
<br> <br>
Copyright &copy; 1997-2009 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -0,0 +1,383 @@
<html>
<head>
<title>pcre16 specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre16 man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PCRE 16-BIT API BASIC FUNCTIONS</a>
<li><a name="TOC2" href="#SEC2">PCRE 16-BIT API STRING EXTRACTION FUNCTIONS</a>
<li><a name="TOC3" href="#SEC3">PCRE 16-BIT API AUXILIARY FUNCTIONS</a>
<li><a name="TOC4" href="#SEC4">PCRE 16-BIT API INDIRECTED FUNCTIONS</a>
<li><a name="TOC5" href="#SEC5">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a>
<li><a name="TOC6" href="#SEC6">THE PCRE 16-BIT LIBRARY</a>
<li><a name="TOC7" href="#SEC7">THE HEADER FILE</a>
<li><a name="TOC8" href="#SEC8">THE LIBRARY NAME</a>
<li><a name="TOC9" href="#SEC9">STRING TYPES</a>
<li><a name="TOC10" href="#SEC10">STRUCTURE TYPES</a>
<li><a name="TOC11" href="#SEC11">16-BIT FUNCTIONS</a>
<li><a name="TOC12" href="#SEC12">SUBJECT STRING OFFSETS</a>
<li><a name="TOC13" href="#SEC13">NAMED SUBPATTERNS</a>
<li><a name="TOC14" href="#SEC14">OPTION NAMES</a>
<li><a name="TOC15" href="#SEC15">CHARACTER CODES</a>
<li><a name="TOC16" href="#SEC16">ERROR NAMES</a>
<li><a name="TOC17" href="#SEC17">ERROR TEXTS</a>
<li><a name="TOC18" href="#SEC18">CALLOUTS</a>
<li><a name="TOC19" href="#SEC19">TESTING</a>
<li><a name="TOC20" href="#SEC20">NOT SUPPORTED IN 16-BIT MODE</a>
<li><a name="TOC21" href="#SEC21">AUTHOR</a>
<li><a name="TOC22" href="#SEC22">REVISION</a>
</ul>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<br><a name="SEC1" href="#TOC1">PCRE 16-BIT API BASIC FUNCTIONS</a><br>
<P>
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
<b>int *<i>errorcodeptr</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>);</b>
</P>
<P>
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
</P>
<P>
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
</P>
<P>
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
</P>
<br><a name="SEC2" href="#TOC1">PCRE 16-BIT API STRING EXTRACTION FUNCTIONS</a><br>
<P>
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
</P>
<P>
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
<b>int <i>buffersize</i>);</b>
</P>
<P>
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
</P>
<P>
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>name</i>);</b>
</P>
<P>
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
</P>
<P>
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
</P>
<P>
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
</P>
<P>
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
</P>
<P>
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
</P>
<br><a name="SEC3" href="#TOC1">PCRE 16-BIT API AUXILIARY FUNCTIONS</a><br>
<P>
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
</P>
<P>
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
</P>
<P>
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
</P>
<P>
<b>const unsigned char *pcre16_maketables(void);</b>
</P>
<P>
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
</P>
<P>
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
<b>const char *pcre16_version(void);</b>
</P>
<P>
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
</P>
<br><a name="SEC4" href="#TOC1">PCRE 16-BIT API INDIRECTED FUNCTIONS</a><br>
<P>
<b>void *(*pcre16_malloc)(size_t);</b>
</P>
<P>
<b>void (*pcre16_free)(void *);</b>
</P>
<P>
<b>void *(*pcre16_stack_malloc)(size_t);</b>
</P>
<P>
<b>void (*pcre16_stack_free)(void *);</b>
</P>
<P>
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
</P>
<br><a name="SEC5" href="#TOC1">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a><br>
<P>
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
<b>int <i>keep_boms</i>);</b>
</P>
<br><a name="SEC6" href="#TOC1">THE PCRE 16-BIT LIBRARY</a><br>
<P>
Starting with release 8.30, it is possible to compile a PCRE library that
supports 16-bit character strings, including UTF-16 strings, as well as or
instead of the original 8-bit library. The majority of the work to make this
possible was done by Zoltan Herczeg. The two libraries contain identical sets
of functions, used in exactly the same way. Only the names of the functions and
the data types of their arguments and results are different. To avoid
over-complication and reduce the documentation maintenance load, most of the
PCRE documentation describes the 8-bit library, with only occasional references
to the 16-bit library. This page describes what is different when you use the
16-bit library.
</P>
<P>
WARNING: A single application can be linked with both libraries, but you must
take care when processing any particular pattern to use functions from just one
library. For example, if you want to study a pattern that was compiled with
<b>pcre16_compile()</b>, you must do so with <b>pcre16_study()</b>, not
<b>pcre_study()</b>, and you must free the study data with
<b>pcre16_free_study()</b>.
</P>
<br><a name="SEC7" href="#TOC1">THE HEADER FILE</a><br>
<P>
There is only one header file, <b>pcre.h</b>. It contains prototypes for all the
functions in all libraries, as well as definitions of flags, structures, error
codes, etc.
</P>
<br><a name="SEC8" href="#TOC1">THE LIBRARY NAME</a><br>
<P>
In Unix-like systems, the 16-bit library is called <b>libpcre16</b>, and can
normally be accessed by adding <b>-lpcre16</b> to the command for linking an
application that uses PCRE.
</P>
<br><a name="SEC9" href="#TOC1">STRING TYPES</a><br>
<P>
In the 8-bit library, strings are passed to PCRE library functions as vectors
of bytes with the C type "char *". In the 16-bit library, strings are passed as
vectors of unsigned 16-bit quantities. The macro PCRE_UCHAR16 specifies an
appropriate data type, and PCRE_SPTR16 is defined as "const PCRE_UCHAR16 *". In
very many environments, "short int" is a 16-bit data type. When PCRE is built,
it defines PCRE_UCHAR16 as "unsigned short int", but checks that it really is a
16-bit data type. If it is not, the build fails with an error message telling
the maintainer to modify the definition appropriately.
</P>
<br><a name="SEC10" href="#TOC1">STRUCTURE TYPES</a><br>
<P>
The types of the opaque structures that are used for compiled 16-bit patterns
and JIT stacks are <b>pcre16</b> and <b>pcre16_jit_stack</b> respectively. The
type of the user-accessible structure that is returned by <b>pcre16_study()</b>
is <b>pcre16_extra</b>, and the type of the structure that is used for passing
data to a callout function is <b>pcre16_callout_block</b>. These structures
contain the same fields, with the same names, as their 8-bit counterparts. The
only difference is that pointers to character strings are 16-bit instead of
8-bit types.
</P>
<br><a name="SEC11" href="#TOC1">16-BIT FUNCTIONS</a><br>
<P>
For every function in the 8-bit library there is a corresponding function in
the 16-bit library with a name that starts with <b>pcre16_</b> instead of
<b>pcre_</b>. The prototypes are listed above. In addition, there is one extra
function, <b>pcre16_utf16_to_host_byte_order()</b>. This is a utility function
that converts a UTF-16 character string to host byte order if necessary. The
other 16-bit functions expect the strings they are passed to be in host byte
order.
</P>
<P>
The <i>input</i> and <i>output</i> arguments of
<b>pcre16_utf16_to_host_byte_order()</b> may point to the same address, that is,
conversion in place is supported. The output buffer must be at least as long as
the input.
</P>
<P>
The <i>length</i> argument specifies the number of 16-bit data units in the
input string; a negative value specifies a zero-terminated string.
</P>
<P>
If <i>byte_order</i> is NULL, it is assumed that the string starts off in host
byte order. This may be changed by byte-order marks (BOMs) anywhere in the
string (commonly as the first character).
</P>
<P>
If <i>byte_order</i> is not NULL, a non-zero value of the integer to which it
points means that the input starts off in host byte order, otherwise the
opposite order is assumed. Again, BOMs in the string can change this. The final
byte order is passed back at the end of processing.
</P>
<P>
If <i>keep_boms</i> is not zero, byte-order mark characters (0xfeff) are copied
into the output string. Otherwise they are discarded.
</P>
<P>
The result of the function is the number of 16-bit units placed into the output
buffer, including the zero terminator if the string was zero-terminated.
</P>
<br><a name="SEC12" href="#TOC1">SUBJECT STRING OFFSETS</a><br>
<P>
The offsets within subject strings that are returned by the matching functions
are in 16-bit units rather than bytes.
</P>
<br><a name="SEC13" href="#TOC1">NAMED SUBPATTERNS</a><br>
<P>
The name-to-number translation table that is maintained for named subpatterns
uses 16-bit characters. The <b>pcre16_get_stringtable_entries()</b> function
returns the length of each entry in the table as the number of 16-bit data
units.
</P>
<br><a name="SEC14" href="#TOC1">OPTION NAMES</a><br>
<P>
There are two new general option names, PCRE_UTF16 and PCRE_NO_UTF16_CHECK,
which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
fact, these new options define the same bits in the options word. There is a
discussion about the
<a href="pcreunicode.html#utf16strings">validity of UTF-16 strings</a>
in the
<a href="pcreunicode.html"><b>pcreunicode</b></a>
page.
</P>
<P>
For the <b>pcre16_config()</b> function there is an option PCRE_CONFIG_UTF16
that returns 1 if UTF-16 support is configured, otherwise 0. If this option is
given to <b>pcre_config()</b> or <b>pcre32_config()</b>, or if the
PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 option is given to <b>pcre16_config()</b>,
the result is the PCRE_ERROR_BADOPTION error.
</P>
<br><a name="SEC15" href="#TOC1">CHARACTER CODES</a><br>
<P>
In 16-bit mode, when PCRE_UTF16 is not set, character values are treated in the
same way as in 8-bit, non-UTF-8 mode, except, of course, that they can range
from 0 to 0xffff instead of 0 to 0xff. Character types for characters up to
0xff can therefore be influenced by the locale in the same way as before.
Characters greater than 0xff have only one case, and no "type" (such as letter
or digit).
</P>
<P>
In UTF-16 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
the exception of values in the range 0xd800 to 0xdfff because those are
"surrogate" values that are used in pairs to encode values greater than 0xffff.
</P>
<P>
A UTF-16 string can indicate its endianness by a special code known as a
byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
to be in host byte order. A utility function called
<b>pcre16_utf16_to_host_byte_order()</b> is provided to help with this (see
above).
</P>
<br><a name="SEC16" href="#TOC1">ERROR NAMES</a><br>
<P>
The errors PCRE_ERROR_BADUTF16_OFFSET and PCRE_ERROR_SHORTUTF16 correspond to
their 8-bit counterparts. The error PCRE_ERROR_BADMODE is given when a compiled
pattern is passed to a function that processes patterns in the other
mode, for example, if a pattern compiled with <b>pcre_compile()</b> is passed to
<b>pcre16_exec()</b>.
</P>
<P>
There are new error codes whose names begin with PCRE_UTF16_ERR for invalid
UTF-16 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
are described in the section entitled
<a href="pcreapi.html#badutf8reasons">"Reason codes for invalid UTF-8 strings"</a>
in the main
<a href="pcreapi.html"><b>pcreapi</b></a>
page. The UTF-16 errors are:
<pre>
PCRE_UTF16_ERR1 Missing low surrogate at end of string
PCRE_UTF16_ERR2 Invalid low surrogate follows high surrogate
PCRE_UTF16_ERR3 Isolated low surrogate
PCRE_UTF16_ERR4 Non-character
</PRE>
</P>
<br><a name="SEC17" href="#TOC1">ERROR TEXTS</a><br>
<P>
If there is an error while compiling a pattern, the error text that is passed
back by <b>pcre16_compile()</b> or <b>pcre16_compile2()</b> is still an 8-bit
character string, zero-terminated.
</P>
<br><a name="SEC18" href="#TOC1">CALLOUTS</a><br>
<P>
The <i>subject</i> and <i>mark</i> fields in the callout block that is passed to
a callout function point to 16-bit vectors.
</P>
<br><a name="SEC19" href="#TOC1">TESTING</a><br>
<P>
The <b>pcretest</b> program continues to operate with 8-bit input and output
files, but it can be used for testing the 16-bit library. If it is run with the
command line option <b>-16</b>, patterns and subject strings are converted from
8-bit to 16-bit before being passed to PCRE, and the 16-bit library functions
are used instead of the 8-bit ones. Returned 16-bit strings are converted to
8-bit for output. If neither the 8-bit nor the 32-bit library was compiled,
<b>pcretest</b> defaults to 16-bit and the <b>-16</b> option is ignored.
</P>
<P>
When PCRE is being built, the <b>RunTest</b> script that is called by "make
check" uses the <b>pcretest</b> <b>-C</b> option to discover which of the 8-bit,
16-bit and 32-bit libraries has been built, and runs the tests appropriately.
</P>
<br><a name="SEC20" href="#TOC1">NOT SUPPORTED IN 16-BIT MODE</a><br>
<P>
Not all the features of the 8-bit library are available with the 16-bit
library. The C++ and POSIX wrapper functions support only the 8-bit library,
and the <b>pcregrep</b> program is at present 8-bit only.
</P>
<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
<P>
Last updated: 08 November 2012
<br>
Copyright &copy; 1997-2012 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -0,0 +1,76 @@
<html>
<head>
<title>pcre_assign_jit_stack specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_assign_jit_stack man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
<b>pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
</P>
<P>
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
</P>
<P>
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
<b>pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function provides control over the memory used as a stack at run-time by a
call to <b>pcre[16|32]_exec()</b> with a pattern that has been successfully
compiled with JIT optimization. The arguments are:
<pre>
extra the data pointer returned by <b>pcre[16|32]_study()</b>
callback a callback function
data a JIT stack or a value to be passed to the callback
function
</PRE>
</P>
<P>
If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block on
the machine stack is used.
</P>
<P>
If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must
be a valid JIT stack, the result of calling <b>pcre[16|32]_jit_stack_alloc()</b>.
</P>
<P>
If <i>callback</i> is not NULL, it is called with <i>data</i> as an argument at
the start of matching, in order to set up a JIT stack. If the result is NULL,
the internal 32K stack is used; otherwise the return value must be a valid JIT
stack, the result of calling <b>pcre[16|32]_jit_stack_alloc()</b>.
</P>
<P>
You may safely assign the same JIT stack to multiple patterns, as long as they
are all matched in the same thread. In a multithread application, each thread
must use its own JIT stack. For more details, see the
<a href="pcrejit.html"><b>pcrejit</b></a>
page.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -23,13 +23,23 @@ SYNOPSIS
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b> <b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b> <b>const unsigned char *<i>tableptr</i>);</b>
</P> </P>
<P>
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
<P> <P>
This function compiles a regular expression into an internal form. It is the This function compiles a regular expression into an internal form. It is the
same as <b>pcre_compile2()</b>, except for the absence of the <i>errorcodeptr</i> same as <b>pcre[16|32]_compile2()</b>, except for the absence of the
argument. Its arguments are: <i>errorcodeptr</i> argument. Its arguments are:
<pre> <pre>
<i>pattern</i> A zero-terminated string containing the <i>pattern</i> A zero-terminated string containing the
regular expression to be compiled regular expression to be compiled
@ -49,7 +59,7 @@ The option bits are:
PCRE_DOLLAR_ENDONLY $ not to match newline at end PCRE_DOLLAR_ENDONLY $ not to match newline at end
PCRE_DOTALL . matches anything including NL PCRE_DOTALL . matches anything including NL
PCRE_DUPNAMES Allow duplicate names for subpatterns PCRE_DUPNAMES Allow duplicate names for subpatterns
PCRE_EXTENDED Ignore whitespace and # comments PCRE_EXTENDED Ignore white space and # comments
PCRE_EXTRA PCRE extra features PCRE_EXTRA PCRE extra features
(not much use currently) (not much use currently)
PCRE_FIRSTLINE Force matching to be before newline PCRE_FIRSTLINE Force matching to be before newline
@ -63,14 +73,23 @@ The option bits are:
PCRE_NEWLINE_LF Set LF as the newline sequence PCRE_NEWLINE_LF Set LF as the newline sequence
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren- PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
theses (named ones available) theses (named ones available)
PCRE_UNGREEDY Invert greediness of quantifiers PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
PCRE_UTF8 Run in UTF-8 mode validity (only relevant if
PCRE_UTF16 is set)
PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32
validity (only relevant if
PCRE_UTF32 is set)
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8 PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
validity (only relevant if validity (only relevant if
PCRE_UTF8 is set) PCRE_UTF8 is set)
PCRE_UCP Use Unicode properties for \d, \w, etc.
PCRE_UNGREEDY Invert greediness of quantifiers
PCRE_UTF16 Run <b>pcre16_compile()</b> in UTF-16 mode
PCRE_UTF32 Run <b>pcre32_compile()</b> in UTF-32 mode
PCRE_UTF8 Run <b>pcre_compile()</b> in UTF-8 mode
</pre> </pre>
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
PCRE_NO_UTF8_CHECK. PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
</P> </P>
<P> <P>
The yield of the function is a pointer to a private data structure that The yield of the function is a pointer to a private data structure that

View File

@ -24,15 +24,25 @@ SYNOPSIS
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b> <b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b> <b>const unsigned char *<i>tableptr</i>);</b>
</P> </P>
<P>
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
<b>int *<i>errorcodeptr</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
<b>int *<i>errorcodeptr</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
<P> <P>
This function compiles a regular expression into an internal form. It is the This function compiles a regular expression into an internal form. It is the
same as <b>pcre_compile()</b>, except for the addition of the <i>errorcodeptr</i> same as <b>pcre[16|32]_compile()</b>, except for the addition of the
argument. The arguments are: <i>errorcodeptr</i> argument. The arguments are:
</P>
<P>
<pre> <pre>
<i>pattern</i> A zero-terminated string containing the <i>pattern</i> A zero-terminated string containing the
regular expression to be compiled regular expression to be compiled
@ -45,32 +55,45 @@ argument. The arguments are:
</pre> </pre>
The option bits are: The option bits are:
<pre> <pre>
PCRE_ANCHORED Force pattern anchoring PCRE_ANCHORED Force pattern anchoring
PCRE_AUTO_CALLOUT Compile automatic callouts PCRE_AUTO_CALLOUT Compile automatic callouts
PCRE_CASELESS Do caseless matching PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
PCRE_DOLLAR_ENDONLY $ not to match newline at end PCRE_BSR_UNICODE \R matches all Unicode line endings
PCRE_DOTALL . matches anything including NL PCRE_CASELESS Do caseless matching
PCRE_DUPNAMES Allow duplicate names for subpatterns PCRE_DOLLAR_ENDONLY $ not to match newline at end
PCRE_EXTENDED Ignore whitespace and # comments PCRE_DOTALL . matches anything including NL
PCRE_EXTRA PCRE extra features PCRE_DUPNAMES Allow duplicate names for subpatterns
(not much use currently) PCRE_EXTENDED Ignore white space and # comments
PCRE_FIRSTLINE Force matching to be before newline PCRE_EXTRA PCRE extra features
PCRE_MULTILINE ^ and $ match newlines within data (not much use currently)
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence PCRE_FIRSTLINE Force matching to be before newline
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
PCRE_NEWLINE_CR Set CR as the newline sequence PCRE_MULTILINE ^ and $ match newlines within data
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
PCRE_NEWLINE_LF Set LF as the newline sequence PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren- sequences
theses (named ones available) PCRE_NEWLINE_CR Set CR as the newline sequence
PCRE_UNGREEDY Invert greediness of quantifiers PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
PCRE_UTF8 Run in UTF-8 mode PCRE_NEWLINE_LF Set LF as the newline sequence
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8 PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
validity (only relevant if theses (named ones available)
PCRE_UTF8 is set) PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
validity (only relevant if
PCRE_UTF16 is set)
PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32
validity (only relevant if
PCRE_UTF32 is set)
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
validity (only relevant if
PCRE_UTF8 is set)
PCRE_UCP Use Unicode properties for \d, \w, etc.
PCRE_UNGREEDY Invert greediness of quantifiers
PCRE_UTF16 Run <b>pcre16_compile()</b> in UTF-16 mode
PCRE_UTF32 Run <b>pcre32_compile()</b> in UTF-32 mode
PCRE_UTF8 Run <b>pcre_compile()</b> in UTF-8 mode
</pre> </pre>
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
PCRE_NO_UTF8_CHECK. PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
</P> </P>
<P> <P>
The yield of the function is a pointer to a private data structure that The yield of the function is a pointer to a private data structure that

View File

@ -21,19 +21,32 @@ SYNOPSIS
<P> <P>
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b> <b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
</P> </P>
<P>
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
<b>int pcre32_config(int <i>what</i>, void *<i>where</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
<P> <P>
This function makes it possible for a client program to find out which optional This function makes it possible for a client program to find out which optional
features are available in the version of the PCRE library it is using. Its features are available in the version of the PCRE library it is using. The
arguments are as follows: arguments are as follows:
<pre> <pre>
<i>what</i> A code specifying what information is required <i>what</i> A code specifying what information is required
<i>where</i> Points to where to put the data <i>where</i> Points to where to put the data
</pre> </pre>
The available codes are: The <i>where</i> argument must point to an integer variable, except for
PCRE_CONFIG_MATCH_LIMIT and PCRE_CONFIG_MATCH_LIMIT_RECURSION, when it must
point to an unsigned long integer. The available codes are:
<pre> <pre>
PCRE_CONFIG_JIT Availability of just-in-time compiler
support (1=yes 0=no)
PCRE_CONFIG_JITTARGET String containing information about the
target architecture for the JIT compiler,
or NULL if there is no JIT support
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4 PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
PCRE_CONFIG_MATCH_LIMIT Internal resource limit PCRE_CONFIG_MATCH_LIMIT Internal resource limit
PCRE_CONFIG_MATCH_LIMIT_RECURSION PCRE_CONFIG_MATCH_LIMIT_RECURSION
@ -48,16 +61,24 @@ The available codes are:
0 all Unicode line endings 0 all Unicode line endings
1 CR, LF, or CRLF only 1 CR, LF, or CRLF only
PCRE_CONFIG_POSIX_MALLOC_THRESHOLD PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
Threshold of return slots, above Threshold of return slots, above which
which <b>malloc()</b> is used by <b>malloc()</b> is used by the POSIX API
the POSIX API
PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap) PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap)
PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no) PCRE_CONFIG_UTF16 Availability of UTF-16 support (1=yes
0=no); option for <b>pcre16_config()</b>
PCRE_CONFIG_UTF32 Availability of UTF-32 support (1=yes
0=no); option for <b>pcre32_config()</b>
PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no);
option for <b>pcre_config()</b>
PCRE_CONFIG_UNICODE_PROPERTIES PCRE_CONFIG_UNICODE_PROPERTIES
Availability of Unicode property support Availability of Unicode property support
(1=yes 0=no) (1=yes 0=no)
</pre> </pre>
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise. The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise. That error
is also given if PCRE_CONFIG_UTF16 or PCRE_CONFIG_UTF32 is passed to
<b>pcre_config()</b>, if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 is passed to
<b>pcre16_config()</b>, or if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 is passed to
<b>pcre32_config()</b>.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -24,6 +24,18 @@ SYNOPSIS
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b> <b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
</P> </P>
<P>
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
</P>
<P>
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
<b>PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
@ -33,8 +45,8 @@ by name, into a given buffer. The arguments are:
<pre> <pre>
<i>code</i> Pattern that was successfully matched <i>code</i> Pattern that was successfully matched
<i>subject</i> Subject that has been successfully matched <i>subject</i> Subject that has been successfully matched
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used <i>ovector</i> Offset vector that <b>pcre[16|32]_exec()</b> used
<i>stringcount</i> Value returned by <b>pcre_exec()</b> <i>stringcount</i> Value returned by <b>pcre[16|32]_exec()</b>
<i>stringname</i> Name of the required substring <i>stringname</i> Name of the required substring
<i>buffer</i> Buffer to receive the string <i>buffer</i> Buffer to receive the string
<i>buffersize</i> Size of buffer <i>buffersize</i> Size of buffer

View File

@ -23,6 +23,16 @@ SYNOPSIS
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b> <b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
<b>int <i>buffersize</i>);</b> <b>int <i>buffersize</i>);</b>
</P> </P>
<P>
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
<b>int <i>buffersize</i>);</b>
</P>
<P>
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
<b>int <i>buffersize</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
@ -31,8 +41,8 @@ This is a convenience function for extracting a captured substring into a given
buffer. The arguments are: buffer. The arguments are:
<pre> <pre>
<i>subject</i> Subject that has been successfully matched <i>subject</i> Subject that has been successfully matched
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used <i>ovector</i> Offset vector that <b>pcre[16|32]_exec()</b> used
<i>stringcount</i> Value returned by <b>pcre_exec()</b> <i>stringcount</i> Value returned by <b>pcre[16|32]_exec()</b>
<i>stringnumber</i> Number of the required substring <i>stringnumber</i> Number of the required substring
<i>buffer</i> Buffer to receive the string <i>buffer</i> Buffer to receive the string
<i>buffersize</i> Size of buffer <i>buffersize</i> Size of buffer

View File

@ -24,6 +24,18 @@ SYNOPSIS
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b> <b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>int *<i>workspace</i>, int <i>wscount</i>);</b> <b>int *<i>workspace</i>, int <i>wscount</i>);</b>
</P> </P>
<P>
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
</P>
<P>
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
@ -31,10 +43,11 @@ DESCRIPTION
This function matches a compiled regular expression against a given subject This function matches a compiled regular expression against a given subject
string, using an alternative matching algorithm that scans the subject string string, using an alternative matching algorithm that scans the subject string
just once (<i>not</i> Perl-compatible). Note that the main, Perl-compatible, just once (<i>not</i> Perl-compatible). Note that the main, Perl-compatible,
matching function is <b>pcre_exec()</b>. The arguments for this function are: matching function is <b>pcre[16|32]_exec()</b>. The arguments for this function
are:
<pre> <pre>
<i>code</i> Points to the compiled pattern <i>code</i> Points to the compiled pattern
<i>extra</i> Points to an associated <b>pcre_extra</b> structure, <i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
or is NULL or is NULL
<i>subject</i> Points to the subject string <i>subject</i> Points to the subject string
<i>length</i> Length of the subject string, in bytes <i>length</i> Length of the subject string, in bytes
@ -48,44 +61,61 @@ matching function is <b>pcre_exec()</b>. The arguments for this function are:
</pre> </pre>
The options are: The options are:
<pre> <pre>
PCRE_ANCHORED Match only at the first position PCRE_ANCHORED Match only at the first position
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
PCRE_BSR_UNICODE \R matches all Unicode line endings PCRE_BSR_UNICODE \R matches all Unicode line endings
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences PCRE_NEWLINE_ANYCRLF Recognize CR, LF, & CRLF as newline sequences
PCRE_NEWLINE_CR Set CR as the newline sequence PCRE_NEWLINE_CR Recognize CR as the only newline sequence
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence PCRE_NEWLINE_CRLF Recognize CRLF as the only newline sequence
PCRE_NEWLINE_LF Set LF as the newline sequence PCRE_NEWLINE_LF Recognize LF as the only newline sequence
PCRE_NOTBOL Subject is not the beginning of a line PCRE_NOTBOL Subject is not the beginning of a line
PCRE_NOTEOL Subject is not the end of a line PCRE_NOTEOL Subject is not the end of a line
PCRE_NOTEMPTY An empty string is not a valid match PCRE_NOTEMPTY An empty string is not a valid match
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8 is not a valid match
validity (only relevant if PCRE_UTF8 PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
was set at compile time) PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match validity (only relevant if PCRE_UTF16
PCRE_DFA_SHORTEST Return only the shortest match was set at compile time)
PCRE_DFA_RESTART This is a restart after a partial match PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
validity (only relevant if PCRE_UTF32
was set at compile time)
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
validity (only relevant if PCRE_UTF8
was set at compile time)
PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial
PCRE_PARTIAL_SOFT ) match if no full matches are found
PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match
even if there is a full match as well
PCRE_DFA_SHORTEST Return only the shortest match
PCRE_DFA_RESTART Restart after a partial match
</pre> </pre>
There are restrictions on what may appear in a pattern when using this matching There are restrictions on what may appear in a pattern when using this matching
function. Details are given in the function. Details are given in the
<a href="pcrematching.html"><b>pcrematching</b></a> <a href="pcrematching.html"><b>pcrematching</b></a>
documentation. documentation. For details of partial matching, see the
<a href="pcrepartial.html"><b>pcrepartial</b></a>
page.
</P> </P>
<P> <P>
A <b>pcre_extra</b> structure contains the following fields: A <b>pcre[16|32]_extra</b> structure contains the following fields:
<pre> <pre>
<i>flags</i> Bits indicating which fields are set <i>flags</i> Bits indicating which fields are set
<i>study_data</i> Opaque data from <b>pcre_study()</b> <i>study_data</i> Opaque data from <b>pcre[16|32]_study()</b>
<i>match_limit</i> Limit on internal resource use <i>match_limit</i> Limit on internal resource use
<i>match_limit_recursion</i> Limit on internal recursion depth <i>match_limit_recursion</i> Limit on internal recursion depth
<i>callout_data</i> Opaque data passed back to callouts <i>callout_data</i> Opaque data passed back to callouts
<i>tables</i> Points to character tables or is NULL <i>tables</i> Points to character tables or is NULL
<i>mark</i> For passing back a *MARK pointer
<i>executable_jit</i> Opaque data from JIT compilation
</pre> </pre>
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT, The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
PCRE_EXTRA_TABLES. For this matching function, the <i>match_limit</i> and PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT. For this
<i>match_limit_recursion</i> fields are not used, and must not be set. matching function, the <i>match_limit</i> and <i>match_limit_recursion</i> fields
are not used, and must not be set. The PCRE_EXTRA_EXECUTABLE_JIT flag and
the corresponding variable are ignored.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -23,6 +23,16 @@ SYNOPSIS
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b> <b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b> <b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
</P> </P>
<P>
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
</P>
<P>
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
@ -32,7 +42,7 @@ string, using a matching algorithm that is similar to Perl's. It returns
offsets to captured substrings. Its arguments are: offsets to captured substrings. Its arguments are:
<pre> <pre>
<i>code</i> Points to the compiled pattern <i>code</i> Points to the compiled pattern
<i>extra</i> Points to an associated <b>pcre_extra</b> structure, <i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
or is NULL or is NULL
<i>subject</i> Points to the subject string <i>subject</i> Points to the subject string
<i>length</i> Length of the subject string, in bytes <i>length</i> Length of the subject string, in bytes
@ -44,41 +54,50 @@ offsets to captured substrings. Its arguments are:
</pre> </pre>
The options are: The options are:
<pre> <pre>
PCRE_ANCHORED Match only at the first position PCRE_ANCHORED Match only at the first position
PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF PCRE_BSR_ANYCRLF \R matches only CR, LF, or CRLF
PCRE_BSR_UNICODE \R matches all Unicode line endings PCRE_BSR_UNICODE \R matches all Unicode line endings
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences PCRE_NEWLINE_ANYCRLF Recognize CR, LF, & CRLF as newline sequences
PCRE_NEWLINE_CR Set CR as the newline sequence PCRE_NEWLINE_CR Recognize CR as the only newline sequence
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence PCRE_NEWLINE_CRLF Recognize CRLF as the only newline sequence
PCRE_NEWLINE_LF Set LF as the newline sequence PCRE_NEWLINE_LF Recognize LF as the only newline sequence
PCRE_NOTBOL Subject is not the beginning of a line PCRE_NOTBOL Subject string is not the beginning of a line
PCRE_NOTEOL Subject is not the end of a line PCRE_NOTEOL Subject string is not the end of a line
PCRE_NOTEMPTY An empty string is not a valid match PCRE_NOTEMPTY An empty string is not a valid match
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8 is not a valid match
validity (only relevant if PCRE_UTF8 PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
was set at compile time) PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match validity (only relevant if PCRE_UTF16
was set at compile time)
PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
validity (only relevant if PCRE_UTF32
was set at compile time)
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
validity (only relevant if PCRE_UTF8
was set at compile time)
PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial
PCRE_PARTIAL_SOFT ) match if no full matches are found
PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match
if that is found before a full match
</pre> </pre>
There are restrictions on what may appear in a pattern when partial matching is For details of partial matching, see the
requested. For details, see the
<a href="pcrepartial.html"><b>pcrepartial</b></a> <a href="pcrepartial.html"><b>pcrepartial</b></a>
page. page. A <b>pcre_extra</b> structure contains the following fields:
</P>
<P>
A <b>pcre_extra</b> structure contains the following fields:
<pre> <pre>
<i>flags</i> Bits indicating which fields are set <i>flags</i> Bits indicating which fields are set
<i>study_data</i> Opaque data from <b>pcre_study()</b> <i>study_data</i> Opaque data from <b>pcre[16|32]_study()</b>
<i>match_limit</i> Limit on internal resource use <i>match_limit</i> Limit on internal resource use
<i>match_limit_recursion</i> Limit on internal recursion depth <i>match_limit_recursion</i> Limit on internal recursion depth
<i>callout_data</i> Opaque data passed back to callouts <i>callout_data</i> Opaque data passed back to callouts
<i>tables</i> Points to character tables or is NULL <i>tables</i> Points to character tables or is NULL
<i>mark</i> For passing back a *MARK pointer
<i>executable_jit</i> Opaque data from JIT compilation
</pre> </pre>
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT, The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
PCRE_EXTRA_TABLES. PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -1,9 +1,9 @@
<html> <html>
<head> <head>
<title>pcre_info specification</title> <title>pcre_free_study specification</title>
</head> </head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB"> <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_info man page</h1> <h1>pcre_free_study man page</h1>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.
</p> </p>
@ -19,14 +19,21 @@ SYNOPSIS
<b>#include &#60;pcre.h&#62;</b> <b>#include &#60;pcre.h&#62;</b>
</P> </P>
<P> <P>
<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b> <b>void pcre_free_study(pcre_extra *<i>extra</i>);</b>
<b>*<i>firstcharptr</i>);</b> </P>
<P>
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
</P>
<P>
<b>void pcre32_free_study(pcre32_extra *<i>extra</i>);</b>
</P> </P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
<P> <P>
This function is obsolete. You should be using <b>pcre_fullinfo()</b> instead. This function is used to free the memory used for the data generated by a call
to <b>pcre[16|32]_study()</b> when it is no longer needed. The argument must be the
result of such a call.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -21,13 +21,19 @@ SYNOPSIS
<P> <P>
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b> <b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
</P> </P>
<P>
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
</P>
<P>
<b>void pcre32_free_substring(PCRE_SPTR32 <i>stringptr</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
<P> <P>
This is a convenience function for freeing the store obtained by a previous This is a convenience function for freeing the store obtained by a previous
call to <b>pcre_get_substring()</b> or <b>pcre_get_named_substring()</b>. Its call to <b>pcre[16|32]_get_substring()</b> or <b>pcre[16|32]_get_named_substring()</b>.
only argument is a pointer to the string. Its only argument is a pointer to the string.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -21,13 +21,19 @@ SYNOPSIS
<P> <P>
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b> <b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
</P> </P>
<P>
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
</P>
<P>
<b>void pcre32_free_substring_list(PCRE_SPTR32 *<i>stringptr</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
<P> <P>
This is a convenience function for freeing the store obtained by a previous This is a convenience function for freeing the store obtained by a previous
call to <b>pcre_get_substring_list()</b>. Its only argument is a pointer to the call to <b>pcre[16|32]_get_substring_list()</b>. Its only argument is a pointer to
list of string pointers. the list of string pointers.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -22,6 +22,14 @@ SYNOPSIS
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b> <b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>int <i>what</i>, void *<i>where</i>);</b> <b>int <i>what</i>, void *<i>where</i>);</b>
</P> </P>
<P>
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
<b>int <i>what</i>, void *<i>where</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
@ -29,7 +37,7 @@ DESCRIPTION
This function returns information about a compiled pattern. Its arguments are: This function returns information about a compiled pattern. Its arguments are:
<pre> <pre>
<i>code</i> Compiled regular expression <i>code</i> Compiled regular expression
<i>extra</i> Result of <b>pcre_study()</b> or NULL <i>extra</i> Result of <b>pcre[16|32]_study()</b> or NULL
<i>what</i> What information is required <i>what</i> What information is required
<i>where</i> Where to put the information <i>where</i> Where to put the information
</pre> </pre>
@ -38,20 +46,48 @@ The following information is available:
PCRE_INFO_BACKREFMAX Number of highest back reference PCRE_INFO_BACKREFMAX Number of highest back reference
PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns
PCRE_INFO_DEFAULT_TABLES Pointer to default tables PCRE_INFO_DEFAULT_TABLES Pointer to default tables
PCRE_INFO_FIRSTBYTE Fixed first byte for a match, or PCRE_INFO_FIRSTBYTE Fixed first data unit for a match, or
-1 for start of string -1 for start of string
or after newline, or or after newline, or
-2 otherwise -2 otherwise
PCRE_INFO_FIRSTTABLE Table of first bytes (after studying) PCRE_INFO_FIRSTTABLE Table of first data units (after studying)
PCRE_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist
PCRE_INFO_JCHANGED Return 1 if (?J) or (?-J) was used PCRE_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
PCRE_INFO_LASTLITERAL Literal last byte required PCRE_INFO_JIT Return 1 after successful JIT compilation
PCRE_INFO_JITSIZE Size of JIT compiled code
PCRE_INFO_LASTLITERAL Literal last data unit required
PCRE_INFO_MINLENGTH Lower bound length of matching strings
PCRE_INFO_NAMECOUNT Number of named subpatterns PCRE_INFO_NAMECOUNT Number of named subpatterns
PCRE_INFO_NAMEENTRYSIZE Size of name table entry PCRE_INFO_NAMEENTRYSIZE Size of name table entry
PCRE_INFO_NAMETABLE Pointer to name table PCRE_INFO_NAMETABLE Pointer to name table
PCRE_INFO_OKPARTIAL Return 1 if partial matching can be tried PCRE_INFO_OKPARTIAL Return 1 if partial matching can be tried
(always returns 1 after release 8.00)
PCRE_INFO_OPTIONS Option bits used for compilation PCRE_INFO_OPTIONS Option bits used for compilation
PCRE_INFO_SIZE Size of compiled pattern PCRE_INFO_SIZE Size of compiled pattern
PCRE_INFO_STUDYSIZE Size of study data PCRE_INFO_STUDYSIZE Size of study data
PCRE_INFO_FIRSTCHARACTER Fixed first data unit for a match
PCRE_INFO_FIRSTCHARACTERFLAGS Returns
1 if there is a first data character set, which can
then be retrieved using PCRE_INFO_FIRSTCHARACTER,
2 if the first character is at the start of the data
string or after a newline, and
0 otherwise
PCRE_INFO_REQUIREDCHAR Literal last data unit required
PCRE_INFO_REQUIREDCHARFLAGS Returns 1 if the last data character is set (which can then
be retrieved using PCRE_INFO_REQUIREDCHAR); 0 otherwise
</pre>
The <i>where</i> argument must point to an integer variable, except for the
following <i>what</i> values:
<pre>
PCRE_INFO_DEFAULT_TABLES const unsigned char *
PCRE_INFO_FIRSTTABLE const unsigned char *
PCRE_INFO_NAMETABLE PCRE_SPTR16 (16-bit library)
PCRE_INFO_NAMETABLE PCRE_SPTR32 (32-bit library)
PCRE_INFO_NAMETABLE const unsigned char * (8-bit library)
PCRE_INFO_OPTIONS unsigned long int
PCRE_INFO_SIZE size_t
PCRE_INFO_FIRSTCHARACTER uint32_t
PCRE_INFO_REQUIREDCHAR uint32_t
</pre> </pre>
The yield of the function is zero on success or: The yield of the function is zero on success or:
<pre> <pre>

View File

@ -24,6 +24,18 @@ SYNOPSIS
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b> <b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b>const char **<i>stringptr</i>);</b> <b>const char **<i>stringptr</i>);</b>
</P> </P>
<P>
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
</P>
<P>
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
@ -33,16 +45,17 @@ arguments are:
<pre> <pre>
<i>code</i> Compiled pattern <i>code</i> Compiled pattern
<i>subject</i> Subject that has been successfully matched <i>subject</i> Subject that has been successfully matched
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used <i>ovector</i> Offset vector that <b>pcre[16|32]_exec()</b> used
<i>stringcount</i> Value returned by <b>pcre_exec()</b> <i>stringcount</i> Value returned by <b>pcre[16|32]_exec()</b>
<i>stringname</i> Name of the required substring <i>stringname</i> Name of the required substring
<i>stringptr</i> Where to put the string pointer <i>stringptr</i> Where to put the string pointer
</pre> </pre>
The memory in which the substring is placed is obtained by calling The memory in which the substring is placed is obtained by calling
<b>pcre_malloc()</b>. The convenience function <b>pcre_free_substring()</b> can <b>pcre[16|32]_malloc()</b>. The convenience function
be used to free it when it is no longer needed. The yield of the function is <b>pcre[16|32]_free_substring()</b> can be used to free it when it is no longer
the length of the extracted substring, PCRE_ERROR_NOMEMORY if sufficient memory needed. The yield of the function is the length of the extracted substring,
could not be obtained, or PCRE_ERROR_NOSUBSTRING if the string name is invalid. PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or
PCRE_ERROR_NOSUBSTRING if the string name is invalid.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -22,6 +22,14 @@ SYNOPSIS
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b> <b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
<b>const char *<i>name</i>);</b> <b>const char *<i>name</i>);</b>
</P> </P>
<P>
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>name</i>);</b>
</P>
<P>
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
<b>PCRE_SPTR32 <i>name</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
@ -35,8 +43,8 @@ parenthesis in a compiled pattern. Its arguments are:
The yield of the function is the number of the parenthesis if the name is The yield of the function is the number of the parenthesis if the name is
found, or PCRE_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed found, or PCRE_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed
(PCRE_DUPNAMES is set), it is not defined which of the numbers is returned by (PCRE_DUPNAMES is set), it is not defined which of the numbers is returned by
<b>pcre_get_stringnumber()</b>. You can obtain the complete list by calling <b>pcre[16|32]_get_stringnumber()</b>. You can obtain the complete list by calling
<b>pcre_get_stringtable_entries()</b>. <b>pcre[16|32]_get_stringtable_entries()</b>.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -22,6 +22,14 @@ SYNOPSIS
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b> <b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b> <b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
</P> </P>
<P>
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
</P>
<P>
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
<b>PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
@ -29,7 +37,7 @@ DESCRIPTION
This convenience function finds, for a compiled pattern, the first and last This convenience function finds, for a compiled pattern, the first and last
entries for a given name in the table that translates capturing parenthesis entries for a given name in the table that translates capturing parenthesis
names into numbers. When names are required to be unique (PCRE_DUPNAMES is names into numbers. When names are required to be unique (PCRE_DUPNAMES is
<i>not</i> set), it is usually easier to use <b>pcre_get_stringnumber()</b> <i>not</i> set), it is usually easier to use <b>pcre[16|32]_get_stringnumber()</b>
instead. instead.
<pre> <pre>
<i>code</i> Compiled regular expression <i>code</i> Compiled regular expression

View File

@ -23,6 +23,16 @@ SYNOPSIS
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b> <b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b>const char **<i>stringptr</i>);</b> <b>const char **<i>stringptr</i>);</b>
</P> </P>
<P>
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
</P>
<P>
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
@ -31,16 +41,17 @@ This is a convenience function for extracting a captured substring. The
arguments are: arguments are:
<pre> <pre>
<i>subject</i> Subject that has been successfully matched <i>subject</i> Subject that has been successfully matched
<i>ovector</i> Offset vector that <b>pcre_exec()</b> used <i>ovector</i> Offset vector that <b>pcre[16|32]_exec()</b> used
<i>stringcount</i> Value returned by <b>pcre_exec()</b> <i>stringcount</i> Value returned by <b>pcre[16|32]_exec()</b>
<i>stringnumber</i> Number of the required substring <i>stringnumber</i> Number of the required substring
<i>stringptr</i> Where to put the string pointer <i>stringptr</i> Where to put the string pointer
</pre> </pre>
The memory in which the substring is placed is obtained by calling The memory in which the substring is placed is obtained by calling
<b>pcre_malloc()</b>. The convenience function <b>pcre_free_substring()</b> can <b>pcre[16|32]_malloc()</b>. The convenience function
be used to free it when it is no longer needed. The yield of the function is <b>pcre[16|32]_free_substring()</b> can be used to free it when it is no longer
the length of the substring, PCRE_ERROR_NOMEMORY if sufficient memory could not needed. The yield of the function is the length of the substring,
be obtained, or PCRE_ERROR_NOSUBSTRING if the string number is invalid. PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or
PCRE_ERROR_NOSUBSTRING if the string number is invalid.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -22,6 +22,14 @@ SYNOPSIS
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b> <b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b> <b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
</P> </P>
<P>
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
</P>
<P>
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
@ -30,17 +38,17 @@ This is a convenience function for extracting a list of all the captured
substrings. The arguments are: substrings. The arguments are:
<pre> <pre>
<i>subject</i> Subject that has been successfully matched <i>subject</i> Subject that has been successfully matched
<i>ovector</i> Offset vector that <b>pcre_exec</b> used <i>ovector</i> Offset vector that <b>pcre[16|32]_exec</b> used
<i>stringcount</i> Value returned by <b>pcre_exec</b> <i>stringcount</i> Value returned by <b>pcre[16|32]_exec</b>
<i>listptr</i> Where to put a pointer to the list <i>listptr</i> Where to put a pointer to the list
</pre> </pre>
The memory in which the substrings and the list are placed is obtained by The memory in which the substrings and the list are placed is obtained by
calling <b>pcre_malloc()</b>. The convenience function calling <b>pcre[16|32]_malloc()</b>. The convenience function
<b>pcre_free_substring_list()</b> can be used to free it when it is no longer <b>pcre[16|32]_free_substring_list()</b> can be used to free it when it is no
needed. A pointer to a list of pointers is put in the variable whose address is longer needed. A pointer to a list of pointers is put in the variable whose
in <i>listptr</i>. The list is terminated by a NULL pointer. The yield of the address is in <i>listptr</i>. The list is terminated by a NULL pointer. The
function is zero on success or PCRE_ERROR_NOMEMORY if sufficient memory could yield of the function is zero on success or PCRE_ERROR_NOMEMORY if sufficient
not be obtained. memory could not be obtained.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -0,0 +1,108 @@
<html>
<head>
<title>pcre_jit_exec specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_jit_exec man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>pcre_jit_stack *<i>jstack</i>);</b>
</P>
<P>
<b>int pcre16_jit_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>pcre16_jit_stack *<i>jstack</i>);</b>
</P>
<P>
<b>int pcre32_jit_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>pcre32_jit_stack *<i>jstack</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function matches a compiled regular expression that has been successfully
studied with one of the JIT options against a given subject string, using a
matching algorithm that is similar to Perl's. It is a "fast path" interface to
JIT, and it bypasses some of the sanity checks that <b>pcre_exec()</b> applies.
It returns offsets to captured substrings. Its arguments are:
<pre>
<i>code</i> Points to the compiled pattern
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
or is NULL
<i>subject</i> Points to the subject string
<i>length</i> Length of the subject string, in data units
(bytes in the 8-bit library)
<i>startoffset</i> Offset in data units in the subject at which
to start matching
<i>options</i> Option bits
<i>ovector</i> Points to a vector of ints for result offsets
<i>ovecsize</i> Number of elements in the vector (a multiple of 3)
<i>jstack</i> Pointer to a JIT stack
</pre>
The allowed options are:
<pre>
PCRE_NOTBOL Subject string is not the beginning of a line
PCRE_NOTEOL Subject string is not the end of a line
PCRE_NOTEMPTY An empty string is not a valid match
PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject
is not a valid match
PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
validity (only relevant if PCRE_UTF16
was set at compile time)
PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
validity (only relevant if PCRE_UTF32
was set at compile time)
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
validity (only relevant if PCRE_UTF8
was set at compile time)
PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial
PCRE_PARTIAL_SOFT ) match if no full matches are found
PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match
if that is found before a full match
</pre>
However, the PCRE_NO_UTF[8|16|32]_CHECK options have no effect, as this check
is never applied. For details of partial matching, see the
<a href="pcrepartial.html"><b>pcrepartial</b></a>
page. A <b>pcre_extra</b> structure contains the following fields:
<pre>
<i>flags</i> Bits indicating which fields are set
<i>study_data</i> Opaque data from <b>pcre[16|32]_study()</b>
<i>match_limit</i> Limit on internal resource use
<i>match_limit_recursion</i> Limit on internal recursion depth
<i>callout_data</i> Opaque data passed back to callouts
<i>tables</i> Points to character tables or is NULL
<i>mark</i> For passing back a *MARK pointer
<i>executable_jit</i> Opaque data from JIT compilation
</pre>
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the JIT API in the
<a href="pcrejit.html"><b>pcrejit</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -0,0 +1,55 @@
<html>
<head>
<title>pcre_jit_stack_alloc specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_jit_stack_alloc man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>,</b>
<b>int <i>maxsize</i>);</b>
</P>
<P>
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>,</b>
<b>int <i>maxsize</i>);</b>
</P>
<P>
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>,</b>
<b>int <i>maxsize</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is used to create a stack for use by the code compiled by the JIT
optimization of <b>pcre[16|32]_study()</b>. The arguments are a starting size for
the stack, and a maximum size to which it is allowed to grow. The result can be
passed to the JIT run-time code by <b>pcre[16|32]_assign_jit_stack()</b>, or that
function can set up a callback for obtaining a stack. A maximum stack size of
512K to 1M should be more than enough for any pattern. For more details, see
the
<a href="pcrejit.html"><b>pcrejit</b></a>
page.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -0,0 +1,48 @@
<html>
<head>
<title>pcre_jit_stack_free specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_jit_stack_free man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>void pcre_jit_stack_free(pcre_jit_stack *<i>stack</i>);</b>
</P>
<P>
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
</P>
<P>
<b>void pcre32_jit_stack_free(pcre32_jit_stack *<i>stack</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is used to free a JIT stack that was created by
<b>pcre[16|32]_jit_stack_alloc()</b> when it is no longer needed. For more details,
see the
<a href="pcrejit.html"><b>pcrejit</b></a>
page.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -21,15 +21,21 @@ SYNOPSIS
<P> <P>
<b>const unsigned char *pcre_maketables(void);</b> <b>const unsigned char *pcre_maketables(void);</b>
</P> </P>
<P>
<b>const unsigned char *pcre16_maketables(void);</b>
</P>
<P>
<b>const unsigned char *pcre32_maketables(void);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
<P> <P>
This function builds a set of character tables for character values less than This function builds a set of character tables for character values less than
256. These can be passed to <b>pcre_compile()</b> to override PCRE's internal, 256. These can be passed to <b>pcre[16|32]_compile()</b> to override PCRE's
built-in tables (which were made by <b>pcre_maketables()</b> when PCRE was internal, built-in tables (which were made by <b>pcre[16|32]_maketables()</b> when
compiled). You might want to do this if you are using a non-standard locale. PCRE was compiled). You might want to do this if you are using a non-standard
The function yields a pointer to the tables. locale. The function yields a pointer to the tables.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -0,0 +1,58 @@
<html>
<head>
<title>pcre_pattern_to_host_byte_order specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_pattern_to_host_byte_order man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
<b>pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
</P>
<P>
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
</P>
<P>
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
<b>pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function ensures that the bytes in 2-byte and 4-byte values in a compiled
pattern are in the correct order for the current host. It is useful when a
pattern that has been compiled on one host is transferred to another that might
have different endianness. The arguments are:
<pre>
<i>code</i> A compiled regular expression
<i>extra</i> Points to an associated <b>pcre[16|32]_extra</b> structure,
or is NULL
<i>tables</i> Pointer to character tables, or NULL to
set the built-in default
</pre>
The result is 0 for success, a negative PCRE_ERROR_xxx value otherwise.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -21,6 +21,12 @@ SYNOPSIS
<P> <P>
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b> <b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
</P> </P>
<P>
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
</P>
<P>
<b>int pcre32_refcount(pcre32 *<i>code</i>, int <i>adjust</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>

View File

@ -22,6 +22,14 @@ SYNOPSIS
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b> <b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>);</b> <b>const char **<i>errptr</i>);</b>
</P> </P>
<P>
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>);</b>
</P>
<P>
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>);</b>
</P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
@ -30,11 +38,12 @@ This function studies a compiled pattern, to see if additional information can
be extracted that might speed up matching. Its arguments are: be extracted that might speed up matching. Its arguments are:
<pre> <pre>
<i>code</i> A compiled regular expression <i>code</i> A compiled regular expression
<i>options</i> Options for <b>pcre_study()</b> <i>options</i> Options for <b>pcre[16|32]_study()</b>
<i>errptr</i> Where to put an error message <i>errptr</i> Where to put an error message
</pre> </pre>
If the function succeeds, it returns a value that can be passed to If the function succeeds, it returns a value that can be passed to
<b>pcre_exec()</b> via its <i>extra</i> argument. <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> via their <i>extra</i>
arguments.
</P> </P>
<P> <P>
If the function returns NULL, either it could not find any additional If the function returns NULL, either it could not find any additional
@ -42,8 +51,11 @@ information, or there was an error. You can tell the difference by looking at
the error value. It is NULL in the first case. the error value. It is NULL in the first case.
</P> </P>
<P> <P>
There are currently no options defined; the value of the second argument should The only option is PCRE_STUDY_JIT_COMPILE. It requests just-in-time compilation
always be zero. if possible. If PCRE has been compiled without JIT support, this option is
ignored. See the
<a href="pcrejit.html"><b>pcrejit</b></a>
page for further details.
</P> </P>
<P> <P>
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the

View File

@ -0,0 +1,57 @@
<html>
<head>
<title>pcre_utf16_to_host_byte_order specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_utf16_to_host_byte_order man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
<b>int <i>keep_boms</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function, which exists only in the 16-bit library, converts a UTF-16
string to the correct order for the current host, taking account of any byte
order marks (BOMs) within the string. Its arguments are:
<pre>
<i>output</i> pointer to output buffer, may be the same as <i>input</i>
<i>input</i> pointer to input buffer
<i>length</i> number of 16-bit units in the input, or negative for
a zero-terminated string
<i>host_byte_order</i> NULL, or a pointer to a non-zero value, means
start in host byte order
<i>keep_boms</i> if non-zero, BOMs are copied to the output string
</pre>
The result of the function is the number of 16-bit units placed into the output
buffer, including the zero terminator if the string was zero-terminated.
</P>
<P>
If <i>host_byte_order</i> is not NULL, it is set to indicate the byte order that
is current at the end of the string.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -19,13 +19,20 @@ SYNOPSIS
<b>#include &#60;pcre.h&#62;</b> <b>#include &#60;pcre.h&#62;</b>
</P> </P>
<P> <P>
<b>char *pcre_version(void);</b> <b>const char *pcre_version(void);</b>
</P>
<P>
<b>const char *pcre16_version(void);</b>
</P>
<P>
<b>const char *pcre32_version(void);</b>
</P> </P>
<br><b> <br><b>
DESCRIPTION DESCRIPTION
</b><br> </b><br>
<P> <P>
This function returns a character string that gives the version number of the This function (even in the 16-bit and 32-bit libraries) returns a
zero-terminated, 8-bit character string that gives the version number of the
PCRE library and the date of its release. PCRE library and the date of its release.
</P> </P>
<P> <P>

File diff suppressed because it is too large Load Diff

View File

@ -14,23 +14,28 @@ man page, in case the conversion went wrong.
<br> <br>
<ul> <ul>
<li><a name="TOC1" href="#SEC1">PCRE BUILD-TIME OPTIONS</a> <li><a name="TOC1" href="#SEC1">PCRE BUILD-TIME OPTIONS</a>
<li><a name="TOC2" href="#SEC2">C++ SUPPORT</a> <li><a name="TOC2" href="#SEC2">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a>
<li><a name="TOC3" href="#SEC3">UTF-8 SUPPORT</a> <li><a name="TOC3" href="#SEC3">BUILDING SHARED AND STATIC LIBRARIES</a>
<li><a name="TOC4" href="#SEC4">UNICODE CHARACTER PROPERTY SUPPORT</a> <li><a name="TOC4" href="#SEC4">C++ SUPPORT</a>
<li><a name="TOC5" href="#SEC5">CODE VALUE OF NEWLINE</a> <li><a name="TOC5" href="#SEC5">UTF-8, UTF-16 AND UTF-32 SUPPORT</a>
<li><a name="TOC6" href="#SEC6">WHAT \R MATCHES</a> <li><a name="TOC6" href="#SEC6">UNICODE CHARACTER PROPERTY SUPPORT</a>
<li><a name="TOC7" href="#SEC7">BUILDING SHARED AND STATIC LIBRARIES</a> <li><a name="TOC7" href="#SEC7">JUST-IN-TIME COMPILER SUPPORT</a>
<li><a name="TOC8" href="#SEC8">POSIX MALLOC USAGE</a> <li><a name="TOC8" href="#SEC8">CODE VALUE OF NEWLINE</a>
<li><a name="TOC9" href="#SEC9">HANDLING VERY LARGE PATTERNS</a> <li><a name="TOC9" href="#SEC9">WHAT \R MATCHES</a>
<li><a name="TOC10" href="#SEC10">AVOIDING EXCESSIVE STACK USAGE</a> <li><a name="TOC10" href="#SEC10">POSIX MALLOC USAGE</a>
<li><a name="TOC11" href="#SEC11">LIMITING PCRE RESOURCE USAGE</a> <li><a name="TOC11" href="#SEC11">HANDLING VERY LARGE PATTERNS</a>
<li><a name="TOC12" href="#SEC12">CREATING CHARACTER TABLES AT BUILD TIME</a> <li><a name="TOC12" href="#SEC12">AVOIDING EXCESSIVE STACK USAGE</a>
<li><a name="TOC13" href="#SEC13">USING EBCDIC CODE</a> <li><a name="TOC13" href="#SEC13">LIMITING PCRE RESOURCE USAGE</a>
<li><a name="TOC14" href="#SEC14">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a> <li><a name="TOC14" href="#SEC14">CREATING CHARACTER TABLES AT BUILD TIME</a>
<li><a name="TOC15" href="#SEC15">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a> <li><a name="TOC15" href="#SEC15">USING EBCDIC CODE</a>
<li><a name="TOC16" href="#SEC16">SEE ALSO</a> <li><a name="TOC16" href="#SEC16">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
<li><a name="TOC17" href="#SEC17">AUTHOR</a> <li><a name="TOC17" href="#SEC17">PCREGREP BUFFER SIZE</a>
<li><a name="TOC18" href="#SEC18">REVISION</a> <li><a name="TOC18" href="#SEC18">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
<li><a name="TOC19" href="#SEC19">DEBUGGING WITH VALGRIND SUPPORT</a>
<li><a name="TOC20" href="#SEC20">CODE COVERAGE REPORTING</a>
<li><a name="TOC21" href="#SEC21">SEE ALSO</a>
<li><a name="TOC22" href="#SEC22">AUTHOR</a>
<li><a name="TOC23" href="#SEC23">REVISION</a>
</ul> </ul>
<br><a name="SEC1" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br> <br><a name="SEC1" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
<P> <P>
@ -39,10 +44,17 @@ the library is compiled. It assumes use of the <b>configure</b> script, where
the optional features are selected or deselected by providing options to the optional features are selected or deselected by providing options to
<b>configure</b> before running the <b>make</b> command. However, the same <b>configure</b> before running the <b>make</b> command. However, the same
options can be selected in both Unix-like and non-Unix-like environments using options can be selected in both Unix-like and non-Unix-like environments using
the GUI facility of <b>CMakeSetup</b> if you are using <b>CMake</b> instead of the GUI facility of <b>cmake-gui</b> if you are using <b>CMake</b> instead of
<b>configure</b> to build PCRE. <b>configure</b> to build PCRE.
</P> </P>
<P> <P>
There is a lot more information about building PCRE without using
<b>configure</b> (including information about using <b>CMake</b> or building "by
hand") in the file called <i>NON-AUTOTOOLS-BUILD</i>, which is part of the PCRE
distribution. You should consult this file as well as the <i>README</i> file if
you are building in a non-Unix-like environment.
</P>
<P>
The complete list of options for <b>configure</b> (which includes the standard The complete list of options for <b>configure</b> (which includes the standard
ones such as the selection of the installation directory) can be obtained by ones such as the selection of the installation directory) can be obtained by
running running
@ -55,45 +67,93 @@ The following sections include descriptions of options whose names begin with
--enable and --disable always come in pairs, so the complementary option always --enable and --disable always come in pairs, so the complementary option always
exists as well, but as it specifies the default, it is not described. exists as well, but as it specifies the default, it is not described.
</P> </P>
<br><a name="SEC2" href="#TOC1">C++ SUPPORT</a><br> <br><a name="SEC2" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
<P> <P>
By default, the <b>configure</b> script will search for a C++ compiler and C++ By default, a library called <b>libpcre</b> is built, containing functions that
header files. If it finds them, it automatically builds the C++ wrapper library take string arguments contained in vectors of bytes, either as single-byte
for PCRE. You can disable this by adding characters, or interpreted as UTF-8 strings. You can also build a separate
library, called <b>libpcre16</b>, in which strings are contained in vectors of
16-bit data units and interpreted either as single-unit characters or UTF-16
strings, by adding
<pre>
--enable-pcre16
</pre>
to the <b>configure</b> command. You can also build a separate
library, called <b>libpcre32</b>, in which strings are contained in vectors of
32-bit data units and interpreted either as single-unit characters or UTF-32
strings, by adding
<pre>
--enable-pcre32
</pre>
to the <b>configure</b> command. If you do not want the 8-bit library, add
<pre>
--disable-pcre8
</pre>
as well. At least one of the three libraries must be built. Note that the C++
and POSIX wrappers are for the 8-bit library only, and that <b>pcregrep</b> is
an 8-bit program. None of these are built if you select only the 16-bit or
32-bit libraries.
</P>
<br><a name="SEC3" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
<P>
The PCRE building process uses <b>libtool</b> to build both shared and static
Unix libraries by default. You can suppress one of these by adding one of
<pre>
--disable-shared
--disable-static
</pre>
to the <b>configure</b> command, as required.
</P>
<br><a name="SEC4" href="#TOC1">C++ SUPPORT</a><br>
<P>
By default, if the 8-bit library is being built, the <b>configure</b> script
will search for a C++ compiler and C++ header files. If it finds them, it
automatically builds the C++ wrapper library (which supports only 8-bit
strings). You can disable this by adding
<pre> <pre>
--disable-cpp --disable-cpp
</pre> </pre>
to the <b>configure</b> command. to the <b>configure</b> command.
</P> </P>
<br><a name="SEC3" href="#TOC1">UTF-8 SUPPORT</a><br> <br><a name="SEC5" href="#TOC1">UTF-8, UTF-16 AND UTF-32 SUPPORT</a><br>
<P> <P>
To build PCRE with support for UTF-8 Unicode character strings, add To build PCRE with support for UTF Unicode character strings, add
<pre> <pre>
--enable-utf8 --enable-utf
</pre> </pre>
to the <b>configure</b> command. Of itself, this does not make PCRE treat to the <b>configure</b> command. This setting applies to all three libraries,
strings as UTF-8. As well as compiling PCRE with this option, you also have adding support for UTF-8 to the 8-bit library, support for UTF-16 to the 16-bit
have to set the PCRE_UTF8 option when you call the <b>pcre_compile()</b> library, and support for UTF-32 to the 32-bit library. There are no
function. separate options for enabling UTF-8, UTF-16 and UTF-32 independently because
that would allow ridiculous settings such as requesting UTF-16 support while
building only the 8-bit library. It is not possible to build one library with
UTF support and another without in the same configuration. (For backwards
compatibility, --enable-utf8 is a synonym of --enable-utf.)
</P> </P>
<P> <P>
If you set --enable-utf8 when compiling in an EBCDIC environment, PCRE expects Of itself, this setting does not make PCRE treat strings as UTF-8, UTF-16 or
its input to be either ASCII or UTF-8 (depending on the runtime option). It is UTF-32. As well as compiling PCRE with this option, you also have to set
the PCRE_UTF8, PCRE_UTF16 or PCRE_UTF32 option (as appropriate) when you call
one of the pattern compiling functions.
</P>
<P>
If you set --enable-utf when compiling in an EBCDIC environment, PCRE expects
its input to be either ASCII or UTF-8 (depending on the run-time option). It is
not possible to support both EBCDIC and UTF-8 codes in the same version of the not possible to support both EBCDIC and UTF-8 codes in the same version of the
library. Consequently, --enable-utf8 and --enable-ebcdic are mutually library. Consequently, --enable-utf and --enable-ebcdic are mutually
exclusive. exclusive.
</P> </P>
<br><a name="SEC4" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br> <br><a name="SEC6" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
<P> <P>
UTF-8 support allows PCRE to process character values greater than 255 in the UTF support allows the libraries to process character codepoints up to 0x10ffff
strings that it handles. On its own, however, it does not provide any in the strings that they handle. On its own, however, it does not provide any
facilities for accessing the properties of such characters. If you want to be facilities for accessing the properties of such characters. If you want to be
able to use the pattern escapes \P, \p, and \X, which refer to Unicode able to use the pattern escapes \P, \p, and \X, which refer to Unicode
character properties, you must add character properties, you must add
<pre> <pre>
--enable-unicode-properties --enable-unicode-properties
</pre> </pre>
to the <b>configure</b> command. This implies UTF-8 support, even if you have to the <b>configure</b> command. This implies UTF support, even if you have
not explicitly requested it. not explicitly requested it.
</P> </P>
<P> <P>
@ -103,7 +163,24 @@ supported. Details are given in the
<a href="pcrepattern.html"><b>pcrepattern</b></a> <a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation. documentation.
</P> </P>
<br><a name="SEC5" href="#TOC1">CODE VALUE OF NEWLINE</a><br> <br><a name="SEC7" href="#TOC1">JUST-IN-TIME COMPILER SUPPORT</a><br>
<P>
Just-in-time compiler support is included in the build by specifying
<pre>
--enable-jit
</pre>
This support is available only for certain hardware architectures. If this
option is set for an unsupported architecture, a compile time error occurs.
See the
<a href="pcrejit.html"><b>pcrejit</b></a>
documentation for a discussion of JIT usage. When JIT support is enabled,
pcregrep automatically makes use of it, unless you add
<pre>
--disable-pcregrep-jit
</pre>
to the <b>configure</b> command.
</P>
<br><a name="SEC8" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
<P> <P>
By default, PCRE interprets the linefeed (LF) character as indicating the end By default, PCRE interprets the linefeed (LF) character as indicating the end
of a line. This is the normal newline character on Unix-like systems. You can of a line. This is the normal newline character on Unix-like systems. You can
@ -136,7 +213,7 @@ Whatever line ending convention is selected when PCRE is built can be
overridden when the library functions are called. At build time it is overridden when the library functions are called. At build time it is
conventional to use the standard for your operating system. conventional to use the standard for your operating system.
</P> </P>
<br><a name="SEC6" href="#TOC1">WHAT \R MATCHES</a><br> <br><a name="SEC9" href="#TOC1">WHAT \R MATCHES</a><br>
<P> <P>
By default, the sequence \R in a pattern matches any Unicode newline sequence, By default, the sequence \R in a pattern matches any Unicode newline sequence,
whatever has been selected as the line ending sequence. If you specify whatever has been selected as the line ending sequence. If you specify
@ -147,19 +224,9 @@ the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
selected when PCRE is built can be overridden when the library functions are selected when PCRE is built can be overridden when the library functions are
called. called.
</P> </P>
<br><a name="SEC7" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br> <br><a name="SEC10" href="#TOC1">POSIX MALLOC USAGE</a><br>
<P> <P>
The PCRE building process uses <b>libtool</b> to build both shared and static When the 8-bit library is called through the POSIX interface (see the
Unix libraries by default. You can suppress one of these by adding one of
<pre>
--disable-shared
--disable-static
</pre>
to the <b>configure</b> command, as required.
</P>
<br><a name="SEC8" href="#TOC1">POSIX MALLOC USAGE</a><br>
<P>
When PCRE is called through the POSIX interface (see the
<a href="pcreposix.html"><b>pcreposix</b></a> <a href="pcreposix.html"><b>pcreposix</b></a>
documentation), additional working storage is required for holding the pointers documentation), additional working storage is required for holding the pointers
to capturing substrings, because PCRE requires three integers per substring, to capturing substrings, because PCRE requires three integers per substring,
@ -173,23 +240,26 @@ such as
</pre> </pre>
to the <b>configure</b> command. to the <b>configure</b> command.
</P> </P>
<br><a name="SEC9" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br> <br><a name="SEC11" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
<P> <P>
Within a compiled pattern, offset values are used to point from one part to Within a compiled pattern, offset values are used to point from one part to
another (for example, from an opening parenthesis to an alternation another (for example, from an opening parenthesis to an alternation
metacharacter). By default, two-byte values are used for these offsets, leading metacharacter). By default, in the 8-bit and 16-bit libraries, two-byte values
to a maximum size for a compiled pattern of around 64K. This is sufficient to are used for these offsets, leading to a maximum size for a compiled pattern of
handle all but the most gigantic patterns. Nevertheless, some people do want to around 64K. This is sufficient to handle all but the most gigantic patterns.
process enormous patterns, so it is possible to compile PCRE to use three-byte Nevertheless, some people do want to process truly enormous patterns, so it is
or four-byte offsets by adding a setting such as possible to compile PCRE to use three-byte or four-byte offsets by adding a
setting such as
<pre> <pre>
--with-link-size=3 --with-link-size=3
</pre> </pre>
to the <b>configure</b> command. The value given must be 2, 3, or 4. Using to the <b>configure</b> command. The value given must be 2, 3, or 4. For the
16-bit library, a value of 3 is rounded up to 4. In these libraries, using
longer offsets slows down the operation of PCRE because it has to load longer offsets slows down the operation of PCRE because it has to load
additional bytes when handling them. additional data when handling them. For the 32-bit library the value is always
4 and cannot be overridden; the value of --with-link-size is ignored.
</P> </P>
<br><a name="SEC10" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br> <br><a name="SEC12" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
<P> <P>
When matching with the <b>pcre_exec()</b> function, PCRE implements backtracking When matching with the <b>pcre_exec()</b> function, PCRE implements backtracking
by making recursive calls to an internal function called <b>match()</b>. In by making recursive calls to an internal function called <b>match()</b>. In
@ -209,7 +279,7 @@ to the <b>configure</b> command. With this configuration, PCRE will use the
<b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables to call memory <b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables to call memory
management functions. By default these point to <b>malloc()</b> and management functions. By default these point to <b>malloc()</b> and
<b>free()</b>, but you can replace the pointers so that your own functions are <b>free()</b>, but you can replace the pointers so that your own functions are
used. used instead.
</P> </P>
<P> <P>
Separate functions are provided rather than using <b>pcre_malloc</b> and Separate functions are provided rather than using <b>pcre_malloc</b> and
@ -218,9 +288,9 @@ requested are always the same, and the blocks are always freed in reverse
order. A calling program might be able to implement optimized functions that order. A calling program might be able to implement optimized functions that
perform better than <b>malloc()</b> and <b>free()</b>. PCRE runs noticeably more perform better than <b>malloc()</b> and <b>free()</b>. PCRE runs noticeably more
slowly when built in this way. This option affects only the <b>pcre_exec()</b> slowly when built in this way. This option affects only the <b>pcre_exec()</b>
function; it is not relevant for the the <b>pcre_dfa_exec()</b> function. function; it is not relevant for <b>pcre_dfa_exec()</b>.
</P> </P>
<br><a name="SEC11" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br> <br><a name="SEC13" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
<P> <P>
Internally, PCRE has a function called <b>match()</b>, which it calls repeatedly Internally, PCRE has a function called <b>match()</b>, which it calls repeatedly
(sometimes recursively) when matching a pattern with the <b>pcre_exec()</b> (sometimes recursively) when matching a pattern with the <b>pcre_exec()</b>
@ -249,7 +319,7 @@ constraints. However, you can set a lower limit by adding, for example,
</pre> </pre>
to the <b>configure</b> command. This value can also be overridden at run time. to the <b>configure</b> command. This value can also be overridden at run time.
</P> </P>
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br> <br><a name="SEC14" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
<P> <P>
PCRE uses fixed tables for processing characters whose code values are less PCRE uses fixed tables for processing characters whose code values are less
than 256. By default, PCRE is built with a set of tables that are distributed than 256. By default, PCRE is built with a set of tables that are distributed
@ -260,13 +330,13 @@ only. If you add
</pre> </pre>
to the <b>configure</b> command, the distributed tables are no longer used. to the <b>configure</b> command, the distributed tables are no longer used.
Instead, a program called <b>dftables</b> is compiled and run. This outputs the Instead, a program called <b>dftables</b> is compiled and run. This outputs the
source for new set of tables, created in the default locale of your C runtime source for a new set of tables, created in the default locale of your C run-time
system. (This method of replacing the tables does not work if you are cross system. (This method of replacing the tables does not work if you are cross
compiling, because <b>dftables</b> is run on the local host. If you need to compiling, because <b>dftables</b> is run on the local host. If you need to
create alternative tables when cross compiling, you will have to do so "by create alternative tables when cross compiling, you will have to do so "by
hand".) hand".)
</P> </P>
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br> <br><a name="SEC15" href="#TOC1">USING EBCDIC CODE</a><br>
<P> <P>
PCRE assumes by default that it will run in an environment where the character PCRE assumes by default that it will run in an environment where the character
code is ASCII (or Unicode, which is a superset of ASCII). This is the case for code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
@ -278,9 +348,26 @@ EBCDIC environment by adding
to the <b>configure</b> command. This setting implies to the <b>configure</b> command. This setting implies
--enable-rebuild-chartables. You should only use it if you know that you are in --enable-rebuild-chartables. You should only use it if you know that you are in
an EBCDIC environment (for example, an IBM mainframe operating system). The an EBCDIC environment (for example, an IBM mainframe operating system). The
--enable-ebcdic option is incompatible with --enable-utf8. --enable-ebcdic option is incompatible with --enable-utf.
</P> </P>
<br><a name="SEC14" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br> <P>
The EBCDIC character that corresponds to an ASCII LF is assumed to have the
value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In
such an environment you should use
<pre>
--enable-ebcdic-nl25
</pre>
as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR has the
same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is <i>not</i>
chosen as LF is made to correspond to the Unicode NEL character (which, in
Unicode, is 0x85).
</P>
<P>
The options that select newline behaviour, such as --enable-newline-is-cr,
and equivalent run-time options, refer to these character values in an EBCDIC
environment.
</P>
<br><a name="SEC16" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
<P> <P>
By default, <b>pcregrep</b> reads all files as plain text. You can build it so By default, <b>pcregrep</b> reads all files as plain text. You can build it so
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
@ -293,7 +380,22 @@ to the <b>configure</b> command. These options naturally require that the
relevant libraries are installed on your system. Configuration will fail if relevant libraries are installed on your system. Configuration will fail if
they are not. they are not.
</P> </P>
<br><a name="SEC15" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br> <br><a name="SEC17" href="#TOC1">PCREGREP BUFFER SIZE</a><br>
<P>
<b>pcregrep</b> uses an internal buffer to hold a "window" on the file it is
scanning, in order to be able to output "before" and "after" lines when it
finds a match. The size of the buffer is controlled by a parameter whose
default value is 20K. The buffer itself is three times this size, but because
of the way it is used for holding "before" lines, the longest line that is
guaranteed to be processable is the parameter size. You can change the default
parameter value by adding, for example,
<pre>
--with-pcregrep-bufsize=50K
</pre>
to the <b>configure</b> command. The caller of <b>pcregrep</b> can, however,
override this value by specifying a run-time option.
</P>
<br><a name="SEC18" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
<P> <P>
If you add If you add
<pre> <pre>
@ -302,7 +404,7 @@ If you add
to the <b>configure</b> command, <b>pcretest</b> is linked with the to the <b>configure</b> command, <b>pcretest</b> is linked with the
<b>libreadline</b> library, and when its input is from a terminal, it reads it <b>libreadline</b> library, and when its input is from a terminal, it reads it
using the <b>readline()</b> function. This provides line-editing and history using the <b>readline()</b> function. This provides line-editing and history
facilities. Note that <b>libreadline</b> is GPL-licenced, so if you distribute a facilities. Note that <b>libreadline</b> is GPL-licensed, so if you distribute a
binary of <b>pcretest</b> linked in this way, there may be licensing issues. binary of <b>pcretest</b> linked in this way, there may be licensing issues.
</P> </P>
<P> <P>
@ -324,11 +426,78 @@ automatically included, you may need to add something like
</pre> </pre>
immediately before the <b>configure</b> command. immediately before the <b>configure</b> command.
</P> </P>
<br><a name="SEC16" href="#TOC1">SEE ALSO</a><br> <br><a name="SEC19" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
<P> <P>
<b>pcreapi</b>(3), <b>pcre_config</b>(3). By adding the
<pre>
--enable-valgrind
</pre>
option to the <b>configure</b> command, PCRE will use valgrind annotations
to mark certain memory regions as unaddressable. This allows it to detect
invalid memory accesses, and is mostly useful for debugging PCRE itself.
</P> </P>
<br><a name="SEC17" href="#TOC1">AUTHOR</a><br> <br><a name="SEC20" href="#TOC1">CODE COVERAGE REPORTING</a><br>
<P>
If your C compiler is gcc, you can build a version of PCRE that can generate a
code coverage report for its test suite. To enable this, you must install
<b>lcov</b> version 1.6 or above. Then specify
<pre>
--enable-coverage
</pre>
to the <b>configure</b> command and build PCRE in the usual way.
</P>
<P>
Note that using <b>ccache</b> (a caching C compiler) is incompatible with code
coverage reporting. If you have configured <b>ccache</b> to run automatically
on your system, you must set the environment variable
<pre>
CCACHE_DISABLE=1
</pre>
before running <b>make</b> to build PCRE, so that <b>ccache</b> is not used.
</P>
<P>
When --enable-coverage is used, the following additional targets are added to the
<i>Makefile</i>:
<pre>
make coverage
</pre>
This creates a fresh coverage report for the PCRE test suite. It is equivalent
to running "make coverage-reset", "make coverage-baseline", "make check", and
then "make coverage-report".
<pre>
make coverage-reset
</pre>
This zeroes the coverage counters, but does nothing else.
<pre>
make coverage-baseline
</pre>
This captures baseline coverage information.
<pre>
make coverage-report
</pre>
This creates the coverage report.
<pre>
make coverage-clean-report
</pre>
This removes the generated coverage report without cleaning the coverage data
itself.
<pre>
make coverage-clean-data
</pre>
This removes the captured coverage data without removing the coverage files
created at compile time (*.gcno).
<pre>
make coverage-clean
</pre>
This cleans all coverage data including the generated coverage report. For more
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
documentation.
</P>
<br><a name="SEC21" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcreapi</b>(3), <b>pcre16</b>, <b>pcre32</b>, <b>pcre_config</b>(3).
</P>
<br><a name="SEC22" href="#TOC1">AUTHOR</a><br>
<P> <P>
Philip Hazel Philip Hazel
<br> <br>
@ -337,11 +506,11 @@ University Computing Service
Cambridge CB2 3QH, England. Cambridge CB2 3QH, England.
<br> <br>
</P> </P>
<br><a name="SEC18" href="#TOC1">REVISION</a><br> <br><a name="SEC23" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 17 March 2009 Last updated: 30 October 2012
<br> <br>
Copyright &copy; 1997-2009 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -13,23 +13,35 @@ from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong. man page, in case the conversion went wrong.
<br> <br>
<ul> <ul>
<li><a name="TOC1" href="#SEC1">PCRE CALLOUTS</a> <li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">MISSING CALLOUTS</a> <li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">THE CALLOUT INTERFACE</a> <li><a name="TOC3" href="#SEC3">MISSING CALLOUTS</a>
<li><a name="TOC4" href="#SEC4">RETURN VALUES</a> <li><a name="TOC4" href="#SEC4">THE CALLOUT INTERFACE</a>
<li><a name="TOC5" href="#SEC5">AUTHOR</a> <li><a name="TOC5" href="#SEC5">RETURN VALUES</a>
<li><a name="TOC6" href="#SEC6">REVISION</a> <li><a name="TOC6" href="#SEC6">AUTHOR</a>
<li><a name="TOC7" href="#SEC7">REVISION</a>
</ul> </ul>
<br><a name="SEC1" href="#TOC1">PCRE CALLOUTS</a><br> <br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P> <P>
<b>int (*pcre_callout)(pcre_callout_block *);</b> <b>int (*pcre_callout)(pcre_callout_block *);</b>
</P> </P>
<P> <P>
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
</P>
<P>
<b>int (*pcre32_callout)(pcre32_callout_block *);</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>
PCRE provides a feature called "callout", which is a means of temporarily PCRE provides a feature called "callout", which is a means of temporarily
passing control to the caller of PCRE in the middle of pattern matching. The passing control to the caller of PCRE in the middle of pattern matching. The
caller of PCRE provides an external function by putting its entry point in the caller of PCRE provides an external function by putting its entry point in the
global variable <i>pcre_callout</i>. By default, this variable contains NULL, global variable <i>pcre_callout</i> (<i>pcre16_callout</i> for the 16-bit
which disables all calling out. library, <i>pcre32_callout</i> for the 32-bit library). By default, this
variable contains NULL, which disables all calling out.
</P> </P>
<P> <P>
Within a regular expression, (?C) indicates the points at which the external Within a regular expression, (?C) indicates the points at which the external
@ -39,9 +51,9 @@ For example, this pattern has two callout points:
<pre> <pre>
(?C1)abc(?C2)def (?C1)abc(?C2)def
</pre> </pre>
If the PCRE_AUTO_CALLOUT option bit is set when <b>pcre_compile()</b> is called, If the PCRE_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE
PCRE automatically inserts callouts, all with number 255, before each item in automatically inserts callouts, all with number 255, before each item in the
the pattern. For example, if PCRE_AUTO_CALLOUT is used with the pattern pattern. For example, if PCRE_AUTO_CALLOUT is used with the pattern
<pre> <pre>
A(\d{2}|--) A(\d{2}|--)
</pre> </pre>
@ -59,7 +71,12 @@ command has an option that sets automatic callouts; when it is used, the output
indicates how the pattern is matched. This is useful information when you are indicates how the pattern is matched. This is useful information when you are
trying to optimize the performance of a particular pattern. trying to optimize the performance of a particular pattern.
</P> </P>
<br><a name="SEC2" href="#TOC1">MISSING CALLOUTS</a><br> <P>
The use of callouts in a pattern makes it ineligible for optimization by the
just-in-time compiler. Studying such a pattern with the PCRE_STUDY_JIT_COMPILE
option always fails.
</P>
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
<P> <P>
You should be aware that, because of optimizations in the way PCRE matches You should be aware that, because of optimizations in the way PCRE matches
patterns by default, callouts sometimes do not happen. For example, if the patterns by default, callouts sometimes do not happen. For example, if the
@ -73,34 +90,46 @@ the callout is never reached. However, with "abyd", though the result is still
no match, the callout is obeyed. no match, the callout is obeyed.
</P> </P>
<P> <P>
You can disable these optimizations by passing the PCRE_NO_START_OPTIMIZE If the pattern is studied, PCRE knows the minimum length of a matching string,
option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. This slows down the and will immediately give a "no match" return without actually running a match
matching process, but does ensure that callouts such as the example above are if the subject is not long enough, or, for unanchored patterns, if it has
obeyed. been scanned far enough.
</P> </P>
<br><a name="SEC3" href="#TOC1">THE CALLOUT INTERFACE</a><br> <P>
You can disable these optimizations by passing the PCRE_NO_START_OPTIMIZE
option to the matching function, or by starting the pattern with
(*NO_START_OPT). This slows down the matching process, but does ensure that
callouts such as the example above are obeyed.
</P>
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
<P> <P>
During matching, when PCRE reaches a callout point, the external function During matching, when PCRE reaches a callout point, the external function
defined by <i>pcre_callout</i> is called (if it is set). This applies to both defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called
the <b>pcre_exec()</b> and the <b>pcre_dfa_exec()</b> matching functions. The (if it is set). This applies to both normal and DFA matching. The only
only argument to the callout function is a pointer to a <b>pcre_callout</b> argument to the callout function is a pointer to a <b>pcre_callout</b>
block. This structure contains the following fields: or <b>pcre[16|32]_callout</b> block.
These structures contain the following fields:
<pre> <pre>
int <i>version</i>; int <i>version</i>;
int <i>callout_number</i>; int <i>callout_number</i>;
int *<i>offset_vector</i>; int *<i>offset_vector</i>;
const char *<i>subject</i>; const char *<i>subject</i>; (8-bit version)
int <i>subject_length</i>; PCRE_SPTR16 <i>subject</i>; (16-bit version)
int <i>start_match</i>; PCRE_SPTR32 <i>subject</i>; (32-bit version)
int <i>current_position</i>; int <i>subject_length</i>;
int <i>capture_top</i>; int <i>start_match</i>;
int <i>capture_last</i>; int <i>current_position</i>;
void *<i>callout_data</i>; int <i>capture_top</i>;
int <i>pattern_position</i>; int <i>capture_last</i>;
int <i>next_item_length</i>; void *<i>callout_data</i>;
int <i>pattern_position</i>;
int <i>next_item_length</i>;
const unsigned char *<i>mark</i>; (8-bit version)
const PCRE_UCHAR16 *<i>mark</i>; (16-bit version)
const PCRE_UCHAR32 *<i>mark</i>; (32-bit version)
</pre> </pre>
The <i>version</i> field is an integer containing the version number of the The <i>version</i> field is an integer containing the version number of the
block format. The initial version was 0; the current version is 1. The version block format. The initial version was 0; the current version is 2. The version
number will change again in future if additional fields are added, but the number will change again in future if additional fields are added, but the
intention is never to remove any of the existing fields. intention is never to remove any of the existing fields.
</P> </P>
@ -111,15 +140,15 @@ automatically generated callouts).
</P> </P>
<P> <P>
The <i>offset_vector</i> field is a pointer to the vector of offsets that was The <i>offset_vector</i> field is a pointer to the vector of offsets that was
passed by the caller to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. When passed by the caller to the matching function. When <b>pcre_exec()</b> or
<b>pcre_exec()</b> is used, the contents can be inspected in order to extract <b>pcre[16|32]_exec()</b> is used, the contents can be inspected, in order to extract
substrings that have been matched so far, in the same way as for extracting substrings that have been matched so far, in the same way as for extracting
substrings after a match has completed. For <b>pcre_dfa_exec()</b> this field is substrings after a match has completed. For the DFA matching functions, this
not useful. field is not useful.
</P> </P>
<P> <P>
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
that were passed to <b>pcre_exec()</b>. that were passed to the matching function.
</P> </P>
<P> <P>
The <i>start_match</i> field normally contains the offset within the subject at The <i>start_match</i> field normally contains the offset within the subject at
@ -134,53 +163,59 @@ The <i>current_position</i> field contains the offset within the subject of the
current match pointer. current match pointer.
</P> </P>
<P> <P>
When the <b>pcre_exec()</b> function is used, the <i>capture_top</i> field When <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> is used, the
contains one more than the number of the highest numbered captured substring so <i>capture_top</i> field contains one more than the number of the highest
far. If no substrings have been captured, the value of <i>capture_top</i> is numbered captured substring so far. If no substrings have been captured, the
one. This is always the case when <b>pcre_dfa_exec()</b> is used, because it value of <i>capture_top</i> is one. This is always the case when the DFA
does not support captured substrings. functions are used, because they do not support captured substrings.
</P> </P>
<P> <P>
The <i>capture_last</i> field contains the number of the most recently captured The <i>capture_last</i> field contains the number of the most recently captured
substring. If no substrings have been captured, its value is -1. This is always substring. If no substrings have been captured, its value is -1. This is always
the case when <b>pcre_dfa_exec()</b> is used. the case for the DFA matching functions.
</P> </P>
<P> <P>
The <i>callout_data</i> field contains a value that is passed to The <i>callout_data</i> field contains a value that is passed to a matching
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> specifically so that it can be function specifically so that it can be passed back in callouts. It is passed
passed back in callouts. It is passed in the <i>pcre_callout</i> field of the in the <i>callout_data</i> field of a <b>pcre_extra</b> or <b>pcre[16|32]_extra</b>
<b>pcre_extra</b> data structure. If no such data was passed, the value of data structure. If no such data was passed, the value of <i>callout_data</i> in
<i>callout_data</i> in a <b>pcre_callout</b> block is NULL. There is a a callout block is NULL. There is a description of the <b>pcre_extra</b>
description of the <b>pcre_extra</b> structure in the structure in the
<a href="pcreapi.html"><b>pcreapi</b></a> <a href="pcreapi.html"><b>pcreapi</b></a>
documentation. documentation.
</P> </P>
<P> <P>
The <i>pattern_position</i> field is present from version 1 of the The <i>pattern_position</i> field is present from version 1 of the callout
<i>pcre_callout</i> structure. It contains the offset to the next item to be structure. It contains the offset to the next item to be matched in the pattern
matched in the pattern string. string.
</P> </P>
<P> <P>
The <i>next_item_length</i> field is present from version 1 of the The <i>next_item_length</i> field is present from version 1 of the callout
<i>pcre_callout</i> structure. It contains the length of the next item to be structure. It contains the length of the next item to be matched in the pattern
matched in the pattern string. When the callout immediately precedes an string. When the callout immediately precedes an alternation bar, a closing
alternation bar, a closing parenthesis, or the end of the pattern, the length parenthesis, or the end of the pattern, the length is zero. When the callout
is zero. When the callout precedes an opening parenthesis, the length is that precedes an opening parenthesis, the length is that of the entire subpattern.
of the entire subpattern.
</P> </P>
<P> <P>
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
help in distinguishing between different automatic callouts, which all have the help in distinguishing between different automatic callouts, which all have the
same callout number. However, they are set for all callouts. same callout number. However, they are set for all callouts.
</P> </P>
<br><a name="SEC4" href="#TOC1">RETURN VALUES</a><br> <P>
The <i>mark</i> field is present from version 2 of the callout structure. In
callouts from <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> it contains a pointer to
the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
(*THEN) item in the match, or NULL if no such items have been passed. Instances
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
callouts from the DFA matching functions this field always contains NULL.
</P>
<br><a name="SEC5" href="#TOC1">RETURN VALUES</a><br>
<P> <P>
The external callout function returns an integer to PCRE. If the value is zero, The external callout function returns an integer to PCRE. If the value is zero,
matching proceeds as normal. If the value is greater than zero, matching fails matching proceeds as normal. If the value is greater than zero, matching fails
at the current point, but the testing of other matching possibilities goes at the current point, but the testing of other matching possibilities goes
ahead, just as if a lookahead assertion had failed. If the value is less than ahead, just as if a lookahead assertion had failed. If the value is less than
zero, the match is abandoned, and <b>pcre_exec()</b> (or <b>pcre_dfa_exec()</b>) zero, the match is abandoned, and the matching function returns the negative value.
returns the negative value.
</P> </P>
<P> <P>
Negative values should normally be chosen from the set of PCRE_ERROR_xxx Negative values should normally be chosen from the set of PCRE_ERROR_xxx
@ -188,7 +223,7 @@ values. In particular, PCRE_ERROR_NOMATCH forces a standard "no match" failure.
The error number PCRE_ERROR_CALLOUT is reserved for use by callout functions; The error number PCRE_ERROR_CALLOUT is reserved for use by callout functions;
it will never be used by PCRE itself. it will never be used by PCRE itself.
</P> </P>
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br> <br><a name="SEC6" href="#TOC1">AUTHOR</a><br>
<P> <P>
Philip Hazel Philip Hazel
<br> <br>
@ -197,11 +232,11 @@ University Computing Service
Cambridge CB2 3QH, England. Cambridge CB2 3QH, England.
<br> <br>
</P> </P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br> <br><a name="SEC7" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 15 March 2009 Last updated: 24 June 2012
<br> <br>
Copyright &copy; 1997-2009 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -17,23 +17,22 @@ DIFFERENCES BETWEEN PCRE AND PERL
</b><br> </b><br>
<P> <P>
This document describes the differences in the ways that PCRE and Perl handle This document describes the differences in the ways that PCRE and Perl handle
regular expressions. The differences described here are mainly with respect to regular expressions. The differences described here are with respect to Perl
Perl 5.8, though PCRE versions 7.0 and later contain some features that are versions 5.10 and above.
expected to be in the forthcoming Perl 5.10.
</P> </P>
<P> <P>
1. PCRE has only a subset of Perl's UTF-8 and Unicode support. Details of what 1. PCRE has only a subset of Perl's Unicode support. Details of what it does
it does have are given in the have are given in the
<a href="pcre.html#utf8support">section on UTF-8 support</a> <a href="pcreunicode.html"><b>pcreunicode</b></a>
in the main
<a href="pcre.html"><b>pcre</b></a>
page. page.
</P> </P>
<P> <P>
2. PCRE does not allow repeat quantifiers on lookahead assertions. Perl permits 2. PCRE allows repeat quantifiers only on parenthesized assertions, but they do
them, but they do not mean what you might think. For example, (?!a){3} does not mean what you might think. For example, (?!a){3} does not assert that the
not assert that the next three characters are not "a". It just asserts that the next three characters are not "a". It just asserts that the next character is
next character is not "a" three times. not "a" three times (in principle: PCRE optimizes this to run the assertion
just once). Perl allows repeat quantifiers on other assertions such as \b, but
these do not seem to have any use.
</P> </P>
<P> <P>
3. Capturing subpatterns that occur inside negative lookahead assertions are 3. Capturing subpatterns that occur inside negative lookahead assertions are
@ -50,16 +49,22 @@ represent a binary zero.
</P> </P>
<P> <P>
5. The following Perl escape sequences are not supported: \l, \u, \L, 5. The following Perl escape sequences are not supported: \l, \u, \L,
\U, and \N. In fact these are implemented by Perl's general string-handling \U, and \N when followed by a character name or Unicode value. (\N on its
and are not part of its pattern matching engine. If any of these are own, matching a non-newline character, is supported.) In fact these are
encountered by PCRE, an error is generated. implemented by Perl's general string-handling and are not part of its pattern
matching engine. If any of these are encountered by PCRE, an error is
generated by default. However, if the PCRE_JAVASCRIPT_COMPAT option is set,
\U and \u are interpreted as JavaScript interprets them.
</P> </P>
<P> <P>
6. The Perl escape sequences \p, \P, and \X are supported only if PCRE is 6. The Perl escape sequences \p, \P, and \X are supported only if PCRE is
built with Unicode character property support. The properties that can be built with Unicode character property support. The properties that can be
tested with \p and \P are limited to the general category properties such as tested with \p and \P are limited to the general category properties such as
Lu and Nd, script names such as Greek or Han, and the derived properties Any Lu and Nd, script names such as Greek or Han, and the derived properties Any
and L&. and L&. PCRE does support the Cs (surrogate) property, which Perl does not; the
Perl documentation says "Because Perl hides the need for the user to understand
the internal representation of Unicode characters, there is no need to
implement the somewhat messy concept of surrogates."
</P> </P>
<P> <P>
7. PCRE does support the \Q...\E escape for quoting substrings. Characters in 7. PCRE does support the \Q...\E escape for quoting substrings. Characters in
@ -79,37 +84,67 @@ The \Q...\E sequence is recognized both inside and outside character classes.
<P> <P>
8. Fairly obviously, PCRE does not support the (?{code}) and (??{code}) 8. Fairly obviously, PCRE does not support the (?{code}) and (??{code})
constructions. However, there is support for recursive patterns. This is not constructions. However, there is support for recursive patterns. This is not
available in Perl 5.8, but will be in Perl 5.10. Also, the PCRE "callout" available in Perl 5.8, but it is in Perl 5.10. Also, the PCRE "callout"
feature allows an external function to be called during pattern matching. See feature allows an external function to be called during pattern matching. See
the the
<a href="pcrecallout.html"><b>pcrecallout</b></a> <a href="pcrecallout.html"><b>pcrecallout</b></a>
documentation for details. documentation for details.
</P> </P>
<P> <P>
9. Subpatterns that are called recursively or as "subroutines" are always 9. Subpatterns that are called as subroutines (whether or not recursively) are
treated as atomic groups in PCRE. This is like Python, but unlike Perl. always treated as atomic groups in PCRE. This is like Python, but unlike Perl.
Captured values that are set outside a subroutine call can be referenced from
inside in PCRE, but not in Perl. There is a discussion that explains these
differences in more detail in the
<a href="pcrepattern.html#recursiondifference">section on recursion differences from Perl</a>
in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
page.
</P> </P>
<P> <P>
10. There are some differences that are concerned with the settings of captured 10. If any of the backtracking control verbs are used in an assertion or in a
subpattern that is called as a subroutine (whether or not recursively), their
effect is confined to that subpattern; it does not extend to the surrounding
pattern. This is not always the case in Perl. In particular, if (*THEN) is
present in a group that is called as a subroutine, its action is limited to
that group, even if the group does not contain any | characters. There is one
exception to this: the name from a (*MARK), (*PRUNE), or (*THEN) that is
encountered in a successful positive assertion <i>is</i> passed back when a
match succeeds (compare capturing parentheses in assertions). Note that such
subpatterns are processed as anchored at the point where they are tested.
</P>
<P>
11. There are some differences that are concerned with the settings of captured
strings when part of a pattern is repeated. For example, matching "aba" against strings when part of a pattern is repeated. For example, matching "aba" against
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b". the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
</P> </P>
<P> <P>
11. PCRE does support Perl 5.10's backtracking verbs (*ACCEPT), (*FAIL), (*F), 12. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
(*COMMIT), (*PRUNE), (*SKIP), and (*THEN), but only in the forms without an names is not as general as Perl's. This is a consequence of the fact the PCRE
argument. PCRE does not support (*MARK). If (*ACCEPT) is within capturing works internally just with numbers, using an external table to translate
parentheses, PCRE does not set that capture group; this is different to Perl. between numbers and names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b&#62;B),
where the two capturing parentheses have the same number but different names,
is not supported, and causes an error at compile time. If it were allowed, it
would not be possible to distinguish which parentheses matched, because both
names map to capturing subpattern number 1. To avoid this confusing situation,
an error is given at compile time.
</P> </P>
<P> <P>
12. PCRE provides some extensions to the Perl regular expression facilities. 13. Perl recognizes comments in some places that PCRE does not, for example,
Perl 5.10 will include new features that are not in earlier versions, some of between the ( and ? at the start of a subpattern. If the /x modifier is set,
which (such as named parentheses) have been in PCRE for some time. This list is Perl allows white space between ( and ? but PCRE never does, even if the
with respect to Perl 5.10: PCRE_EXTENDED option is set.
</P>
<P>
14. PCRE provides some extensions to the Perl regular expression facilities.
Perl 5.10 includes new features that are not in earlier versions of Perl, some
of which (such as named parentheses) have been in PCRE for some time. This list
is with respect to Perl 5.10:
<br> <br>
<br> <br>
(a) Although lookbehind assertions must match fixed length strings, each (a) Although lookbehind assertions in PCRE must match fixed length strings,
alternative branch of a lookbehind assertion can match a different length of each alternative branch of a lookbehind assertion can match a different length
string. Perl requires them all to have the same length. of string. Perl requires them all to have the same length.
<br> <br>
<br> <br>
(b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $ (b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $
@ -130,8 +165,8 @@ question mark they are.
only at the first matching position in the subject string. only at the first matching position in the subject string.
<br> <br>
<br> <br>
(f) The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, and PCRE_NO_AUTO_CAPTURE (f) The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, and
options for <b>pcre_exec()</b> have no Perl equivalents. PCRE_NO_AUTO_CAPTURE options for <b>pcre_exec()</b> have no Perl equivalents.
<br> <br>
<br> <br>
(g) The \R escape sequence can be restricted to match only CR, LF, or CRLF (g) The \R escape sequence can be restricted to match only CR, LF, or CRLF
@ -145,11 +180,13 @@ by the PCRE_BSR_ANYCRLF option.
<br> <br>
<br> <br>
(j) Patterns compiled by PCRE can be saved and re-used at a later time, even on (j) Patterns compiled by PCRE can be saved and re-used at a later time, even on
different hosts that have the other endianness. different hosts that have the other endianness. However, this does not apply to
optimized data created by the just-in-time compiler.
<br> <br>
<br> <br>
(k) The alternative matching function (<b>pcre_dfa_exec()</b>) matches in a (k) The alternative matching functions (<b>pcre_dfa_exec()</b>,
different way and is not Perl-compatible. <b>pcre16_dfa_exec()</b> and <b>pcre32_dfa_exec()</b>) match in a different way
and are not Perl-compatible.
<br> <br>
<br> <br>
(l) PCRE recognizes some special sequences such as (*CR) at the start of (l) PCRE recognizes some special sequences such as (*CR) at the start of
@ -170,9 +207,9 @@ Cambridge CB2 3QH, England.
REVISION REVISION
</b><br> </b><br>
<P> <P>
Last updated: 11 September 2007 Last updated: 25 August 2012
<br> <br>
Copyright &copy; 1997-2007 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -35,7 +35,8 @@ man page, in case the conversion went wrong.
The C++ wrapper for PCRE was provided by Google Inc. Some additional The C++ wrapper for PCRE was provided by Google Inc. Some additional
functionality was added by Giuseppe Maxia. This brief man page was constructed functionality was added by Giuseppe Maxia. This brief man page was constructed
from the notes in the <i>pcrecpp.h</i> file, which should be consulted for from the notes in the <i>pcrecpp.h</i> file, which should be consulted for
further details. further details. Note that the C++ wrapper supports only the original 8-bit
PCRE library. There is no 16-bit or 32-bit support at present.
</P> </P>
<br><a name="SEC3" href="#TOC1">MATCHING INTERFACE</a><br> <br><a name="SEC3" href="#TOC1">MATCHING INTERFACE</a><br>
<P> <P>
@ -191,7 +192,7 @@ supported:
PCRE_DOTALL dot matches newlines /s PCRE_DOTALL dot matches newlines /s
PCRE_DOLLAR_ENDONLY $ matches only at end N/A PCRE_DOLLAR_ENDONLY $ matches only at end N/A
PCRE_EXTRA strict escape parsing N/A PCRE_EXTRA strict escape parsing N/A
PCRE_EXTENDED ignore whitespaces /x PCRE_EXTENDED ignore white spaces /x
PCRE_UTF8 handles UTF8 chars built-in PCRE_UTF8 handles UTF8 chars built-in
PCRE_UNGREEDY reverses * and *? N/A PCRE_UNGREEDY reverses * and *? N/A
PCRE_NO_AUTO_CAPTURE disables capturing parens N/A (*) PCRE_NO_AUTO_CAPTURE disables capturing parens N/A (*)
@ -232,7 +233,7 @@ Normally, to pass one or more modifiers to a RE class, you declare
a <i>RE_Options</i> object, set the appropriate options, and pass this a <i>RE_Options</i> object, set the appropriate options, and pass this
object to a RE constructor. Example: object to a RE constructor. Example:
<pre> <pre>
RE_options opt; RE_Options opt;
opt.set_caseless(true); opt.set_caseless(true);
if (RE("HELLO", opt).PartialMatch("hello world")) ... if (RE("HELLO", opt).PartialMatch("hello world")) ...
</pre> </pre>
@ -282,10 +283,7 @@ is defined in the pcrecpp namespace.
Example: read lines of the form "var = value" from a string. Example: read lines of the form "var = value" from a string.
string contents = ...; // Fill string somehow string contents = ...; // Fill string somehow
pcrecpp::StringPiece input(contents); // Wrap in a StringPiece pcrecpp::StringPiece input(contents); // Wrap in a StringPiece
</PRE>
</P>
<P>
<pre>
string var; string var;
int value; int value;
pcrecpp::RE re("(\\w+) = (\\d+)\n"); pcrecpp::RE re("(\\w+) = (\\d+)\n");
@ -363,7 +361,7 @@ Copyright &copy; 2007 Google Inc.
</P> </P>
<br><a name="SEC12" href="#TOC1">REVISION</a><br> <br><a name="SEC12" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 17 March 2009 Last updated: 08 January 2012
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -0,0 +1,426 @@
<html>
<head>
<title>pcredemo specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcredemo man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
</ul>
<PRE>
/*************************************************
* PCRE DEMONSTRATION PROGRAM *
*************************************************/
/* This is a demonstration program to illustrate the most straightforward ways
of calling the PCRE regular expression library from a C program. See the
pcresample documentation for a short discussion ("man pcresample" if you have
the PCRE man pages installed).
In Unix-like environments, if PCRE is installed in your standard system
libraries, you should be able to compile this program using this command:
gcc -Wall pcredemo.c -lpcre -o pcredemo
If PCRE is not installed in a standard place, it is likely to be installed with
support for the pkg-config mechanism. If you have pkg-config, you can compile
this program using this command:
gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
If you do not have pkg-config, you may have to use this:
gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
-R/usr/local/lib -lpcre -o pcredemo
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
library files for PCRE are installed on your system. Only some operating
systems (e.g. Solaris) use the -R option.
Building under Windows:
If you want to statically link this program against a non-dll .a file, you must
define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
pcre_free() exported functions will be declared __declspec(dllimport), with
unwanted results. So in this environment, uncomment the following line. */
/* #define PCRE_STATIC */
#include &lt;stdio.h&gt;
#include &lt;string.h&gt;
#include &lt;pcre.h&gt;
#define OVECCOUNT 30 /* should be a multiple of 3 */
int main(int argc, char **argv)
{
pcre *re;
const char *error;
char *pattern;
char *subject;
unsigned char *name_table;
unsigned int option_bits;
int erroffset;
int find_all;
int crlf_is_newline;
int namecount;
int name_entry_size;
int ovector[OVECCOUNT];
int subject_length;
int rc, i;
int utf8;
/**************************************************************************
* First, sort out the command line. There is only one possible option at *
* the moment, "-g" to request repeated matching to find all occurrences, *
* like Perl's /g option. We set the variable find_all to a non-zero value *
* if the -g option is present. Apart from that, there must be exactly two *
* arguments. *
**************************************************************************/
find_all = 0;
for (i = 1; i &lt; argc; i++)
{
if (strcmp(argv[i], "-g") == 0) find_all = 1;
else break;
}
/* After the options, we require exactly two arguments, which are the pattern,
and the subject string. */
if (argc - i != 2)
{
printf("Two arguments required: a regex and a subject string\n");
return 1;
}
pattern = argv[i];
subject = argv[i+1];
subject_length = (int)strlen(subject);
/*************************************************************************
* Now we are going to compile the regular expression pattern, and handle *
* any errors that are detected. *
*************************************************************************/
re = pcre_compile(
pattern, /* the pattern */
0, /* default options */
&amp;error, /* for error message */
&amp;erroffset, /* for error offset */
NULL); /* use default character tables */
/* Compilation failed: print the error message and exit */
if (re == NULL)
{
printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
return 1;
}
/*************************************************************************
* If the compilation succeeded, we call PCRE again, in order to do a *
* pattern match against the subject string. This does just ONE match. If *
* further matching is needed, it will be done below. *
*************************************************************************/
rc = pcre_exec(
re, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
subject, /* the subject string */
subject_length, /* the length of the subject */
0, /* start at offset 0 in the subject */
0, /* default options */
ovector, /* output vector for substring information */
OVECCOUNT); /* number of elements in the output vector */
/* Matching failed: handle error cases */
if (rc &lt; 0)
{
switch(rc)
{
case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
/*
Handle other special cases if you like
*/
default: printf("Matching error %d\n", rc); break;
}
pcre_free(re); /* Release memory used for the compiled pattern */
return 1;
}
/* Match succeeded */
printf("\nMatch succeeded at offset %d\n", ovector[0]);
/*************************************************************************
* We have found the first match within the subject string. If the output *
* vector wasn't big enough, say so. Then output any substrings that were *
* captured. *
*************************************************************************/
/* The output vector wasn't big enough */
if (rc == 0)
{
rc = OVECCOUNT/3;
printf("ovector only has room for %d captured substrings\n", rc - 1);
}
/* Show substrings stored in the output vector by number. Obviously, in a real
application you might want to do things other than print them. */
for (i = 0; i &lt; rc; i++)
{
char *substring_start = subject + ovector[2*i];
int substring_length = ovector[2*i+1] - ovector[2*i];
printf("%2d: %.*s\n", i, substring_length, substring_start);
}
/**************************************************************************
* That concludes the basic part of this demonstration program. We have *
* compiled a pattern, and performed a single match. The code that follows *
* shows first how to access named substrings, and then how to code for *
* repeated matches on the same subject. *
**************************************************************************/
/* See if there are any named substrings, and if so, show them by name. First
we have to extract the count of named parentheses from the pattern. */
(void)pcre_fullinfo(
re, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
PCRE_INFO_NAMECOUNT, /* number of named substrings */
&amp;namecount); /* where to put the answer */
if (namecount &lt;= 0) printf("No named substrings\n"); else
{
unsigned char *tabptr;
printf("Named substrings\n");
/* Before we can access the substrings, we must extract the table for
translating names to numbers, and the size of each entry in the table. */
(void)pcre_fullinfo(
re, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
PCRE_INFO_NAMETABLE, /* address of the table */
&amp;name_table); /* where to put the answer */
(void)pcre_fullinfo(
re, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
&amp;name_entry_size); /* where to put the answer */
/* Now we can scan the table and, for each entry, print the number, the name,
and the substring itself. */
tabptr = name_table;
for (i = 0; i &lt; namecount; i++)
{
int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
tabptr += name_entry_size;
}
}
/*************************************************************************
* If the "-g" option was given on the command line, we want to continue *
* to search for additional matches in the subject string, in a similar *
* way to the /g option in Perl. This turns out to be trickier than you *
* might think because of the possibility of matching an empty string. *
* What happens is as follows: *
* *
* If the previous match was NOT for an empty string, we can just start *
* the next match at the end of the previous one. *
* *
* If the previous match WAS for an empty string, we can't do that, as it *
* would lead to an infinite loop. Instead, a special call of pcre_exec() *
* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. *
* The first of these tells PCRE that an empty string at the start of the *
* subject is not a valid match; other possibilities must be tried. The *
* second flag restricts PCRE to one match attempt at the initial string *
* position. If this match succeeds, an alternative to the empty string *
* match has been found, and we can print it and proceed round the loop, *
* advancing by the length of whatever was found. If this match does not *
* succeed, we still stay in the loop, advancing by just one character. *
* In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
* more than one byte. *
* *
* However, there is a complication concerned with newlines. When the *
* newline convention is such that CRLF is a valid newline, we must *
* advance by two characters rather than one. The newline convention can *
* be set in the regex by (*CR), etc.; if not, we must find the default. *
*************************************************************************/
if (!find_all) /* Check for -g */
{
pcre_free(re); /* Release the memory used for the compiled pattern */
return 0; /* Finish unless -g was given */
}
/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
sequence. First, find the options with which the regex was compiled; extract
the UTF-8 state, and mask off all but the newline options. */
(void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &amp;option_bits);
utf8 = option_bits &amp; PCRE_UTF8;
option_bits &amp;= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;
/* If no newline options were set, find the default newline convention from the
build configuration. */
if (option_bits == 0)
{
int d;
(void)pcre_config(PCRE_CONFIG_NEWLINE, &amp;d);
/* Note that these values are always the ASCII ones, even in
EBCDIC environments. CR = 13, NL = 10. */
option_bits = (d == 13)? PCRE_NEWLINE_CR :
(d == 10)? PCRE_NEWLINE_LF :
(d == (13&lt;&lt;8 | 10))? PCRE_NEWLINE_CRLF :
(d == -2)? PCRE_NEWLINE_ANYCRLF :
(d == -1)? PCRE_NEWLINE_ANY : 0;
}
/* See if CRLF is a valid newline sequence. */
crlf_is_newline =
option_bits == PCRE_NEWLINE_ANY ||
option_bits == PCRE_NEWLINE_CRLF ||
option_bits == PCRE_NEWLINE_ANYCRLF;
/* Loop for second and subsequent matches */
for (;;)
{
int options = 0; /* Normally no options */
int start_offset = ovector[1]; /* Start at end of previous match */
/* If the previous match was for an empty string, we are finished if we are
at the end of the subject. Otherwise, arrange to run another match at the
same point to see if a non-empty match can be found. */
if (ovector[0] == ovector[1])
{
if (ovector[0] == subject_length) break;
options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
}
/* Run the next matching operation */
rc = pcre_exec(
re, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
subject, /* the subject string */
subject_length, /* the length of the subject */
start_offset, /* starting offset in the subject */
options, /* options */
ovector, /* output vector for substring information */
OVECCOUNT); /* number of elements in the output vector */
/* This time, a result of NOMATCH isn't an error. If the value in "options"
is zero, it just means we have found all possible matches, so the loop ends.
Otherwise, it means we have failed to find a non-empty-string match at a
point where there was a previous empty-string match. In this case, we do what
Perl does: advance the matching position by one character, and continue. We
do this by setting the "end of previous match" offset, because that is picked
up at the top of the loop as the point at which to start again.
There are two complications: (a) When CRLF is a valid newline sequence, and
the current position is just before it, advance by an extra byte. (b)
Otherwise we must ensure that we skip an entire UTF-8 character if we are in
UTF-8 mode. */
if (rc == PCRE_ERROR_NOMATCH)
{
if (options == 0) break; /* All matches found */
ovector[1] = start_offset + 1; /* Advance one byte */
if (crlf_is_newline &amp;&amp; /* If CRLF is newline &amp; */
start_offset &lt; subject_length - 1 &amp;&amp; /* we are at CRLF, */
subject[start_offset] == '\r' &amp;&amp;
subject[start_offset + 1] == '\n')
ovector[1] += 1; /* Advance by one more. */
else if (utf8) /* Otherwise, ensure we */
{ /* advance a whole UTF-8 */
while (ovector[1] &lt; subject_length) /* character. */
{
if ((subject[ovector[1]] &amp; 0xc0) != 0x80) break;
ovector[1] += 1;
}
}
continue; /* Go round the loop again */
}
/* Other matching errors are not recoverable. */
if (rc &lt; 0)
{
printf("Matching error %d\n", rc);
pcre_free(re); /* Release memory used for the compiled pattern */
return 1;
}
/* Match succeeded */
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
/* The match succeeded, but the output vector wasn't big enough. */
if (rc == 0)
{
rc = OVECCOUNT/3;
printf("ovector only has room for %d captured substrings\n", rc - 1);
}
/* As before, show substrings stored in the output vector by number, and then
also any named substrings. */
for (i = 0; i &lt; rc; i++)
{
char *substring_start = subject + ovector[2*i];
int substring_length = ovector[2*i+1] - ovector[2*i];
printf("%2d: %.*s\n", i, substring_length, substring_start);
}
if (namecount &lt;= 0) printf("No named substrings\n"); else
{
unsigned char *tabptr = name_table;
printf("Named substrings\n");
for (i = 0; i &lt; namecount; i++)
{
int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
tabptr += name_entry_size;
}
}
} /* End of loop to find second and subsequent matches */
printf("\n");
pcre_free(re); /* Release memory used for the compiled pattern */
return 0;
}
/* End of pcredemo.c */
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -16,16 +16,17 @@ man page, in case the conversion went wrong.
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a> <li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a> <li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a> <li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
<li><a name="TOC4" href="#SEC4">OPTIONS</a> <li><a name="TOC4" href="#SEC4">BINARY FILES</a>
<li><a name="TOC5" href="#SEC5">ENVIRONMENT VARIABLES</a> <li><a name="TOC5" href="#SEC5">OPTIONS</a>
<li><a name="TOC6" href="#SEC6">NEWLINES</a> <li><a name="TOC6" href="#SEC6">ENVIRONMENT VARIABLES</a>
<li><a name="TOC7" href="#SEC7">OPTIONS COMPATIBILITY</a> <li><a name="TOC7" href="#SEC7">NEWLINES</a>
<li><a name="TOC8" href="#SEC8">OPTIONS WITH DATA</a> <li><a name="TOC8" href="#SEC8">OPTIONS COMPATIBILITY</a>
<li><a name="TOC9" href="#SEC9">MATCHING ERRORS</a> <li><a name="TOC9" href="#SEC9">OPTIONS WITH DATA</a>
<li><a name="TOC10" href="#SEC10">DIAGNOSTICS</a> <li><a name="TOC10" href="#SEC10">MATCHING ERRORS</a>
<li><a name="TOC11" href="#SEC11">SEE ALSO</a> <li><a name="TOC11" href="#SEC11">DIAGNOSTICS</a>
<li><a name="TOC12" href="#SEC12">AUTHOR</a> <li><a name="TOC12" href="#SEC12">SEE ALSO</a>
<li><a name="TOC13" href="#SEC13">REVISION</a> <li><a name="TOC13" href="#SEC13">AUTHOR</a>
<li><a name="TOC14" href="#SEC14">REVISION</a>
</ul> </ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br> <br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P> <P>
@ -49,7 +50,7 @@ without delimiters. For example:
If you attempt to use delimiters (for example, by surrounding a pattern with If you attempt to use delimiters (for example, by surrounding a pattern with
slashes, as is common in Perl scripts), they are interpreted as part of the slashes, as is common in Perl scripts), they are interpreted as part of the
pattern. Quotes can of course be used to delimit patterns on the command line pattern. Quotes can of course be used to delimit patterns on the command line
because they are interpreted by the shell, and indeed they are required if a because they are interpreted by the shell, and indeed quotes are required if a
pattern contains white space or shell metacharacters. pattern contains white space or shell metacharacters.
</P> </P>
<P> <P>
@ -74,31 +75,40 @@ possible to search for patterns that span line boundaries. What defines a line
boundary is controlled by the <b>-N</b> (<b>--newline</b>) option. boundary is controlled by the <b>-N</b> (<b>--newline</b>) option.
</P> </P>
<P> <P>
Patterns are limited to 8K or BUFSIZ characters, whichever is the greater. The amount of memory used for buffering files that are being scanned is
controlled by a parameter that can be set by the <b>--buffer-size</b> option.
The default value for this parameter is specified when <b>pcregrep</b> is built,
with that build-time default itself defaulting to 20K. A block of memory three times this size is
used (to allow for buffering "before" and "after" lines). An error occurs if a
line overflows the buffer.
</P>
<P>
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
BUFSIZ is defined in <b>&#60;stdio.h&#62;</b>. When there is more than one pattern BUFSIZ is defined in <b>&#60;stdio.h&#62;</b>. When there is more than one pattern
(specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to (specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
each line in the order in which they are defined, except that all the <b>-e</b> each line in the order in which they are defined, except that all the <b>-e</b>
patterns are tried before the <b>-f</b> patterns. patterns are tried before the <b>-f</b> patterns.
</P> </P>
<P> <P>
By default, as soon as one pattern matches (or fails to match when <b>-v</b> is By default, as soon as one pattern matches a line, no further patterns are
used), no further patterns are considered. However, if <b>--colour</b> (or considered. However, if <b>--colour</b> (or <b>--color</b>) is used to colour the
<b>--color</b>) is used to colour the matching substrings, or if matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
<b>--only-matching</b>, <b>--file-offsets</b>, or <b>--line-offsets</b> is used to <b>--line-offsets</b> is used to output only the part of the line that matched
output only the part of the line that matched (either shown literally, or as an (either shown literally, or as an offset), scanning resumes immediately
offset), scanning resumes immediately following the match, so that further following the match, so that further matches on the same line can be found. If
matches on the same line can be found. If there are multiple patterns, they are there are multiple patterns, they are all tried on the remainder of the line,
all tried on the remainder of the line, but patterns that follow the one that but patterns that follow the one that matched are not tried on the earlier part
matched are not tried on the earlier part of the line. of the line.
</P> </P>
<P> <P>
This is the same behaviour as GNU grep, but it does mean that the order in This behaviour means that the order in which multiple patterns are specified
which multiple patterns are specified can affect the output when one of the can affect the output when one of the above options is used. This is no longer
above options is used. the same behaviour as GNU grep, which now manages to display earlier matches
for later patterns (as long as there is no overlap).
</P> </P>
<P> <P>
Patterns that can match an empty string are accepted, but empty string Patterns that can match an empty string are accepted, but empty string
matches are not recognized. An example is the pattern "(super)?(man)?", in matches are never recognized. An example is the pattern "(super)?(man)?", in
which all components are optional. This pattern finds all occurrences of both which all components are optional. This pattern finds all occurrences of both
"super" and "man"; the output differs from matching with "super|man" when only "super" and "man"; the output differs from matching with "super|man" when only
the matching substrings are being shown. the matching substrings are being shown.
@ -117,10 +127,25 @@ of these file types by running it with the <b>--help</b> option. If the
appropriate support is not present, files are treated as plain text. The appropriate support is not present, files are treated as plain text. The
standard input is always so treated. standard input is always so treated.
</P> </P>
<br><a name="SEC4" href="#TOC1">OPTIONS</a><br> <br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
<P>
By default, a file that contains a binary zero byte within the first 1024 bytes
is identified as a binary file, and is processed specially. (GNU grep also
identifies binary files in this manner.) See the <b>--binary-files</b> option
for a means of changing the way binary files are handled.
</P>
<br><a name="SEC5" href="#TOC1">OPTIONS</a><br>
<P>
The order in which some of the options appear can affect the output. For
example, both the <b>-h</b> and <b>-l</b> options affect the printing of file
names. Whichever comes later in the command line will be the one that takes
effect. Similarly, except where noted below, if an option is given twice, the
later setting is used. Numerical values for options may be followed by K or M,
to signify multiplication by 1024 or 1024*1024 respectively.
</P>
<P> <P>
<b>--</b> <b>--</b>
This terminate the list of options. It is useful if the next item on the This terminates the list of options. It is useful if the next item on the
command line starts with a hyphen but is not an option. This allows for the command line starts with a hyphen but is not an option. This allows for the
processing of patterns and filenames that start with hyphens. processing of patterns and filenames that start with hyphens.
</P> </P>
@ -134,6 +159,11 @@ of <i>number</i> is expected to be relatively small. However, <b>pcregrep</b>
guarantees to have up to 8K of following text available for context output. guarantees to have up to 8K of following text available for context output.
</P> </P>
<P> <P>
<b>-a</b>, <b>--text</b>
Treat binary files as text. This is equivalent to
<b>--binary-files</b>=<i>text</i>.
</P>
<P>
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i> <b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
Output <i>number</i> lines of context before each matching line. If filenames Output <i>number</i> lines of context before each matching line. If filenames
and/or line numbers are being output, a hyphen separator is used instead of a and/or line numbers are being output, a hyphen separator is used instead of a
@ -143,16 +173,36 @@ of <i>number</i> is expected to be relatively small. However, <b>pcregrep</b>
guarantees to have up to 8K of preceding text available for context output. guarantees to have up to 8K of preceding text available for context output.
</P> </P>
<P> <P>
<b>--binary-files=</b><i>word</i>
Specify how binary files are to be processed. If the word is "binary" (the
default), pattern matching is performed on binary files, but the only output is
"Binary file &#60;name&#62; matches" when a match succeeds. If the word is "text",
which is equivalent to the <b>-a</b> or <b>--text</b> option, binary files are
processed in the same way as any other file. In this case, when a match
succeeds, the output may be binary garbage, which can have nasty effects if
sent to a terminal. If the word is "without-match", which is equivalent to the
<b>-I</b> option, binary files are not processed at all; they are assumed not to
be of interest.
</P>
<P>
<b>--buffer-size=</b><i>number</i>
Set the parameter that controls how much memory is used for buffering files
that are being scanned.
</P>
<P>
<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i> <b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
Output <i>number</i> lines of context both before and after each matching line. Output <i>number</i> lines of context both before and after each matching line.
This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value. This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
</P> </P>
<P> <P>
<b>-c</b>, <b>--count</b> <b>-c</b>, <b>--count</b>
Do not output individual lines; instead just output a count of the number of Do not output individual lines from the files that are being scanned; instead
lines that would otherwise have been output. If several files are given, a output the number of lines that would otherwise have been shown. If no lines
count is output for each of them. In this mode, the <b>-A</b>, <b>-B</b>, and are selected, the number zero is output. If several files are being
<b>-C</b> options are ignored. scanned, a count is output for each of them. However, if the
<b>--files-with-matches</b> option is also used, only those files whose counts
are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
<b>-B</b>, and <b>-C</b> options are ignored.
</P> </P>
<P> <P>
<b>--colour</b>, <b>--color</b> <b>--colour</b>, <b>--color</b>
@ -169,8 +219,8 @@ coloured. The value (which is optional, see above) may be "never", "always", or
connected to a terminal. More resources are used when colouring is enabled, connected to a terminal. More resources are used when colouring is enabled,
because <b>pcregrep</b> has to search for all possible matches in a line, not because <b>pcregrep</b> has to search for all possible matches in a line, not
just one, in order to colour them all. just one, in order to colour them all.
</P> <br>
<P> <br>
The colour that is used can be specified by setting the environment variable The colour that is used can be specified by setting the environment variable
PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a
string of two numbers, separated by a semicolon. They are copied directly into string of two numbers, separated by a semicolon. They are copied directly into
@ -187,10 +237,12 @@ it is to be processed. Valid values are "read" (the default) or "skip"
<P> <P>
<b>-d</b> <i>action</i>, <b>--directories=</b><i>action</i> <b>-d</b> <i>action</i>, <b>--directories=</b><i>action</i>
If an input path is a directory, "action" specifies how it is to be processed. If an input path is a directory, "action" specifies how it is to be processed.
Valid values are "read" (the default), "recurse" (equivalent to the <b>-r</b> Valid values are "read" (the default in non-Windows environments, for
option), or "skip" (silently skip the path). In the default case, directories compatibility with GNU grep), "recurse" (equivalent to the <b>-r</b> option), or
are read as if they were ordinary files. In some operating systems the effect "skip" (silently skip the path, the default in Windows environments). In the
of reading a directory like this is an immediate end-of-file. "read" case, directories are read as if they were ordinary files. In some
operating systems the effect of reading a directory like this is an immediate
end-of-file; in others it may provoke an error.
</P> </P>
<P> <P>
<b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i> <b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i>
@ -198,59 +250,94 @@ Specify a pattern to be matched. This option can be used multiple times in
order to specify several patterns. It can also be used as a way of specifying a order to specify several patterns. It can also be used as a way of specifying a
single pattern that starts with a hyphen. When <b>-e</b> is used, no argument single pattern that starts with a hyphen. When <b>-e</b> is used, no argument
pattern is taken from the command line; all arguments are treated as file pattern is taken from the command line; all arguments are treated as file
names. There is an overall maximum of 100 patterns. They are applied to each names. There is no limit to the number of patterns. They are applied to each
line in the order in which they are defined until one matches (or fails to line in the order in which they are defined until one matches.
match if <b>-v</b> is used). If <b>-f</b> is used with <b>-e</b>, the command line <br>
patterns are matched first, followed by the patterns from the file, independent <br>
of the order in which these options are specified. Note that multiple use of If <b>-f</b> is used with <b>-e</b>, the command line patterns are matched first,
<b>-e</b> is not the same as a single pattern with alternatives. For example, followed by the patterns from the file(s), independent of the order in which
X|Y finds the first character in a line that is X or Y, whereas if the two these options are specified. Note that multiple use of <b>-e</b> is not the same
patterns are given separately, <b>pcregrep</b> finds X if it is present, even if as a single pattern with alternatives. For example, X|Y finds the first
it follows Y in the line. It finds Y only if there is no X in the line. This character in a line that is X or Y, whereas if the two patterns are given
really matters only if you are using <b>-o</b> to show the part(s) of the line separately, with X first, <b>pcregrep</b> finds X if it is present, even if it
that matched. follows Y in the line. It finds Y only if there is no X in the line. This
matters only if you are using <b>-o</b> or <b>--colo(u)r</b> to show the part(s)
of the line that matched.
</P> </P>
<P> <P>
<b>--exclude</b>=<i>pattern</i> <b>--exclude</b>=<i>pattern</i>
When <b>pcregrep</b> is searching the files in a directory as a consequence of Files (but not directories) whose names match the pattern are skipped without
the <b>-r</b> (recursive search) option, any regular files whose names match the being processed. This applies to all files, whether listed on the command line,
pattern are excluded. Subdirectories are not excluded by this option; they are obtained from <b>--file-list</b>, or by scanning a directory. The pattern is a
searched recursively, subject to the <b>--exclude_dir</b> and PCRE regular expression, and is matched against the final component of the file
<b>--include_dir</b> options. The pattern is a PCRE regular expression, and is name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
matched against the final component of the file name (not the entire path). If apply to this pattern. The option may be given any number of times in order to
a file name matches both <b>--include</b> and <b>--exclude</b>, it is excluded. specify multiple patterns. If a file name matches both an <b>--include</b>
There is no short form for this option. and an <b>--exclude</b> pattern, it is excluded. There is no short form for this
option.
</P> </P>
<P> <P>
<b>--exclude_dir</b>=<i>pattern</i> <b>--exclude-from=</b><i>filename</i>
When <b>pcregrep</b> is searching the contents of a directory as a consequence Treat each non-empty line of the file as the data for an <b>--exclude</b>
of the <b>-r</b> (recursive search) option, any subdirectories whose names match option. What constitutes a newline when reading the file is the operating
the pattern are excluded. (Note that the \fP--exclude\fP option does not affect system's default. The <b>--newline</b> option has no effect on this option. This
subdirectories.) The pattern is a PCRE regular expression, and is matched option may be given more than once in order to specify a number of files to
against the final component of the name (not the entire path). If a read.
subdirectory name matches both <b>--include_dir</b> and <b>--exclude_dir</b>, it </P>
is excluded. There is no short form for this option. <P>
<b>--exclude-dir</b>=<i>pattern</i>
Directories whose names match the pattern are skipped without being processed,
whatever the setting of the <b>--recursive</b> option. This applies to all
directories, whether listed on the command line, obtained from
<b>--file-list</b>, or by scanning a parent directory. The pattern is a PCRE
regular expression, and is matched against the final component of the directory
name, not the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not
apply to this pattern. The option may be given any number of times in order to
specify more than one pattern. If a directory matches both <b>--include-dir</b>
and <b>--exclude-dir</b>, it is excluded. There is no short form for this
option.
</P> </P>
<P> <P>
<b>-F</b>, <b>--fixed-strings</b> <b>-F</b>, <b>--fixed-strings</b>
Interpret each pattern as a list of fixed strings, separated by newlines, Interpret each data-matching pattern as a list of fixed strings, separated by
instead of as a regular expression. The <b>-w</b> (match as a word) and <b>-x</b> newlines, instead of as a regular expression. What constitutes a newline for
(match whole line) options can be used with <b>-F</b>. They apply to each of the this purpose is controlled by the <b>--newline</b> option. The <b>-w</b> (match
fixed strings. A line is selected if any of the fixed strings are found in it as a word) and <b>-x</b> (match whole line) options can be used with <b>-F</b>.
(subject to <b>-w</b> or <b>-x</b>, if present). They apply to each of the fixed strings. A line is selected if any of the fixed
strings are found in it (subject to <b>-w</b> or <b>-x</b>, if present). This
option applies only to the patterns that are matched against the contents of
files; it does not apply to patterns specified by any of the <b>--include</b> or
<b>--exclude</b> options.
</P> </P>
<P> <P>
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i> <b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
Read a number of patterns from the file, one per line, and match them against Read patterns from the file, one per line, and match them against
each line of input. A data line is output if any of the patterns match it. The each line of input. What constitutes a newline when reading the file is the
filename can be given as "-" to refer to the standard input. When <b>-f</b> is operating system's default. The <b>--newline</b> option has no effect on this
used, patterns specified on the command line using <b>-e</b> may also be option. Trailing white space is removed from each line, and blank lines are
present; they are tested before the file's patterns. However, no other pattern ignored. An empty file contains no patterns and therefore matches nothing. See
is taken from the command line; all arguments are treated as file names. There also the comments about multiple patterns versus a single pattern with
is an overall maximum of 100 patterns. Trailing white space is removed from alternatives in the description of <b>-e</b> above.
each line, and blank lines are ignored. An empty file contains no patterns and <br>
therefore matches nothing. See also the comments about multiple patterns versus <br>
a single pattern with alternatives in the description of <b>-e</b> above. If this option is given more than once, all the specified files are
read. A data line is output if any of the patterns match it. A filename can
be given as "-" to refer to the standard input. When <b>-f</b> is used, patterns
specified on the command line using <b>-e</b> may also be present; they are
tested before the file's patterns. However, no other pattern is taken from the
command line; all arguments are treated as the names of paths to be searched.
</P>
<P>
<b>--file-list</b>=<i>filename</i>
Read a list of files and/or directories that are to be scanned from the given
file, one per line. Trailing white space is removed from each line, and blank
lines are ignored. These paths are processed before any that are listed on the
command line. The filename can be given as "-" to refer to the standard input.
If <b>--file</b> and <b>--file-list</b> are both specified as "-", patterns are
read first. This is useful only when the standard input is a terminal, from
which further lines (the list of files) can be read after an end-of-file
indication. If this option is given more than once, all the specified files are
read.
</P> </P>
<P> <P>
<b>--file-offsets</b> <b>--file-offsets</b>
@ -279,7 +366,13 @@ If a line number is also being output, it follows the file name.
<P> <P>
<b>--help</b> <b>--help</b>
Output a help message, giving brief details of the command options and file Output a help message, giving brief details of the command options and file
type support, and then exit. type support, and then exit. Anything else on the command line is
ignored.
</P>
<P>
<b>-I</b>
Treat binary files as never matching. This is equivalent to
<b>--binary-files</b>=<i>without-match</i>.
</P> </P>
<P> <P>
<b>-i</b>, <b>--ignore-case</b> <b>-i</b>, <b>--ignore-case</b>
@ -287,24 +380,35 @@ Ignore upper/lower case distinctions during comparisons.
</P> </P>
<P> <P>
<b>--include</b>=<i>pattern</i> <b>--include</b>=<i>pattern</i>
When <b>pcregrep</b> is searching the files in a directory as a consequence of If any <b>--include</b> patterns are specified, the only files that are
the <b>-r</b> (recursive search) option, only those regular files whose names processed are those that match one of the patterns (and do not match an
match the pattern are included. Subdirectories are always included and searched <b>--exclude</b> pattern). This option does not affect directories, but it
recursively, subject to the \fP--include_dir\fP and <b>--exclude_dir</b> applies to all files, whether listed on the command line, obtained from
options. The pattern is a PCRE regular expression, and is matched against the <b>--file-list</b>, or by scanning a directory. The pattern is a PCRE regular
final component of the file name (not the entire path). If a file name matches expression, and is matched against the final component of the file name, not
both <b>--include</b> and <b>--exclude</b>, it is excluded. There is no short the entire path. The <b>-F</b>, <b>-w</b>, and <b>-x</b> options do not apply to
form for this option. this pattern. The option may be given any number of times. If a file name
matches both an <b>--include</b> and an <b>--exclude</b> pattern, it is excluded.
There is no short form for this option.
</P> </P>
<P> <P>
<b>--include_dir</b>=<i>pattern</i> <b>--include-from=</b><i>filename</i>
When <b>pcregrep</b> is searching the contents of a directory as a consequence Treat each non-empty line of the file as the data for an <b>--include</b>
of the <b>-r</b> (recursive search) option, only those subdirectories whose option. What constitutes a newline for this purpose is the operating system's
names match the pattern are included. (Note that the <b>--include</b> option default. The <b>--newline</b> option has no effect on this option. This option
does not affect subdirectories.) The pattern is a PCRE regular expression, and may be given any number of times; all the files are read.
is matched against the final component of the name (not the entire path). If a </P>
subdirectory name matches both <b>--include_dir</b> and <b>--exclude_dir</b>, it <P>
is excluded. There is no short form for this option. <b>--include-dir</b>=<i>pattern</i>
If any <b>--include-dir</b> patterns are specified, the only directories that
are processed are those that match one of the patterns (and do not match an
<b>--exclude-dir</b> pattern). This applies to all directories, whether listed
on the command line, obtained from <b>--file-list</b>, or by scanning a parent
directory. The pattern is a PCRE regular expression, and is matched against the
final component of the directory name, not the entire path. The <b>-F</b>,
<b>-w</b>, and <b>-x</b> options do not apply to this pattern. The option may be
given any number of times. If a directory matches both <b>--include-dir</b> and
<b>--exclude-dir</b>, it is excluded. There is no short form for this option.
</P> </P>
<P> <P>
<b>-L</b>, <b>--files-without-match</b> <b>-L</b>, <b>--files-without-match</b>
@ -316,8 +420,11 @@ output once, on a separate line.
<b>-l</b>, <b>--files-with-matches</b> <b>-l</b>, <b>--files-with-matches</b>
Instead of outputting lines from the files, just output the names of the files Instead of outputting lines from the files, just output the names of the files
containing lines that would have been output. Each file name is output containing lines that would have been output. Each file name is output
once, on a separate line. Searching stops as soon as a matching line is found once, on a separate line. Searching normally stops as soon as a matching line
in a file. is found in a file. However, if the <b>-c</b> (count) option is also used,
matching continues in order to obtain the correct count, and those files that
have at least one match are listed along with their counts. Using this option
with <b>-c</b> is a way of suppressing the listing of files with no matches.
</P> </P>
<P> <P>
<b>--label</b>=<i>name</i> <b>--label</b>=<i>name</i>
@ -326,6 +433,17 @@ are being output. If not supplied, "(standard input)" is used. There is no
short form for this option. short form for this option.
</P> </P>
<P> <P>
<b>--line-buffered</b>
When this option is given, input is read and processed line by line, and the
output is flushed after each write. By default, input is read in large chunks,
unless <b>pcregrep</b> can determine that it is reading from a terminal (which
is currently possible only in Unix-like environments). Output to a terminal is
normally automatically flushed by the operating system. This option can be
useful when the input or output is attached to a pipe and you do not want
<b>pcregrep</b> to buffer up large amounts of data. However, its use will affect
performance, and the <b>-M</b> (multiline) option ceases to work.
</P>
<P>
<b>--line-offsets</b> <b>--line-offsets</b>
Instead of showing lines or parts of lines that match, show each match as a Instead of showing lines or parts of lines that match, show each match as a
line number, the offset from the start of the line, and a length. The line line number, the offset from the start of the line, and a length. The line
@ -343,27 +461,62 @@ locale is specified, the PCRE library's default (usually the "C" locale) is
used. There is no short form for this option. used. There is no short form for this option.
</P> </P>
<P> <P>
<b>--match-limit</b>=<i>number</i>
Processing some regular expression patterns can require a very large amount of
memory, leading in some cases to a program crash if not enough is available.
Other patterns may take a very long time to search for all possible matching
strings. The <b>pcre_exec()</b> function that is called by <b>pcregrep</b> to do
the matching has two parameters that can limit the resources that it uses.
<br>
<br>
The <b>--match-limit</b> option provides a means of limiting resource usage
when processing patterns that are not going to match, but which have a very
large number of possibilities in their search trees. The classic example is a
pattern that uses nested unlimited repeats. Internally, PCRE uses a function
called <b>match()</b> which it calls repeatedly (sometimes recursively). The
limit set by <b>--match-limit</b> is imposed on the number of times this
function is called during a match, which has the effect of limiting the amount
of backtracking that can take place.
<br>
<br>
The <b>--recursion-limit</b> option is similar to <b>--match-limit</b>, but
instead of limiting the total number of times that <b>match()</b> is called, it
limits the depth of recursive calls, which in turn limits the amount of memory
that can be used. The recursion depth is a smaller number than the total number
of calls, because not all calls to <b>match()</b> are recursive. This limit is
of use only if it is set smaller than <b>--match-limit</b>.
<br>
<br>
There are no short forms for these options. The default settings are specified
when the PCRE library is compiled, with the built-in default being 10 million.
</P>
<P>
<b>-M</b>, <b>--multiline</b> <b>-M</b>, <b>--multiline</b>
Allow patterns to match more than one line. When this option is given, patterns Allow patterns to match more than one line. When this option is given, patterns
may usefully contain literal newline characters and internal occurrences of ^ may usefully contain literal newline characters and internal occurrences of ^
and $ characters. The output for any one match may consist of more than one and $ characters. The output for a successful match may consist of more than
line. When this option is set, the PCRE library is called in "multiline" mode. one line, the last of which is the one in which the match ended. If the matched
string ends with a newline sequence the output ends at the end of that line.
<br>
<br>
When this option is set, the PCRE library is called in "multiline" mode.
There is a limit to the number of lines that can be matched, imposed by the way There is a limit to the number of lines that can be matched, imposed by the way
that <b>pcregrep</b> buffers the input file as it scans it. However, that <b>pcregrep</b> buffers the input file as it scans it. However,
<b>pcregrep</b> ensures that at least 8K characters or the rest of the document <b>pcregrep</b> ensures that at least 8K characters or the rest of the document
(whichever is the shorter) are available for forward matching, and similarly (whichever is the shorter) are available for forward matching, and similarly
the previous 8K characters (or all the previous characters, if fewer than 8K) the previous 8K characters (or all the previous characters, if fewer than 8K)
are guaranteed to be available for lookbehind assertions. are guaranteed to be available for lookbehind assertions. This option does not
work when input is read line by line (see <b>--line-buffered</b>).
</P> </P>
<P> <P>
<b>-N</b> <i>newline-type</i>, <b>--newline=</b><i>newline-type</i> <b>-N</b> <i>newline-type</i>, <b>--newline</b>=<i>newline-type</i>
The PCRE library supports five different conventions for indicating The PCRE library supports five different conventions for indicating
the ends of lines. They are the single-character sequences CR (carriage return) the ends of lines. They are the single-character sequences CR (carriage return)
and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention, and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
which recognizes any of the preceding three types, and an "any" convention, in which recognizes any of the preceding three types, and an "any" convention, in
which any Unicode line ending sequence is assumed to end a line. The Unicode which any Unicode line ending sequence is assumed to end a line. The Unicode
sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
(formfeed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and (form feed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
PS (paragraph separator, U+2029). PS (paragraph separator, U+2029).
<br> <br>
<br> <br>
@ -371,10 +524,13 @@ When the PCRE library is built, a default line-ending sequence is specified.
This is normally the standard sequence for the operating system. Unless This is normally the standard sequence for the operating system. Unless
otherwise specified by this option, <b>pcregrep</b> uses the library's default. otherwise specified by this option, <b>pcregrep</b> uses the library's default.
The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
makes it possible to use <b>pcregrep</b> on files that have come from other makes it possible to use <b>pcregrep</b> to scan files that have come from other
environments without having to modify their line endings. If the data that is environments without having to modify their line endings. If the data that is
being scanned does not agree with the convention set by this option, being scanned does not agree with the convention set by this option,
<b>pcregrep</b> may behave in strange ways. <b>pcregrep</b> may behave in strange ways. Note that this option does not
apply to files specified by the <b>-f</b>, <b>--exclude-from</b>, or
<b>--include-from</b> options, which are expected to use the operating system's
standard newline sequence.
</P> </P>
<P> <P>
<b>-n</b>, <b>--line-number</b> <b>-n</b>, <b>--line-number</b>
@ -384,14 +540,46 @@ output, it precedes the line number. This option is forced if
<b>--line-offsets</b> is used. <b>--line-offsets</b> is used.
</P> </P>
<P> <P>
<b>--no-jit</b>
If the PCRE library is built with support for just-in-time compiling (which
speeds up matching), <b>pcregrep</b> automatically makes use of this, unless it
was explicitly disabled at build time. This option can be used to disable the
use of JIT at run time. It is provided for testing and working round problems.
It should never be needed in normal use.
</P>
<P>
<b>-o</b>, <b>--only-matching</b> <b>-o</b>, <b>--only-matching</b>
Show only the part of the line that matched a pattern. In this mode, no Show only the part of the line that matched a pattern instead of the whole
context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
ignored. If there is more than one match in a line, each of them is shown <b>-C</b> options are ignored. If there is more than one match in a line, each
separately. If <b>-o</b> is combined with <b>-v</b> (invert the sense of the of them is shown separately. If <b>-o</b> is combined with <b>-v</b> (invert the
match to find non-matching lines), no output is generated, but the return code sense of the match to find non-matching lines), no output is generated, but the
is set appropriately. This option is mutually exclusive with return code is set appropriately. If the matched portion of the line is empty,
<b>--file-offsets</b> and <b>--line-offsets</b>. nothing is output unless the file name or line number are being printed, in
which case they are shown on an otherwise empty line. This option is mutually
exclusive with <b>--file-offsets</b> and <b>--line-offsets</b>.
</P>
<P>
<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
Show only the part of the line that matched the capturing parentheses of the
given number. Up to 32 capturing parentheses are supported, and -o0 is
equivalent to <b>-o</b> without a number. Because these options can be given
without an argument (see above), if an argument is present, it must be given in
the same shell item, for example, -o3 or --only-matching=2. The comments given
for the non-argument case above also apply to this case. If the specified
capturing parentheses do not exist in the pattern, or were not set in the
match, nothing is output unless the file name or line number are being printed.
<br>
<br>
If this option is given multiple times, multiple substrings are output, in the
order the options are given. For example, -o3 -o1 -o3 causes the substrings
matched by capturing parentheses 3 and 1 and then 3 again to be output. By
default, there is no separator (but see the next option).
</P>
<P>
<b>--om-separator</b>=<i>text</i>
Specify a separating string for multiple occurrences of <b>-o</b>. The default
is an empty string. Separating strings are never coloured.
</P> </P>
<P> <P>
<b>-q</b>, <b>--quiet</b> <b>-q</b>, <b>--quiet</b>
@ -407,6 +595,10 @@ immediate end-of-file. This option is a shorthand for setting the <b>-d</b>
option to "recurse". option to "recurse".
</P> </P>
<P> <P>
<b>--recursion-limit</b>=<i>number</i>
See <b>--match-limit</b> above.
</P>
<P>
<b>-s</b>, <b>--no-messages</b> <b>-s</b>, <b>--no-messages</b>
Suppress error messages about non-existent or unreadable files. Such files are Suppress error messages about non-existent or unreadable files. Such files are
quietly skipped. However, the return code is still 2, even if matches were quietly skipped. However, the return code is still 2, even if matches were
@ -415,13 +607,15 @@ found in other files.
<P> <P>
<b>-u</b>, <b>--utf-8</b> <b>-u</b>, <b>--utf-8</b>
Operate in UTF-8 mode. This option is available only if PCRE has been compiled Operate in UTF-8 mode. This option is available only if PCRE has been compiled
with UTF-8 support. Both patterns and subject lines must be valid strings of with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
UTF-8 characters. <b>--include</b> options) and all subject lines that are scanned must be valid
strings of UTF-8 characters.
</P> </P>
<P> <P>
<b>-V</b>, <b>--version</b> <b>-V</b>, <b>--version</b>
Write the version numbers of <b>pcregrep</b> and the PCRE library that is being Write the version numbers of <b>pcregrep</b> and the PCRE library to the
used to the standard error stream. standard output and then exit. Anything else on the command line is
ignored.
</P> </P>
<P> <P>
<b>-v</b>, <b>--invert-match</b> <b>-v</b>, <b>--invert-match</b>
@ -431,50 +625,74 @@ the patterns are the ones that are found.
<P> <P>
<b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b> <b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
Force the patterns to match only whole words. This is equivalent to having \b Force the patterns to match only whole words. This is equivalent to having \b
at the start and end of the pattern. at the start and end of the pattern. This option applies only to the patterns
that are matched against the contents of files; it does not apply to patterns
specified by any of the <b>--include</b> or <b>--exclude</b> options.
</P> </P>
<P> <P>
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b> <b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
Force the patterns to be anchored (each must start matching at the beginning of Force the patterns to be anchored (each must start matching at the beginning of
a line) and in addition, require them to match entire lines. This is a line) and in addition, require them to match entire lines. This is equivalent
equivalent to having ^ and $ characters at the start and end of each to having ^ and $ characters at the start and end of each alternative branch in
alternative branch in every pattern. every pattern. This option applies only to the patterns that are matched
against the contents of files; it does not apply to patterns specified by any
of the <b>--include</b> or <b>--exclude</b> options.
</P> </P>
<br><a name="SEC5" href="#TOC1">ENVIRONMENT VARIABLES</a><br> <br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
<P> <P>
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
order, for a locale. The first one that is set is used. This can be overridden order, for a locale. The first one that is set is used. This can be overridden
by the <b>--locale</b> option. If no locale is set, the PCRE library's default by the <b>--locale</b> option. If no locale is set, the PCRE library's default
(usually the "C" locale) is used. (usually the "C" locale) is used.
</P> </P>
<br><a name="SEC6" href="#TOC1">NEWLINES</a><br> <br><a name="SEC7" href="#TOC1">NEWLINES</a><br>
<P> <P>
The <b>-N</b> (<b>--newline</b>) option allows <b>pcregrep</b> to scan files with The <b>-N</b> (<b>--newline</b>) option allows <b>pcregrep</b> to scan files with
different newline conventions from the default. However, the setting of this different newline conventions from the default. Any parts of the input files
option does not affect the way in which <b>pcregrep</b> writes information to that are written to the standard output are copied identically, with whatever
the standard error and output streams. It uses the string "\n" in C newline sequences they have in the input. However, the setting of this option
<b>printf()</b> calls to indicate newlines, relying on the C I/O library to does not affect the interpretation of files specified by the <b>-f</b>,
convert this to an appropriate sequence if the output is sent to a file. <b>--exclude-from</b>, or <b>--include-from</b> options, which are assumed to use
the operating system's standard newline sequence, nor does it affect the way in
which <b>pcregrep</b> writes informational messages to the standard error and
output streams. For these it uses the string "\n" to indicate newlines,
relying on the C I/O library to convert this to an appropriate sequence.
</P> </P>
<br><a name="SEC7" href="#TOC1">OPTIONS COMPATIBILITY</a><br> <br><a name="SEC8" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
<P> <P>
The majority of short and long forms of <b>pcregrep</b>'s options are the same Many of the short and long forms of <b>pcregrep</b>'s options are the same
as in the GNU <b>grep</b> program. Any long option of the form as in the GNU <b>grep</b> program. Any long option of the form
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b> <b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
(PCRE terminology). However, the <b>--locale</b>, <b>-M</b>, <b>--multiline</b>, (PCRE terminology). However, the <b>--file-list</b>, <b>--file-offsets</b>,
<b>-u</b>, and <b>--utf-8</b> options are specific to <b>pcregrep</b>. <b>--include-dir</b>, <b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>,
<b>-M</b>, <b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
<b>--recursion-limit</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
<b>pcregrep</b>, as is the use of the <b>--only-matching</b> option with a
capturing parentheses number.
</P> </P>
<br><a name="SEC8" href="#TOC1">OPTIONS WITH DATA</a><br> <P>
Although most of the common options work the same way, a few are different in
<b>pcregrep</b>. For example, the <b>--include</b> option's argument is a glob
for GNU <b>grep</b>, but a regular expression for <b>pcregrep</b>. If both the
<b>-c</b> and <b>-l</b> options are given, GNU <b>grep</b> lists only file names,
without counts, but <b>pcregrep</b> gives the counts.
</P>
<br><a name="SEC9" href="#TOC1">OPTIONS WITH DATA</a><br>
<P> <P>
There are four different ways in which an option with data can be specified. There are four different ways in which an option with data can be specified.
If a short form option is used, the data may follow immediately, or in the next If a short form option is used, the data may follow immediately, or (with one
command line item. For example: exception) in the next command line item. For example:
<pre> <pre>
-f/some/file -f/some/file
-f /some/file -f /some/file
</pre> </pre>
The exception is the <b>-o</b> option, which may appear with or without data.
Because of this, if data is present, it must follow immediately in the same
item, for example -o3.
</P>
<P>
If a long form option is used, the data may appear in the same command line If a long form option is used, the data may appear in the same command line
item, separated by an equals character, or (with one exception) it may appear item, separated by an equals character, or (with two exceptions) it may appear
in the next command line item. For example: in the next command line item. For example:
<pre> <pre>
--file=/some/file --file=/some/file
@ -486,12 +704,12 @@ separate the file name from the option, because the shell does not treat ~
specially unless it is at the start of an item. specially unless it is at the start of an item.
</P> </P>
<P> <P>
The exception to the above is the <b>--colour</b> (or <b>--color</b>) option, The exceptions to the above are the <b>--colour</b> (or <b>--color</b>) and
for which the data is optional. If this option does have data, it must be given <b>--only-matching</b> options, for which the data is optional. If one of these
in the first form, using an equals character. Otherwise it will be assumed that options does have data, it must be given in the first form, using an equals
it has no data. character. Otherwise <b>pcregrep</b> will assume that it has no data.
</P> </P>
<br><a name="SEC9" href="#TOC1">MATCHING ERRORS</a><br> <br><a name="SEC10" href="#TOC1">MATCHING ERRORS</a><br>
<P> <P>
It is possible to supply a regular expression that takes a very long time to It is possible to supply a regular expression that takes a very long time to
fail to match certain lines. Such patterns normally involve nested indefinite fail to match certain lines. Such patterns normally involve nested indefinite
@ -501,19 +719,25 @@ in these circumstances. If this happens, <b>pcregrep</b> outputs an error
message and the line that caused the problem to the standard error stream. If message and the line that caused the problem to the standard error stream. If
there are more than 20 such errors, <b>pcregrep</b> gives up. there are more than 20 such errors, <b>pcregrep</b> gives up.
</P> </P>
<br><a name="SEC10" href="#TOC1">DIAGNOSTICS</a><br> <P>
The <b>--match-limit</b> option of <b>pcregrep</b> can be used to set the overall
resource limit; there is a second option called <b>--recursion-limit</b> that
sets a limit on the amount of memory (usually stack) that is used (see the
discussion of these options above).
</P>
<br><a name="SEC11" href="#TOC1">DIAGNOSTICS</a><br>
<P> <P>
Exit status is 0 if any matches were found, 1 if no matches were found, and 2 Exit status is 0 if any matches were found, 1 if no matches were found, and 2
for syntax errors and non-existent or inacessible files (even if matches were for syntax errors, overlong lines, non-existent or inaccessible files (even if
found in other files) or too many matching errors. Using the <b>-s</b> option to matches were found in other files) or too many matching errors. Using the
suppress error messages about inaccessble files does not affect the return <b>-s</b> option to suppress error messages about inaccessible files does not
code. affect the return code.
</P> </P>
<br><a name="SEC11" href="#TOC1">SEE ALSO</a><br> <br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
<P> <P>
<b>pcrepattern</b>(3), <b>pcretest</b>(1). <b>pcrepattern</b>(3), <b>pcresyntax</b>(3), <b>pcretest</b>(1).
</P> </P>
<br><a name="SEC12" href="#TOC1">AUTHOR</a><br> <br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
<P> <P>
Philip Hazel Philip Hazel
<br> <br>
@ -522,11 +746,11 @@ University Computing Service
Cambridge CB2 3QH, England. Cambridge CB2 3QH, England.
<br> <br>
</P> </P>
<br><a name="SEC13" href="#TOC1">REVISION</a><br> <br><a name="SEC14" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 01 March 2009 Last updated: 13 September 2012
<br> <br>
Copyright &copy; 1997-2009 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -0,0 +1,458 @@
<html>
<head>
<title>pcrejit specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcrejit man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PCRE JUST-IN-TIME COMPILER SUPPORT</a>
<li><a name="TOC2" href="#SEC2">8-BIT, 16-BIT AND 32-BIT SUPPORT</a>
<li><a name="TOC3" href="#SEC3">AVAILABILITY OF JIT SUPPORT</a>
<li><a name="TOC4" href="#SEC4">SIMPLE USE OF JIT</a>
<li><a name="TOC5" href="#SEC5">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a>
<li><a name="TOC6" href="#SEC6">RETURN VALUES FROM JIT EXECUTION</a>
<li><a name="TOC7" href="#SEC7">SAVING AND RESTORING COMPILED PATTERNS</a>
<li><a name="TOC8" href="#SEC8">CONTROLLING THE JIT STACK</a>
<li><a name="TOC9" href="#SEC9">JIT STACK FAQ</a>
<li><a name="TOC10" href="#SEC10">EXAMPLE CODE</a>
<li><a name="TOC11" href="#SEC11">JIT FAST PATH API</a>
<li><a name="TOC12" href="#SEC12">SEE ALSO</a>
<li><a name="TOC13" href="#SEC13">AUTHOR</a>
<li><a name="TOC14" href="#SEC14">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE JUST-IN-TIME COMPILER SUPPORT</a><br>
<P>
Just-in-time compiling is a heavyweight optimization that can greatly speed up
pattern matching. However, it comes at the cost of extra processing before the
match is performed. Therefore, it is of most benefit when the same pattern is
going to be matched many times. This does not necessarily mean many calls of a
matching function; if the pattern is not anchored, matching attempts may take
place many times at various positions in the subject, even for a single call.
Therefore, if the subject string is very long, it may still pay to use JIT for
one-off matches.
</P>
<P>
JIT support applies only to the traditional Perl-compatible matching function.
It does not apply when the DFA matching function is being used. The code for
this support was written by Zoltan Herczeg.
</P>
<br><a name="SEC2" href="#TOC1">8-BIT, 16-BIT AND 32-BIT SUPPORT</a><br>
<P>
JIT support is available for all of the 8-bit, 16-bit and 32-bit PCRE
libraries. To keep this documentation simple, only the 8-bit interface is
described in what follows. If you are using the 16-bit library, substitute the
16-bit functions and 16-bit structures (for example, <i>pcre16_jit_stack</i>
instead of <i>pcre_jit_stack</i>). If you are using the 32-bit library,
substitute the 32-bit functions and 32-bit structures (for example,
<i>pcre32_jit_stack</i> instead of <i>pcre_jit_stack</i>).
</P>
<br><a name="SEC3" href="#TOC1">AVAILABILITY OF JIT SUPPORT</a><br>
<P>
JIT support is an optional feature of PCRE. The "configure" option --enable-jit
(or equivalent CMake option) must be set when PCRE is built if you want to use
JIT. The support is limited to the following hardware platforms:
<pre>
ARM v5, v7, and Thumb2
Intel x86 32-bit and 64-bit
MIPS 32-bit
Power PC 32-bit and 64-bit
SPARC 32-bit (experimental)
</pre>
If --enable-jit is set on an unsupported platform, compilation fails.
</P>
<P>
A program that is linked with PCRE 8.20 or later can tell if JIT support is
available by calling <b>pcre_config()</b> with the PCRE_CONFIG_JIT option. The
result is 1 when JIT is available, and 0 otherwise. However, a simple program
does not need to check this in order to use JIT. The normal API is implemented
in a way that falls back to the interpretive code if JIT is not available. For
programs that need the best possible performance, there is also a "fast path"
API that is JIT-specific.
</P>
<P>
If your program may sometimes be linked with versions of PCRE that are older
than 8.20, but you want to use JIT when it is available, you can test
the values of PCRE_MAJOR and PCRE_MINOR, or the existence of a JIT macro such
as PCRE_CONFIG_JIT, for compile-time control of your code.
</P>
<br><a name="SEC4" href="#TOC1">SIMPLE USE OF JIT</a><br>
<P>
You have to do two things to make use of the JIT support in the simplest way:
<pre>
(1) Call <b>pcre_study()</b> with the PCRE_STUDY_JIT_COMPILE option for
each compiled pattern, and pass the resulting <b>pcre_extra</b> block to
<b>pcre_exec()</b>.
(2) Use <b>pcre_free_study()</b> to free the <b>pcre_extra</b> block when it is
no longer needed, instead of just freeing it yourself. This ensures that
any JIT data is also freed.
</pre>
For a program that may be linked with pre-8.20 versions of PCRE, you can insert
<pre>
#ifndef PCRE_STUDY_JIT_COMPILE
#define PCRE_STUDY_JIT_COMPILE 0
#endif
</pre>
so that no option is passed to <b>pcre_study()</b>, and then use something like
this to free the study data:
<pre>
#ifdef PCRE_CONFIG_JIT
pcre_free_study(study_ptr);
#else
pcre_free(study_ptr);
#endif
</pre>
PCRE_STUDY_JIT_COMPILE requests the JIT compiler to generate code for complete
matches. If you want to run partial matches using the PCRE_PARTIAL_HARD or
PCRE_PARTIAL_SOFT options of <b>pcre_exec()</b>, you should set one or both of
the following options in addition to, or instead of, PCRE_STUDY_JIT_COMPILE
when you call <b>pcre_study()</b>:
<pre>
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
</pre>
The JIT compiler generates different optimized code for each of the three
modes (normal, soft partial, hard partial). When <b>pcre_exec()</b> is called,
the appropriate code is run if it is available. Otherwise, the pattern is
matched using interpretive code.
</P>
<P>
In some circumstances you may need to call additional functions. These are
described in the section entitled
<a href="#stackcontrol">"Controlling the JIT stack"</a>
below.
</P>
<P>
If JIT support is not available, PCRE_STUDY_JIT_COMPILE etc. are ignored, and
no JIT data is created. Otherwise, the compiled pattern is passed to the JIT
compiler, which turns it into machine code that executes much faster than the
normal interpretive code. When <b>pcre_exec()</b> is passed a <b>pcre_extra</b>
block containing a pointer to JIT code of the appropriate mode (normal or
hard/soft partial), it obeys that code instead of running the interpreter. The
result is identical, but the compiled JIT code runs much faster.
</P>
<P>
There are some <b>pcre_exec()</b> options that are not supported for JIT
execution. There are also some pattern items that JIT cannot handle. Details
are given below. In both cases, execution automatically falls back to the
interpretive code. If you want to know whether JIT was actually used for a
particular match, you should arrange for a JIT callback function to be set up
as described in the section entitled
<a href="#stackcontrol">"Controlling the JIT stack"</a>
below, even if you do not need to supply a non-default JIT stack. Such a
callback function is called whenever JIT code is about to be obeyed. If the
execution options are not right for JIT execution, the callback function is not
obeyed.
</P>
<P>
If the JIT compiler finds an unsupported item, no JIT data is generated. You
can find out if JIT execution is available after studying a pattern by calling
<b>pcre_fullinfo()</b> with the PCRE_INFO_JIT option. A result of 1 means that
JIT compilation was successful. A result of 0 means that JIT support is not
available, or the pattern was not studied with PCRE_STUDY_JIT_COMPILE etc., or
the JIT compiler was not able to handle the pattern.
</P>
<P>
Once a pattern has been studied, with or without JIT, it can be used as many
times as you like for matching different subject strings.
</P>
<br><a name="SEC5" href="#TOC1">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a><br>
<P>
The only <b>pcre_exec()</b> options that are supported for JIT execution are
PCRE_NO_UTF8_CHECK, PCRE_NO_UTF16_CHECK, PCRE_NO_UTF32_CHECK, PCRE_NOTBOL,
PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, PCRE_PARTIAL_HARD, and
PCRE_PARTIAL_SOFT.
</P>
<P>
The unsupported pattern items are:
<pre>
\C match a single byte; not supported in UTF-8 mode
(?Cn) callouts
(*PRUNE) )
(*SKIP) ) backtracking control verbs
(*THEN) )
</pre>
Support for some of these may be added in future.
</P>
<br><a name="SEC6" href="#TOC1">RETURN VALUES FROM JIT EXECUTION</a><br>
<P>
When a pattern is matched using JIT execution, the return values are the same
as those given by the interpretive <b>pcre_exec()</b> code, with the addition of
one new error code: PCRE_ERROR_JIT_STACKLIMIT. This means that the memory used
for the JIT stack was insufficient. See
<a href="#stackcontrol">"Controlling the JIT stack"</a>
below for a discussion of JIT stack usage. For compatibility with the
interpretive <b>pcre_exec()</b> code, no more than two-thirds of the
<i>ovector</i> argument is used for passing back captured substrings.
</P>
<P>
The error code PCRE_ERROR_MATCHLIMIT is returned by the JIT code if searching a
very large pattern tree goes on for too long, as it is in the same circumstance
when JIT is not used, but the details of exactly what is counted are not the
same. The PCRE_ERROR_RECURSIONLIMIT error code is never returned by JIT
execution.
</P>
<br><a name="SEC7" href="#TOC1">SAVING AND RESTORING COMPILED PATTERNS</a><br>
<P>
The code that is generated by the JIT compiler is architecture-specific, and is
also position dependent. For those reasons it cannot be saved (in a file or
database) and restored later like the bytecode and other data of a compiled
pattern. Saving and restoring compiled patterns is not something many people
do. More detail about this facility is given in the
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
documentation. It should be possible to run <b>pcre_study()</b> on a saved and
restored pattern, and thereby recreate the JIT data, but because JIT
compilation uses significant resources, it is probably not worth doing this;
you might as well recompile the original pattern.
<a name="stackcontrol"></a></P>
<br><a name="SEC8" href="#TOC1">CONTROLLING THE JIT STACK</a><br>
<P>
When the compiled JIT code runs, it needs a block of memory to use as a stack.
By default, it uses 32K on the machine stack. However, some large or
complicated patterns need more than this. The error PCRE_ERROR_JIT_STACKLIMIT
is given when there is not enough stack. Three functions are provided for
managing blocks of memory for use as JIT stacks. There is further discussion
about the use of JIT stacks in the section entitled
<a href="#SEC9">"JIT stack FAQ"</a>
below.
</P>
<P>
The <b>pcre_jit_stack_alloc()</b> function creates a JIT stack. Its arguments
are a starting size and a maximum size, and it returns a pointer to an opaque
structure of type <b>pcre_jit_stack</b>, or NULL if there is an error. The
<b>pcre_jit_stack_free()</b> function can be used to free a stack that is no
longer needed. (For the technically minded: the address space is allocated by
mmap or VirtualAlloc.)
</P>
<P>
JIT uses far less memory for recursion than the interpretive code,
and a maximum stack size of 512K to 1M should be more than enough for any
pattern.
</P>
<P>
The <b>pcre_assign_jit_stack()</b> function specifies which stack JIT code
should use. Its arguments are as follows:
<pre>
pcre_extra *extra
pcre_jit_callback callback
void *data
</pre>
The <i>extra</i> argument must be the result of studying a pattern with
PCRE_STUDY_JIT_COMPILE etc. There are three cases for the values of the other
two options:
<pre>
(1) If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block
on the machine stack is used.
(2) If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must be
a valid JIT stack, the result of calling <b>pcre_jit_stack_alloc()</b>.
(3) If <i>callback</i> is not NULL, it must point to a function that is
called with <i>data</i> as an argument at the start of matching, in
order to set up a JIT stack. If the return from the callback
function is NULL, the internal 32K stack is used; otherwise the
return value must be a valid JIT stack, the result of calling
<b>pcre_jit_stack_alloc()</b>.
</pre>
A callback function is obeyed whenever JIT code is about to be run; it is not
obeyed when <b>pcre_exec()</b> is called with options that are incompatible for
JIT execution. A callback function can therefore be used to determine whether a
match operation was executed by JIT or by the interpreter.
</P>
<P>
You may safely use the same JIT stack for more than one pattern (either by
assigning directly or by callback), as long as the patterns are all matched
sequentially in the same thread. In a multithread application, if you do not
specify a JIT stack, or if you assign or pass back NULL from a callback, that
is thread-safe, because each thread has its own machine stack. However, if you
assign or pass back a non-NULL JIT stack, this must be a different stack for
each thread so that the application is thread-safe.
</P>
<P>
Strictly speaking, even more is allowed. You can assign the same non-NULL stack
to any number of patterns as long as they are not used for matching by multiple
threads at the same time. For example, you can assign the same stack to all
compiled patterns, and use a global mutex in the callback to wait until the
stack is available for use. However, this is an inefficient solution, and not
recommended.
</P>
<P>
This is a suggestion for how a multithreaded program that needs to set up
non-default JIT stacks might operate:
<pre>
During thread initialization
thread_local_var = pcre_jit_stack_alloc(...)
During thread exit
pcre_jit_stack_free(thread_local_var)
Use a one-line callback function
return thread_local_var
</pre>
All the functions described in this section do nothing if JIT is not available,
and <b>pcre_assign_jit_stack()</b> does nothing unless the <b>extra</b> argument
is non-NULL and points to a <b>pcre_extra</b> block that is the result of a
successful study with PCRE_STUDY_JIT_COMPILE etc.
<a name="stackfaq"></a></P>
<br><a name="SEC9" href="#TOC1">JIT STACK FAQ</a><br>
<P>
(1) Why do we need JIT stacks?
<br>
<br>
PCRE (and JIT) is a recursive, depth-first engine, so it needs a stack where
the local data of the current node is pushed before checking its child nodes.
Allocating real machine stack on some platforms is difficult. For example, the
stack chain needs to be updated every time if we extend the stack on PowerPC.
Although it is possible, its updating time overhead decreases performance. So
we do the recursion in memory.
</P>
<P>
(2) Why don't we simply allocate blocks of memory with <b>malloc()</b>?
<br>
<br>
Modern operating systems have a nice feature: they can reserve an address space
instead of allocating memory. We can safely allocate memory pages inside this
address space, so the stack could grow without moving memory data (this is
important because of pointers). Thus we can allocate 1M address space, and use
only a single memory page (usually 4K) if that is enough. However, we can still
grow up to 1M anytime if needed.
</P>
<P>
(3) Who "owns" a JIT stack?
<br>
<br>
The owner of the stack is the user program, not the JIT studied pattern or
anything else. The user program must ensure that if a stack is used by
<b>pcre_exec()</b>, (that is, it is assigned to the pattern currently running),
that stack must not be used by any other threads (to avoid overwriting the same
memory area). The best practice for multithreaded programs is to allocate a
stack for each thread, and return this stack through the JIT callback function.
</P>
<P>
(4) When should a JIT stack be freed?
<br>
<br>
You can free a JIT stack at any time, as long as it will not be used by
<b>pcre_exec()</b> again. When you assign the stack to a pattern, only a pointer
is set. There is no reference counting or any other magic. You can free the
patterns and stacks in any order, anytime. Just <i>do not</i> call
<b>pcre_exec()</b> with a pattern pointing to an already freed stack, as that
will cause SEGFAULT. (Also, do not free a stack currently used by
<b>pcre_exec()</b> in another thread). You can also replace the stack for a
pattern at any time. You can even free the previous stack before assigning a
replacement.
</P>
<P>
(5) Should I allocate/free a stack every time before/after calling
<b>pcre_exec()</b>?
<br>
<br>
No, because this is too costly in terms of resources. However, you could
implement some clever idea which releases the stack if it is not used in let's
say two minutes. The JIT callback can help to achieve this without keeping a
list of the currently JIT studied patterns.
</P>
<P>
(6) OK, the stack is for long term memory allocation. But what happens if a
pattern causes stack overflow with a stack of 1M? Is that 1M kept until the
stack is freed?
<br>
<br>
Especially on embedded systems, it might be a good idea to release memory
sometimes without freeing the stack. There is no API for this at the moment.
Probably a function call which returns with the currently allocated memory for
any stack and another which allows releasing memory (shrinking the stack) would
be a good idea if someone needs this.
</P>
<P>
(7) This is too much of a headache. Isn't there any better solution for JIT
stack handling?
<br>
<br>
No, thanks to Windows. If POSIX threads were used everywhere, we could throw
out this complicated API.
</P>
<br><a name="SEC10" href="#TOC1">EXAMPLE CODE</a><br>
<P>
This is a single-threaded example that specifies a JIT stack without using a
callback.
<pre>
int rc;
int ovector[30];
pcre *re;
pcre_extra *extra;
pcre_jit_stack *jit_stack;
re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
/* Check for errors */
extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
jit_stack = pcre_jit_stack_alloc(32*1024, 512*1024);
/* Check for error (NULL) */
pcre_assign_jit_stack(extra, NULL, jit_stack);
rc = pcre_exec(re, extra, subject, length, 0, 0, ovector, 30);
/* Check results */
pcre_free(re);
pcre_free_study(extra);
pcre_jit_stack_free(jit_stack);
</PRE>
</P>
<br><a name="SEC11" href="#TOC1">JIT FAST PATH API</a><br>
<P>
Because the API described above falls back to interpreted execution when JIT is
not available, it is convenient for programs that are written for general use
in many environments. However, calling JIT via <b>pcre_exec()</b> does have a
performance impact. Programs that are written for use where JIT is known to be
available, and which need the best possible performance, can instead use a
"fast path" API to call JIT execution directly instead of calling
<b>pcre_exec()</b> (obviously only for patterns that have been successfully
studied by JIT).
</P>
<P>
The fast path function is called <b>pcre_jit_exec()</b>, and it takes exactly
the same arguments as <b>pcre_exec()</b>, plus one additional argument that
must point to a JIT stack. The JIT stack arrangements described above do not
apply. The return values are the same as for <b>pcre_exec()</b>.
</P>
<P>
When you call <b>pcre_exec()</b>, as well as testing for invalid options, a
number of other sanity checks are performed on the arguments. For example, if
the subject pointer is NULL, or its length is negative, an immediate error is
given. Also, unless PCRE_NO_UTF[8|16|32]_CHECK is set, a UTF subject string is tested
for validity. In the interests of speed, these checks do not happen on the JIT
fast path, and if invalid data is passed, the result is undefined.
</P>
<P>
Bypassing the sanity checks and the <b>pcre_exec()</b> wrapping can give
speedups of more than 10%.
</P>
<br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcreapi</b>(3)
</P>
<br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel (FAQ by Zoltan Herczeg)
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC14" href="#TOC1">REVISION</a><br>
<P>
Last updated: 31 October 2012
<br>
Copyright &copy; 1997-2012 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -0,0 +1,86 @@
<html>
<head>
<title>pcrelimits specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcrelimits man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SIZE AND OTHER LIMITATIONS
</b><br>
<P>
There are some size limitations in PCRE but it is hoped that they will never in
practice be relevant.
</P>
<P>
The maximum length of a compiled pattern is approximately 64K data units (bytes
for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for
the 32-bit library) if PCRE is compiled with the default internal linkage size
of 2 bytes. If you want to process regular expressions that are truly enormous,
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
16-bit or 32-bit library, 3 is rounded up to 4). See the <b>README</b> file in
the source distribution and the
<a href="pcrebuild.html"><b>pcrebuild</b></a>
documentation for details. In these cases the limit is substantially larger.
However, the speed of execution is slower.
</P>
<P>
All values in repeating quantifiers must be less than 65536.
</P>
<P>
There is no limit to the number of parenthesized subpatterns, but there can be
no more than 65535 capturing subpatterns.
</P>
<P>
There is a limit to the number of forward references to subsequent subpatterns
of around 200,000. Repeated forward references with fixed upper limits, for
example, (?2){0,100} when subpattern number 2 is to the right, are included in
the count. There is no limit to the number of backward references.
</P>
<P>
The maximum length of a name for a named subpattern is 32 characters, and the
maximum number of named subpatterns is 10000.
</P>
<P>
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
</P>
<P>
The maximum length of a subject string is the largest positive number that an
integer variable can hold. However, when using the traditional matching
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
This means that the available stack space may limit the size of a subject
string that can be processed by certain patterns. For a discussion of stack
issues, see the
<a href="pcrestack.html"><b>pcrestack</b></a>
documentation.
</P>
<br><b>
AUTHOR
</b><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><b>
REVISION
</b><br>
<P>
Last updated: 04 May 2012
<br>
Copyright &copy; 1997-2012 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -26,13 +26,17 @@ man page, in case the conversion went wrong.
<P> <P>
This document describes the two different algorithms that are available in PCRE This document describes the two different algorithms that are available in PCRE
for matching a compiled regular expression against a given subject string. The for matching a compiled regular expression against a given subject string. The
"standard" algorithm is the one provided by the <b>pcre_exec()</b> function. "standard" algorithm is the one provided by the <b>pcre_exec()</b>,
This works in the same was as Perl's matching function, and provides a <b>pcre16_exec()</b> and <b>pcre32_exec()</b> functions. These work in the same
Perl-compatible matching operation. way as Perl's matching function, and provide a Perl-compatible matching operation.
The just-in-time (JIT) optimization that is described in the
<a href="pcrejit.html"><b>pcrejit</b></a>
documentation is compatible with these functions.
</P> </P>
<P> <P>
An alternative algorithm is provided by the <b>pcre_dfa_exec()</b> function; An alternative algorithm is provided by the <b>pcre_dfa_exec()</b>,
this operates in a different way, and is not Perl-compatible. It has advantages <b>pcre16_dfa_exec()</b> and <b>pcre32_dfa_exec()</b> functions; they operate in
a different way, and are not Perl-compatible. This alternative has advantages
and disadvantages compared with the standard algorithm, and these are described and disadvantages compared with the standard algorithm, and these are described
below. below.
</P> </P>
@ -96,22 +100,28 @@ traditional finite state machine (it keeps multiple states active
simultaneously). simultaneously).
</P> </P>
<P> <P>
Although the general principle of this matching algorithm is that it scans the
subject string only once, without backtracking, there is one exception: when a
lookaround assertion is encountered, the characters following or preceding the
current point have to be independently inspected.
</P>
<P>
The scan continues until either the end of the subject is reached, or there are The scan continues until either the end of the subject is reached, or there are
no more unterminated paths. At this point, terminated paths represent the no more unterminated paths. At this point, terminated paths represent the
different matching possibilities (if there are none, the match has failed). different matching possibilities (if there are none, the match has failed).
Thus, if there is more than one possible match, this algorithm finds all of Thus, if there is more than one possible match, this algorithm finds all of
them, and in particular, it finds the longest. In PCRE, there is an option to them, and in particular, it finds the longest. The matches are returned in
stop the algorithm after the first match (which is necessarily the shortest) decreasing order of length. There is an option to stop the algorithm after the
has been found. first match (which is necessarily the shortest) is found.
</P> </P>
<P> <P>
Note that all the matches that are found start at the same point in the Note that all the matches that are found start at the same point in the
subject. If the pattern subject. If the pattern
<pre> <pre>
cat(er(pillar)?) cat(er(pillar)?)?
</pre> </pre>
is matched against the string "the caterpillar catchment", the result will be is matched against the string "the caterpillar catchment", the result will be
the three strings "cat", "cater", and "caterpillar" that start at the fourth the three strings "caterpillar", "cater", and "cat" that start at the fifth
character of the subject. The algorithm does not automatically move on to find character of the subject. The algorithm does not automatically move on to find
matches that start at later positions. matches that start at later positions.
</P> </P>
@ -157,10 +167,10 @@ and not on others), is not supported. It causes an error if encountered.
always 1, and the value of the <i>capture_last</i> field is always -1. always 1, and the value of the <i>capture_last</i> field is always -1.
</P> </P>
<P> <P>
7. The \C escape sequence, which (in the standard algorithm) matches a single 7. The \C escape sequence, which (in the standard algorithm) always matches a
byte, even in UTF-8 mode, is not supported because the alternative algorithm single data unit, even in UTF-8, UTF-16 or UTF-32 modes, is not supported in
moves through the subject string one character at a time, for all active paths these modes, because the alternative algorithm moves through the subject string
through the tree. one character (not data unit) at a time, for all active paths through the tree.
</P> </P>
<P> <P>
8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not 8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
@ -177,16 +187,15 @@ match using the standard algorithm, you have to do kludgy things with
callouts. callouts.
</P> </P>
<P> <P>
2. There is much better support for partial matching. The restrictions on the 2. Because the alternative algorithm scans the subject string just once, and
content of the pattern that apply when using the standard algorithm for partial never needs to backtrack (except for lookbehinds), it is possible to pass very
matching do not apply to the alternative algorithm. For non-anchored patterns, long subject strings to the matching function in several pieces, checking for
the starting position of a partial match is available. partial matching each time. Although it is possible to do multi-segment
</P> matching using the standard algorithm by retaining partially matched
<P> substrings, it is more complicated. The
3. Because the alternative algorithm scans the subject string just once, and <a href="pcrepartial.html"><b>pcrepartial</b></a>
never needs to backtrack, it is possible to pass very long subject strings to documentation gives details of partial matching and discusses multi-segment
the matching function in several pieces, checking for partial matching each matching.
time.
</P> </P>
<br><a name="SEC6" href="#TOC1">DISADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br> <br><a name="SEC6" href="#TOC1">DISADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
<P> <P>
@ -215,9 +224,9 @@ Cambridge CB2 3QH, England.
</P> </P>
<br><a name="SEC8" href="#TOC1">REVISION</a><br> <br><a name="SEC8" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 19 April 2008 Last updated: 08 January 2012
<br> <br>
Copyright &copy; 1997-2008 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -14,19 +14,24 @@ man page, in case the conversion went wrong.
<br> <br>
<ul> <ul>
<li><a name="TOC1" href="#SEC1">PARTIAL MATCHING IN PCRE</a> <li><a name="TOC1" href="#SEC1">PARTIAL MATCHING IN PCRE</a>
<li><a name="TOC2" href="#SEC2">RESTRICTED PATTERNS FOR PCRE_PARTIAL</a> <li><a name="TOC2" href="#SEC2">PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()</a>
<li><a name="TOC3" href="#SEC3">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a> <li><a name="TOC3" href="#SEC3">PARTIAL MATCHING USING pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a>
<li><a name="TOC4" href="#SEC4">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec()</a> <li><a name="TOC4" href="#SEC4">PARTIAL MATCHING AND WORD BOUNDARIES</a>
<li><a name="TOC5" href="#SEC5">AUTHOR</a> <li><a name="TOC5" href="#SEC5">FORMERLY RESTRICTED PATTERNS</a>
<li><a name="TOC6" href="#SEC6">REVISION</a> <li><a name="TOC6" href="#SEC6">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a>
<li><a name="TOC7" href="#SEC7">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a>
<li><a name="TOC8" href="#SEC8">MULTI-SEGMENT MATCHING WITH pcre_exec() OR pcre[16|32]_exec()</a>
<li><a name="TOC9" href="#SEC9">ISSUES WITH MULTI-SEGMENT MATCHING</a>
<li><a name="TOC10" href="#SEC10">AUTHOR</a>
<li><a name="TOC11" href="#SEC11">REVISION</a>
</ul> </ul>
<br><a name="SEC1" href="#TOC1">PARTIAL MATCHING IN PCRE</a><br> <br><a name="SEC1" href="#TOC1">PARTIAL MATCHING IN PCRE</a><br>
<P> <P>
In normal use of PCRE, if the subject string that is passed to In normal use of PCRE, if the subject string that is passed to a matching
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> matches as far as it goes, but is function matches as far as it goes, but is too short to match the entire
too short to match the entire pattern, PCRE_ERROR_NOMATCH is returned. There pattern, PCRE_ERROR_NOMATCH is returned. There are circumstances where it might
are circumstances where it might be helpful to distinguish this case from other be helpful to distinguish this case from other cases in which there is no
cases in which there is no match. match.
</P> </P>
<P> <P>
Consider, for example, an application where a human is required to type in data Consider, for example, an application where a human is required to type in data
@ -37,78 +42,230 @@ in the form <i>ddmmmyy</i>, defined by this pattern:
</pre> </pre>
If the application sees the user's keystrokes one by one, and can check that If the application sees the user's keystrokes one by one, and can check that
what has been typed so far is potentially valid, it is able to raise an error what has been typed so far is potentially valid, it is able to raise an error
as soon as a mistake is made, possibly beeping and not reflecting the as soon as a mistake is made, by beeping and not reflecting the character that
character that has been typed. This immediate feedback is likely to be a better has been typed, for example. This immediate feedback is likely to be a better
user interface than a check that is delayed until the entire string has been user interface than a check that is delayed until the entire string has been
entered. entered. Partial matching can also be useful when the subject string is very
long and is not all available at once.
</P> </P>
<P> <P>
PCRE supports the concept of partial matching by means of the PCRE_PARTIAL PCRE supports partial matching by means of the PCRE_PARTIAL_SOFT and
option, which can be set when calling <b>pcre_exec()</b> or PCRE_PARTIAL_HARD options, which can be set when calling any of the matching
<b>pcre_dfa_exec()</b>. When this flag is set for <b>pcre_exec()</b>, the return functions. For backwards compatibility, PCRE_PARTIAL is a synonym for
code PCRE_ERROR_NOMATCH is converted into PCRE_ERROR_PARTIAL if at any time PCRE_PARTIAL_SOFT. The essential difference between the two options is whether
during the matching process the last part of the subject string matched part of or not a partial match is preferred to an alternative complete match, though
the pattern. Unfortunately, for non-anchored matching, it is not possible to the details differ between the two types of matching function. If both options
obtain the position of the start of the partial match. No captured data is set are set, PCRE_PARTIAL_HARD takes precedence.
when PCRE_ERROR_PARTIAL is returned.
</P> </P>
<P> <P>
When PCRE_PARTIAL is set for <b>pcre_dfa_exec()</b>, the return code If you want to use partial matching with just-in-time optimized code, you must
PCRE_ERROR_NOMATCH is converted into PCRE_ERROR_PARTIAL if the end of the call <b>pcre_study()</b>, <b>pcre16_study()</b> or <b>pcre32_study()</b> with one
subject is reached, there have been no complete matches, but there is still at or both of these options:
least one matching possibility. The portion of the string that provided the
partial match is set as the first matching string.
</P>
<P>
Using PCRE_PARTIAL disables one of PCRE's optimizations. PCRE remembers the
last literal byte in a pattern, and abandons matching immediately if such a
byte is not present in the subject string. This optimization cannot be used
for a subject string that might match only partially.
</P>
<br><a name="SEC2" href="#TOC1">RESTRICTED PATTERNS FOR PCRE_PARTIAL</a><br>
<P>
Because of the way certain internal optimizations are implemented in the
<b>pcre_exec()</b> function, the PCRE_PARTIAL option cannot be used with all
patterns. These restrictions do not apply when <b>pcre_dfa_exec()</b> is used.
For <b>pcre_exec()</b>, repeated single characters such as
<pre> <pre>
a{2,4} PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
</pre> </pre>
and repeated single metasequences such as PCRE_STUDY_JIT_COMPILE should also be set if you are going to run non-partial
<pre> matches on the same pattern. If the appropriate JIT study mode has not been set
\d+ for a match, the interpretive matching code is used.
</pre>
are not permitted if the maximum number of occurrences is greater than one.
Optional items such as \d? (where the maximum is one) are permitted.
Quantifiers with any values are permitted after parentheses, so the invalid
examples above can be coded thus:
<pre>
(a){2,4}
(\d)+
</pre>
These constructions run more slowly, but for the kinds of application that are
envisaged for this facility, this is not felt to be a major restriction.
</P> </P>
<P> <P>
If PCRE_PARTIAL is set for a pattern that does not conform to the restrictions, Setting a partial matching option disables two of PCRE's standard
<b>pcre_exec()</b> returns the error code PCRE_ERROR_BADPARTIAL (-13). optimizations. PCRE remembers the last literal data unit in a pattern, and
You can use the PCRE_INFO_OKPARTIAL call to <b>pcre_fullinfo()</b> to find out abandons matching immediately if it is not present in the subject string. This
if a compiled pattern can be used for partial matching. optimization cannot be used for a subject string that might match only
partially. If the pattern was studied, PCRE knows the minimum length of a
matching string, and does not bother to run the matching function on shorter
strings. This optimization is also disabled for partial matching.
</P> </P>
<br><a name="SEC3" href="#TOC1">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a><br> <br><a name="SEC2" href="#TOC1">PARTIAL MATCHING USING pcre_exec() OR pcre[16|32]_exec()</a><br>
<P>
A partial match occurs during a call to <b>pcre_exec()</b> or
<b>pcre[16|32]_exec()</b> when the end of the subject string is reached successfully,
but matching cannot continue because more characters are needed. However, at
least one character in the subject must have been inspected. This character
need not form part of the final matched string; lookbehind assertions and the
\K escape sequence provide ways of inspecting characters before the start of a
matched substring. The requirement for inspecting at least one character exists
because an empty string can always be matched; without such a restriction there
would always be a partial match of an empty string at the end of the subject.
</P>
<P>
If there are at least two slots in the offsets vector when a partial match is
returned, the first slot is set to the offset of the earliest character that
was inspected. For convenience, the second offset points to the end of the
subject so that a substring can easily be identified.
</P>
<P>
For the majority of patterns, the first offset identifies the start of the
partially matched string. However, for patterns that contain lookbehind
assertions, or \K, or begin with \b or \B, earlier characters have been
inspected while carrying out the match. For example:
<pre>
/(?&#60;=abc)123/
</pre>
This pattern matches "123", but only if it is preceded by "abc". If the subject
string is "xyzabc12", the offsets after a partial match are for the substring
"abc12", because all these characters are needed if another match is tried
with extra characters added to the subject.
</P>
<P>
What happens when a partial match is identified depends on which of the two
partial matching options is set.
</P>
<br><b>
PCRE_PARTIAL_SOFT WITH pcre_exec() OR pcre[16|32]_exec()
</b><br>
<P>
If PCRE_PARTIAL_SOFT is set when <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b>
identifies a partial match, the partial match is remembered, but matching
continues as normal, and other alternatives in the pattern are tried. If no
complete match can be found, PCRE_ERROR_PARTIAL is returned instead of
PCRE_ERROR_NOMATCH.
</P>
<P>
This option is "soft" because it prefers a complete match over a partial match.
All the various matching items in a pattern behave as if the subject string is
potentially complete. For example, \z, \Z, and $ match at the end of the
subject, as normal, and for \b and \B the end of the subject is treated as a
non-alphanumeric.
</P>
<P>
If there is more than one partial match, the first one that was found provides
the data that is returned. Consider this pattern:
<pre>
/123\w+X|dogY/
</pre>
If this is matched against the subject string "abc123dog", both
alternatives fail to match, but the end of the subject is reached during
matching, so PCRE_ERROR_PARTIAL is returned. The offsets are set to 3 and 9,
identifying "123dog" as the first partial match that was found. (In this
example, there are two partial matches, because "dog" on its own partially
matches the second alternative.)
</P>
<br><b>
PCRE_PARTIAL_HARD WITH pcre_exec() OR pcre[16|32]_exec()
</b><br>
<P>
If PCRE_PARTIAL_HARD is set for <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b>,
PCRE_ERROR_PARTIAL is returned as soon as a partial match is found, without
continuing to search for possible complete matches. This option is "hard"
because it prefers an earlier partial match over a later complete match. For
this reason, the assumption is made that the end of the supplied subject string
may not be the true end of the available data, and so, if \z, \Z, \b, \B,
or $ are encountered at the end of the subject, the result is
PCRE_ERROR_PARTIAL, provided that at least one character in the subject has
been inspected.
</P>
<P>
Setting PCRE_PARTIAL_HARD also affects the way UTF-8 and UTF-16
subject strings are checked for validity. Normally, an invalid sequence
causes the error PCRE_ERROR_BADUTF8 or PCRE_ERROR_BADUTF16. However, in the
special case of a truncated character at the end of the subject,
PCRE_ERROR_SHORTUTF8 or PCRE_ERROR_SHORTUTF16 is returned when
PCRE_PARTIAL_HARD is set.
</P>
<br><b>
Comparing hard and soft partial matching
</b><br>
<P>
The difference between the two partial matching options can be illustrated by a
pattern such as:
<pre>
/dog(sbody)?/
</pre>
This matches either "dog" or "dogsbody", greedily (that is, it prefers the
longer string if possible). If it is matched against the string "dog" with
PCRE_PARTIAL_SOFT, it yields a complete match for "dog". However, if
PCRE_PARTIAL_HARD is set, the result is PCRE_ERROR_PARTIAL. On the other hand,
if the pattern is made ungreedy the result is different:
<pre>
/dog(sbody)??/
</pre>
In this case the result is always a complete match because that is found first,
and matching never continues after finding a complete match. It might be easier
to follow this explanation by thinking of the two patterns like this:
<pre>
/dog(sbody)?/ is the same as /dogsbody|dog/
/dog(sbody)??/ is the same as /dog|dogsbody/
</pre>
The second pattern will never match "dogsbody", because it will always find the
shorter match first.
</P>
<br><a name="SEC3" href="#TOC1">PARTIAL MATCHING USING pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a><br>
<P>
The DFA functions move along the subject string character by character, without
backtracking, searching for all possible matches simultaneously. If the end of
the subject is reached before the end of the pattern, there is the possibility
of a partial match, again provided that at least one character has been
inspected.
</P>
<P>
When PCRE_PARTIAL_SOFT is set, PCRE_ERROR_PARTIAL is returned only if there
have been no complete matches. Otherwise, the complete matches are returned.
However, if PCRE_PARTIAL_HARD is set, a partial match takes precedence over any
complete matches. The portion of the string that was inspected when the longest
partial match was found is set as the first matching string, provided there are
at least two slots in the offsets vector.
</P>
<P>
Because the DFA functions always search for all possible matches, and there is
no difference between greedy and ungreedy repetition, their behaviour is
different from the standard functions when PCRE_PARTIAL_HARD is set. Consider
the string "dog" matched against the ungreedy pattern shown above:
<pre>
/dog(sbody)??/
</pre>
Whereas the standard functions stop as soon as they find the complete match for
"dog", the DFA functions also find the partial match for "dogsbody", and so
return that when PCRE_PARTIAL_HARD is set.
</P>
<br><a name="SEC4" href="#TOC1">PARTIAL MATCHING AND WORD BOUNDARIES</a><br>
<P>
If a pattern ends with one of the sequences \b or \B, which test for word
boundaries, partial matching with PCRE_PARTIAL_SOFT can give counter-intuitive
results. Consider this pattern:
<pre>
/\bcat\b/
</pre>
This matches "cat", provided there is a word boundary at either end. If the
subject string is "the cat", the comparison of the final "t" with a following
character cannot take place, so a partial match is found. However, normal
matching carries on, and \b matches at the end of the subject when the last
character is a letter, so a complete match is found. The result, therefore, is
<i>not</i> PCRE_ERROR_PARTIAL. Using PCRE_PARTIAL_HARD in this case does yield
PCRE_ERROR_PARTIAL, because then the partial match takes precedence.
</P>
<br><a name="SEC5" href="#TOC1">FORMERLY RESTRICTED PATTERNS</a><br>
<P>
For releases of PCRE prior to 8.00, because of the way certain internal
optimizations were implemented in the <b>pcre_exec()</b> function, the
PCRE_PARTIAL option (predecessor of PCRE_PARTIAL_SOFT) could not be used with
all patterns. From release 8.00 onwards, the restrictions no longer apply, and
partial matching can be requested for any pattern.
</P>
<P>
Items that were formerly restricted were repeated single characters and
repeated metasequences. If PCRE_PARTIAL was set for a pattern that did not
conform to the restrictions, <b>pcre_exec()</b> returned the error code
PCRE_ERROR_BADPARTIAL (-13). This error code is no longer in use. The
PCRE_INFO_OKPARTIAL call to <b>pcre_fullinfo()</b> to find out if a compiled
pattern can be used for partial matching now always returns 1.
</P>
<br><a name="SEC6" href="#TOC1">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a><br>
<P> <P>
If the escape sequence \P is present in a <b>pcretest</b> data line, the If the escape sequence \P is present in a <b>pcretest</b> data line, the
PCRE_PARTIAL flag is used for the match. Here is a run of <b>pcretest</b> that PCRE_PARTIAL_SOFT option is used for the match. Here is a run of <b>pcretest</b>
uses the date example quoted above: that uses the date example quoted above:
<pre> <pre>
re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/ re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
data&#62; 25jun04\P data&#62; 25jun04\P
0: 25jun04 0: 25jun04
1: jun 1: jun
data&#62; 25dec3\P data&#62; 25dec3\P
Partial match Partial match: 25dec3
data&#62; 3ju\P data&#62; 3ju\P
Partial match Partial match: 3ju
data&#62; 3juj\P data&#62; 3juj\P
No match No match
data&#62; j\P data&#62; j\P
@ -116,34 +273,22 @@ uses the date example quoted above:
</pre> </pre>
The first data string is matched completely, so <b>pcretest</b> shows the The first data string is matched completely, so <b>pcretest</b> shows the
matched substrings. The remaining four strings do not match the complete matched substrings. The remaining four strings do not match the complete
pattern, but the first two are partial matches. The same test, using pattern, but the first two are partial matches. Similar output is obtained
<b>pcre_dfa_exec()</b> matching (by means of the \D escape sequence), produces if DFA matching is used.
the following output:
<pre>
re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
data&#62; 25jun04\P\D
0: 25jun04
data&#62; 23dec3\P\D
Partial match: 23dec3
data&#62; 3ju\P\D
Partial match: 3ju
data&#62; 3juj\P\D
No match
data&#62; j\P\D
No match
</pre>
Notice that in this case the portion of the string that was matched is made
available.
</P> </P>
<br><a name="SEC4" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec()</a><br>
<P> <P>
When a partial match has been found using <b>pcre_dfa_exec()</b>, it is possible If the escape sequence \P is present more than once in a <b>pcretest</b> data
to continue the match by providing additional subject data and calling line, the PCRE_PARTIAL_HARD option is set for the match.
<b>pcre_dfa_exec()</b> again with the same compiled regular expression, this </P>
time setting the PCRE_DFA_RESTART option. You must also pass the same working <br><a name="SEC7" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec() OR pcre[16|32]_dfa_exec()</a><br>
space as before, because this is where details of the previous partial match <P>
are stored. Here is an example using <b>pcretest</b>, using the \R escape When a partial match has been found using a DFA matching function, it is
sequence to set the PCRE_DFA_RESTART option (\P and \D are as above): possible to continue the match by providing additional subject data and calling
the function again with the same compiled regular expression, this time setting
the PCRE_DFA_RESTART option. You must pass the same working space as before,
because this is where details of the previous partial match are stored. Here is
an example using <b>pcretest</b>, using the \R escape sequence to set the
PCRE_DFA_RESTART option (\D specifies the use of the DFA matching function):
<pre> <pre>
re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/ re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
data&#62; 23ja\P\D data&#62; 23ja\P\D
@ -158,33 +303,94 @@ not retain the previously partially-matched string. It is up to the calling
program to do that if it needs to. program to do that if it needs to.
</P> </P>
<P> <P>
You can set PCRE_PARTIAL with PCRE_DFA_RESTART to continue partial matching You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
over multiple segments. This facility can be used to pass very long subject PCRE_DFA_RESTART to continue partial matching over multiple segments. This
strings to <b>pcre_dfa_exec()</b>. However, some care is needed for certain facility can be used to pass very long subject strings to the DFA matching
types of pattern. functions.
</P>
<br><a name="SEC8" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre_exec() OR pcre[16|32]_exec()</a><br>
<P>
From release 8.00, the standard matching functions can also be used to do
multi-segment matching. Unlike the DFA functions, it is not possible to
restart the previous match with a new segment of data. Instead, new data must
be added to the previous subject string, and the entire match re-run, starting
from the point where the partial match occurred. Earlier data can be discarded.
</P> </P>
<P> <P>
1. If the pattern contains tests for the beginning or end of a line, you need It is best to use PCRE_PARTIAL_HARD in this situation, because it does not
to pass the PCRE_NOTBOL or PCRE_NOTEOL options, as appropriate, when the treat the end of a segment as the end of the subject when matching \z, \Z,
subject string for any call does not contain the beginning or end of a line. \b, \B, and $. Consider an unanchored pattern that matches dates:
<pre>
re&#62; /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/
data&#62; The date is 23ja\P\P
Partial match: 23ja
</pre>
At this stage, an application could discard the text preceding "23ja", add on
text from the next segment, and call the matching function again. Unlike the
DFA matching functions, the entire matching string must always be available,
and the complete matching process occurs for each call, so more memory and more
processing time are needed.
</P> </P>
<P> <P>
2. If the pattern contains backward assertions (including \b or \B), you need <b>Note:</b> If the pattern contains lookbehind assertions, or \K, or starts
to arrange for some overlap in the subject strings to allow for this. For with \b or \B, the string that is returned for a partial match includes
example, you could pass the subject in chunks that are 500 bytes long, but in characters that precede the partially matched string itself, because these must
a buffer of 700 bytes, with the starting offset set to 200 and the previous 200 be retained when adding on more characters for a subsequent matching attempt.
bytes at the start of the buffer. However, in some cases you may need to retain even earlier characters, as
discussed in the next section.
</P>
<br><a name="SEC9" href="#TOC1">ISSUES WITH MULTI-SEGMENT MATCHING</a><br>
<P>
Certain types of pattern may give problems with multi-segment matching,
whichever matching function is used.
</P> </P>
<P> <P>
3. Matching a subject string that is split into multiple segments does not 1. If the pattern contains a test for the beginning of a line, you need to pass
always produce exactly the same result as matching over one single long string. the PCRE_NOTBOL option when the subject string for any call does not start at the
The difference arises when there are multiple matching possibilities, because a beginning of a line. There is also a PCRE_NOTEOL option, but in practice when
partial match result is given only when there are no completed matches in a doing multi-segment matching you should be using PCRE_PARTIAL_HARD, which
call to <b>pcre_dfa_exec()</b>. This means that as soon as the shortest match has includes the effect of PCRE_NOTEOL.
been found, continuation to a new subject segment is no longer possible. </P>
Consider this <b>pcretest</b> example: <P>
2. Lookbehind assertions that have already been obeyed are catered for in the
offsets that are returned for a partial match. However a lookbehind assertion
later in the pattern could require even earlier characters to be inspected. You
can handle this case by using the PCRE_INFO_MAXLOOKBEHIND option of the
<b>pcre_fullinfo()</b> or <b>pcre[16|32]_fullinfo()</b> functions to obtain the length
of the largest lookbehind in the pattern. This length is given in characters,
not bytes. If you always retain at least that many characters before the
partially matched string, all should be well. (Of course, near the start of the
subject, fewer characters may be present; in that case all characters should be
retained.)
</P>
<P>
3. Because a partial match must always contain at least one character, what
might be considered a partial match of an empty string actually gives a "no
match" result. For example:
<pre>
re&#62; /c(?&#60;=abc)x/
data&#62; ab\P
No match
</pre>
If the next segment begins "cx", a match should be found, but this will only
happen if characters from the previous segment are retained. For this reason, a
"no match" result should be interpreted as "partial match of an empty string"
when the pattern contains lookbehinds.
</P>
<P>
4. Matching a subject string that is split into multiple segments may not
always produce exactly the same result as matching over one single long string,
especially when PCRE_PARTIAL_SOFT is used. The section "Partial Matching and
Word Boundaries" above describes an issue that arises if the pattern ends with
\b or \B. Another kind of difference may occur when there are multiple
matching possibilities, because (for PCRE_PARTIAL_SOFT) a partial match result
is given only when there are no completed matches. This means that as soon as
the shortest match has been found, continuation to a new subject segment is no
longer possible. Consider again this <b>pcretest</b> example:
<pre> <pre>
re&#62; /dog(sbody)?/ re&#62; /dog(sbody)?/
data&#62; dogsb\P
0: dog
data&#62; do\P\D data&#62; do\P\D
Partial match: do Partial match: do
data&#62; gsb\R\P\D data&#62; gsb\R\P\D
@ -193,26 +399,37 @@ Consider this <b>pcretest</b> example:
0: dogsbody 0: dogsbody
1: dog 1: dog
</pre> </pre>
The pattern matches the words "dog" or "dogsbody". When the subject is The first data line passes the string "dogsb" to a standard matching function,
presented in several parts ("do" and "gsb" being the first two) the match stops setting the PCRE_PARTIAL_SOFT option. Although the string is a partial match
when "dog" has been found, and it is not possible to continue. On the other for "dogsbody", the result is not PCRE_ERROR_PARTIAL, because the shorter
hand, if "dogsbody" is presented as a single string, both matches are found. string "dog" is a complete match. Similarly, when the subject is presented to
a DFA matching function in several parts ("do" and "gsb" being the first two)
the match stops when "dog" has been found, and it is not possible to continue.
On the other hand, if "dogsbody" is presented as a single string, a DFA
matching function finds both matches.
</P> </P>
<P> <P>
Because of this phenomenon, it does not usually make sense to end a pattern Because of these problems, it is best to use PCRE_PARTIAL_HARD when matching
that is going to be matched in this way with a variable repeat. multi-segment data. The example above then behaves differently:
</P> <pre>
<P> re&#62; /dog(sbody)?/
4. Patterns that contain alternatives at the top level which do not all data&#62; dogsb\P\P
start with the same pattern item may not work as expected. For example, Partial match: dogsb
consider this pattern: data&#62; do\P\D
Partial match: do
data&#62; gsb\R\P\P\D
Partial match: gsb
</pre>
5. Patterns that contain alternatives at the top level which do not all start
with the same pattern item may not work as expected when PCRE_DFA_RESTART is
used. For example, consider this pattern:
<pre> <pre>
1234|3789 1234|3789
</pre> </pre>
If the first part of the subject is "ABC123", a partial match of the first If the first part of the subject is "ABC123", a partial match of the first
alternative is found at offset 3. There is no partial match for the second alternative is found at offset 3. There is no partial match for the second
alternative, because such a match does not start at the same point in the alternative, because such a match does not start at the same point in the
subject string. Attempting to continue with the string "789" does not yield a subject string. Attempting to continue with the string "7890" does not yield a
match because only those alternatives that match at one point in the subject match because only those alternatives that match at one point in the subject
are remembered. The problem arises because the start of the second alternative are remembered. The problem arises because the start of the second alternative
matches within the first alternative. There is no problem with anchored matches within the first alternative. There is no problem with anchored
@ -220,9 +437,24 @@ patterns or patterns such as:
<pre> <pre>
1234|ABCD 1234|ABCD
</pre> </pre>
where no string can be a partial match for both alternatives. where no string can be a partial match for both alternatives. This is not a
problem if a standard matching function is used, because the entire match has
to be rerun each time:
<pre>
re&#62; /1234|3789/
data&#62; ABC123\P\P
Partial match: 123
data&#62; 1237890
0: 3789
</pre>
Of course, instead of using PCRE_DFA_RESTART, the same technique of re-running
the entire match can also be used with the DFA matching functions. Another
possibility is to work with two buffers. If a partial match at offset <i>n</i>
in the first buffer is followed by "no match" when PCRE_DFA_RESTART is used on
the second buffer, you can then try a new match starting at offset <i>n+1</i> in
the first buffer.
</P> </P>
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br> <br><a name="SEC10" href="#TOC1">AUTHOR</a><br>
<P> <P>
Philip Hazel Philip Hazel
<br> <br>
@ -231,11 +463,11 @@ University Computing Service
Cambridge CB2 3QH, England. Cambridge CB2 3QH, England.
<br> <br>
</P> </P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br> <br><a name="SEC11" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 04 June 2007 Last updated: 24 June 2012
<br> <br>
Copyright &copy; 1997-2007 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

File diff suppressed because it is too large Load Diff

View File

@ -21,14 +21,15 @@ time. The way you express your pattern as a regular expression can affect both
of them. of them.
</P> </P>
<br><b> <br><b>
MEMORY USAGE COMPILED PATTERN MEMORY USAGE
</b><br> </b><br>
<P> <P>
Patterns are compiled by PCRE into a reasonably efficient byte code, so that Patterns are compiled by PCRE into a reasonably efficient interpretive code, so
most simple patterns do not use much memory. However, there is one case where that most simple patterns do not use much memory. However, there is one case
memory usage can be unexpectedly large. When a parenthesized subpattern has a where the memory usage of a compiled pattern can be unexpectedly large. If a
quantifier with a minimum greater than 1 and/or a limited maximum, the whole parenthesized subpattern has a quantifier with a minimum greater than 1 and/or
subpattern is repeated in the compiled code. For example, the pattern a limited maximum, the whole subpattern is repeated in the compiled code. For
example, the pattern
<pre> <pre>
(abc|def){2,4} (abc|def){2,4}
</pre> </pre>
@ -47,12 +48,12 @@ example, the very simple pattern
<pre> <pre>
((ab){1,1000}c){1,3} ((ab){1,1000}c){1,3}
</pre> </pre>
uses 51K bytes when compiled. When PCRE is compiled with its default internal uses 51K bytes when compiled using the 8-bit library. When PCRE is compiled
pointer size of two bytes, the size limit on a compiled pattern is 64K, and with its default internal pointer size of two bytes, the size limit on a
this is reached with the above pattern if the outer repetition is increased compiled pattern is 64K data units, and this is reached with the above pattern
from 3 to 4. PCRE can be compiled to use larger internal pointers and thus if the outer repetition is increased from 3 to 4. PCRE can be compiled to use
handle larger compiled patterns, but it is better to try to rewrite your larger internal pointers and thus handle larger compiled patterns, but it is
pattern to use less memory if you can. better to try to rewrite your pattern to use less memory if you can.
</P> </P>
<P> <P>
One way of reducing the memory usage for such patterns is to make use of PCRE's One way of reducing the memory usage for such patterns is to make use of PCRE's
@ -73,6 +74,18 @@ speed is acceptable, this kind of rewriting will allow you to process patterns
that PCRE cannot otherwise handle. that PCRE cannot otherwise handle.
</P> </P>
<br><b> <br><b>
STACK USAGE AT RUN TIME
</b><br>
<P>
When <b>pcre_exec()</b> or <b>pcre[16|32]_exec()</b> is used for matching, certain
kinds of pattern can cause it to use large amounts of the process stack. In
some environments the default process stack is quite small, and if it runs out
the result is often SIGSEGV. This issue is probably the most frequently raised
problem with PCRE. Rewriting your pattern can often help. The
<a href="pcrestack.html"><b>pcrestack</b></a>
documentation discusses this issue in detail.
</P>
<br><b>
PROCESSING TIME PROCESSING TIME
</b><br> </b><br>
<P> <P>
@ -86,10 +99,19 @@ contains a few observations about PCRE.
</P> </P>
<P> <P>
Using Unicode character properties (the \p, \P, and \X escapes) is slow, Using Unicode character properties (the \p, \P, and \X escapes) is slow,
because PCRE has to scan a structure that contains data for over fifteen because PCRE has to use a multi-stage table lookup whenever it needs a
thousand characters whenever it needs a character's property. If you can find character's property. If you can find an alternative pattern that does not use
an alternative pattern that does not use character properties, it will probably character properties, it will probably be faster.
be faster. </P>
<P>
By default, the escape sequences \b, \d, \s, and \w, and the POSIX
character classes such as [:alpha:] do not use Unicode properties, partly for
backwards compatibility, and partly for performance reasons. However, you can
set PCRE_UCP if you want Unicode character properties to be used. This can
double the matching time for items such as \d, when matched with
a traditional matching function; the performance loss is less with
a DFA matching function, and in both cases there is not much difference for
\b.
</P> </P>
<P> <P>
When a pattern begins with .* not in parentheses, or in parentheses that are When a pattern begins with .* not in parentheses, or in parentheses that are
@ -164,9 +186,9 @@ Cambridge CB2 3QH, England.
REVISION REVISION
</b><br> </b><br>
<P> <P>
Last updated: 06 March 2007 Last updated: 25 August 2012
<br> <br>
Copyright &copy; 1997-2007 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -44,11 +44,12 @@ man page, in case the conversion went wrong.
</P> </P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br> <br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P> <P>
This set of functions provides a POSIX-style API to the PCRE regular expression This set of functions provides a POSIX-style API for the PCRE regular
package. See the expression 8-bit library. See the
<a href="pcreapi.html"><b>pcreapi</b></a> <a href="pcreapi.html"><b>pcreapi</b></a>
documentation for a description of PCRE's native API, which contains much documentation for a description of PCRE's native API, which contains much
additional functionality. additional functionality. There is no POSIX-style wrapper for PCRE's 16-bit
and 32-bit libraries.
</P> </P>
<P> <P>
The functions described here are just wrapper functions that ultimately call The functions described here are just wrapper functions that ultimately call
@ -66,6 +67,11 @@ POSIX interface often use it, this makes it easier to slot in PCRE as a
replacement library. Other POSIX options are not even defined. replacement library. Other POSIX options are not even defined.
</P> </P>
<P> <P>
There are also some other options that are not defined by POSIX. These have
been added at the request of users who want to make use of certain
PCRE-specific features via the POSIX calling interface.
</P>
<P>
When PCRE is called via these functions, it is only the API that is POSIX-like When PCRE is called via these functions, it is only the API that is POSIX-like
in style. The syntax and semantics of the regular expressions themselves are in style. The syntax and semantics of the regular expressions themselves are
still those of Perl, subject to the setting of various PCRE options, as still those of Perl, subject to the setting of various PCRE options, as
@ -82,8 +88,6 @@ structure types, <i>regex_t</i> for compiled internal forms, and
constants whose names start with "REG_"; these are used for setting options and constants whose names start with "REG_"; these are used for setting options and
identifying error codes. identifying error codes.
</P> </P>
<P>
</P>
<br><a name="SEC3" href="#TOC1">COMPILING A PATTERN</a><br> <br><a name="SEC3" href="#TOC1">COMPILING A PATTERN</a><br>
<P> <P>
The function <b>regcomp()</b> is called to compile a pattern into an The function <b>regcomp()</b> is called to compile a pattern into an
@ -120,6 +124,19 @@ for compilation to the native function. In addition, when a pattern that is
compiled with this flag is passed to <b>regexec()</b> for matching, the compiled with this flag is passed to <b>regexec()</b> for matching, the
<i>nmatch</i> and <i>pmatch</i> arguments are ignored, and no captured strings <i>nmatch</i> and <i>pmatch</i> arguments are ignored, and no captured strings
are returned. are returned.
<pre>
REG_UCP
</pre>
The PCRE_UCP option is set when the regular expression is passed for
compilation to the native function. This causes PCRE to use Unicode properties
when matching \d, \w, etc., instead of just recognizing ASCII values. Note
that REG_UCP is not part of the POSIX standard.
<pre>
REG_UNGREEDY
</pre>
The PCRE_UNGREEDY option is set when the regular expression is passed for
compilation to the native function. Note that REG_UNGREEDY is not part of the
POSIX standard.
<pre> <pre>
REG_UTF8 REG_UTF8
</pre> </pre>
@ -134,7 +151,7 @@ This means the the regex is compiled with PCRE default semantics. In
particular, the way it handles newline characters in the subject string is the particular, the way it handles newline characters in the subject string is the
Perl way, not the POSIX way. Note that setting PCRE_MULTILINE has only Perl way, not the POSIX way. Note that setting PCRE_MULTILINE has only
<i>some</i> of the effects specified for REG_NEWLINE. It does not affect the way <i>some</i> of the effects specified for REG_NEWLINE. It does not affect the way
newlines are matched by . (they aren't) or by a negative class such as [^a] newlines are matched by . (they are not) or by a negative class such as [^a]
(they are). (they are).
</P> </P>
<P> <P>
@ -143,6 +160,11 @@ The yield of <b>regcomp()</b> is zero on success, and non-zero otherwise. The
is public: <i>re_nsub</i> contains the number of capturing subpatterns in is public: <i>re_nsub</i> contains the number of capturing subpatterns in
the regular expression. Various error codes are defined in the header file. the regular expression. Various error codes are defined in the header file.
</P> </P>
<P>
NOTE: If the yield of <b>regcomp()</b> is non-zero, you must not attempt to
use the contents of the <i>preg</i> structure. If, for example, you pass it to
<b>regexec()</b>, the result is undefined and your program is likely to crash.
</P>
<br><a name="SEC4" href="#TOC1">MATCHING NEWLINE CHARACTERS</a><br> <br><a name="SEC4" href="#TOC1">MATCHING NEWLINE CHARACTERS</a><br>
<P> <P>
This area is not simple, because POSIX and Perl take different views of things. This area is not simple, because POSIX and Perl take different views of things.
@ -217,6 +239,10 @@ strings is returned. The <i>nmatch</i> and <i>pmatch</i> arguments of
<b>regexec()</b> are ignored. <b>regexec()</b> are ignored.
</P> </P>
<P> <P>
If the value of <i>nmatch</i> is zero, or if the value of <i>pmatch</i> is NULL,
no data about any matched strings is returned.
</P>
<P>
Otherwise, the portion of the string that was matched, and also any captured Otherwise, the portion of the string that was matched, and also any captured
substrings, are returned via the <i>pmatch</i> argument, which points to an substrings, are returned via the <i>pmatch</i> argument, which points to an
array of <i>nmatch</i> structures of type <i>regmatch_t</i>, containing the array of <i>nmatch</i> structures of type <i>regmatch_t</i>, containing the
@ -257,9 +283,9 @@ Cambridge CB2 3QH, England.
</P> </P>
<br><a name="SEC9" href="#TOC1">REVISION</a><br> <br><a name="SEC9" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 11 March 2009 Last updated: 09 January 2012
<br> <br>
Copyright &copy; 1997-2009 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -28,24 +28,31 @@ instead of having to compile them every time the application is run.
If you are not using any private character tables (see the If you are not using any private character tables (see the
<a href="pcre_maketables.html"><b>pcre_maketables()</b></a> <a href="pcre_maketables.html"><b>pcre_maketables()</b></a>
documentation), this is relatively straightforward. If you are using private documentation), this is relatively straightforward. If you are using private
tables, it is a little bit more complicated. tables, it is a little bit more complicated. However, if you are using the
just-in-time optimization feature, it is not possible to save and reload the
JIT data.
</P> </P>
<P> <P>
If you save compiled patterns to a file, you can copy them to a different host If you save compiled patterns to a file, you can copy them to a different host
and run them there. This works even if the new host has the opposite endianness and run them there. If the two hosts have different endianness (byte order),
to the one on which the patterns were compiled. There may be a small you should run the <b>pcre[16|32]_pattern_to_host_byte_order()</b> function on the
performance penalty, but it should be insignificant. However, compiling regular new host before trying to match the pattern. The matching functions return
expressions with one version of PCRE for use with a different version is not PCRE_ERROR_BADENDIANNESS if they detect a pattern with the wrong endianness.
guaranteed to work and may cause crashes. </P>
<P>
Compiling regular expressions with one version of PCRE for use with a different
version is not guaranteed to work and may cause crashes, and saving and
restoring a compiled pattern loses any JIT optimization data.
</P> </P>
<br><a name="SEC2" href="#TOC1">SAVING A COMPILED PATTERN</a><br> <br><a name="SEC2" href="#TOC1">SAVING A COMPILED PATTERN</a><br>
<P> <P>
The value returned by <b>pcre_compile()</b> points to a single block of memory The value returned by <b>pcre[16|32]_compile()</b> points to a single block of
that holds the compiled pattern and associated data. You can find the length of memory that holds the compiled pattern and associated data. You can find the
this block in bytes by calling <b>pcre_fullinfo()</b> with an argument of length of this block in bytes by calling <b>pcre[16|32]_fullinfo()</b> with an
PCRE_INFO_SIZE. You can then save the data in any appropriate manner. Here is argument of PCRE_INFO_SIZE. You can then save the data in any appropriate
sample code that compiles a pattern and writes it to a file. It assumes that manner. Here is sample code for the 8-bit library that compiles a pattern and
the variable <i>fd</i> refers to a file that is open for output: writes it to a file. It assumes that the variable <i>fd</i> refers to a file
that is open for output:
<pre> <pre>
int erroroffset, rc, size; int erroroffset, rc, size;
char *error; char *error;
@ -76,33 +83,36 @@ some daemon process that passes them via sockets to the processes that want
them. them.
</P> </P>
<P> <P>
If the pattern has been studied, it is also possible to save the study data in If the pattern has been studied, it is also possible to save the normal study
a similar way to the compiled pattern itself. When studying generates data in a similar way to the compiled pattern itself. However, if the
additional information, <b>pcre_study()</b> returns a pointer to a PCRE_STUDY_JIT_COMPILE option was used, the just-in-time data that is created cannot
<b>pcre_extra</b> data block. Its format is defined in the be saved because it is too dependent on the current environment. When studying
generates additional information, <b>pcre[16|32]_study()</b> returns a pointer to a
<b>pcre[16|32]_extra</b> data block. Its format is defined in the
<a href="pcreapi.html#extradata">section on matching a pattern</a> <a href="pcreapi.html#extradata">section on matching a pattern</a>
in the in the
<a href="pcreapi.html"><b>pcreapi</b></a> <a href="pcreapi.html"><b>pcreapi</b></a>
documentation. The <i>study_data</i> field points to the binary study data, and documentation. The <i>study_data</i> field points to the binary study data, and
this is what you must save (not the <b>pcre_extra</b> block itself). The length this is what you must save (not the <b>pcre[16|32]_extra</b> block itself). The
of the study data can be obtained by calling <b>pcre_fullinfo()</b> with an length of the study data can be obtained by calling <b>pcre[16|32]_fullinfo()</b>
argument of PCRE_INFO_STUDYSIZE. Remember to check that <b>pcre_study()</b> did with an argument of PCRE_INFO_STUDYSIZE. Remember to check that
return a non-NULL value before trying to save the study data. <b>pcre[16|32]_study()</b> did return a non-NULL value before trying to save the
study data.
</P> </P>
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br> <br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
<P> <P>
Re-using a precompiled pattern is straightforward. Having reloaded it into main Re-using a precompiled pattern is straightforward. Having reloaded it into main
memory, you pass its pointer to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> in memory, called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary,
the usual way. This should work even on another host, and even if that host has you pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
the opposite endianness to the one where the pattern was compiled. the usual way.
</P> </P>
<P> <P>
However, if you passed a pointer to custom character tables when the pattern However, if you passed a pointer to custom character tables when the pattern
was compiled (the <i>tableptr</i> argument of <b>pcre_compile()</b>), you must was compiled (the <i>tableptr</i> argument of <b>pcre[16|32]_compile()</b>), you
now pass a similar pointer to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>, must now pass a similar pointer to <b>pcre[16|32]_exec()</b> or
because the value saved with the compiled pattern will obviously be nonsense. A <b>pcre[16|32]_dfa_exec()</b>, because the value saved with the compiled pattern
field in a <b>pcre_extra()</b> block is used to pass this data, as described in will obviously be nonsense. A field in a <b>pcre[16|32]_extra()</b> block is used
the to pass this data, as described in the
<a href="pcreapi.html#extradata">section on matching a pattern</a> <a href="pcreapi.html#extradata">section on matching a pattern</a>
in the in the
<a href="pcreapi.html"><b>pcreapi</b></a> <a href="pcreapi.html"><b>pcreapi</b></a>
@ -110,23 +120,23 @@ documentation.
</P> </P>
<P> <P>
If you did not provide custom character tables when the pattern was compiled, If you did not provide custom character tables when the pattern was compiled,
the pointer in the compiled pattern is NULL, which causes <b>pcre_exec()</b> to the pointer in the compiled pattern is NULL, which causes the matching
use PCRE's internal tables. Thus, you do not need to take any special action at functions to use PCRE's internal tables. Thus, you do not need to take any
run time in this case. special action at run time in this case.
</P> </P>
<P> <P>
If you saved study data with the compiled pattern, you need to create your own If you saved study data with the compiled pattern, you need to create your own
<b>pcre_extra</b> data block and set the <i>study_data</i> field to point to the <b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point to the
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
<i>flags</i> field to indicate that study data is present. Then pass the <i>flags</i> field to indicate that study data is present. Then pass the
<b>pcre_extra</b> block to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> in the <b>pcre[16|32]_extra</b> block to the matching function in the usual way. If the
usual way. pattern was studied for just-in-time optimization, that data cannot be saved,
and so is lost by a save/restore cycle.
</P> </P>
<br><a name="SEC4" href="#TOC1">COMPATIBILITY WITH DIFFERENT PCRE RELEASES</a><br> <br><a name="SEC4" href="#TOC1">COMPATIBILITY WITH DIFFERENT PCRE RELEASES</a><br>
<P> <P>
In general, it is safest to recompile all saved patterns when you update to a In general, it is safest to recompile all saved patterns when you update to a
new PCRE release, though not all updates actually require this. Recompiling is new PCRE release, though not all updates actually require this.
definitely needed for release 7.2.
</P> </P>
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br> <br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
<P> <P>
@ -139,9 +149,9 @@ Cambridge CB2 3QH, England.
</P> </P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br> <br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 13 June 2007 Last updated: 24 June 2012
<br> <br>
Copyright &copy; 1997-2007 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -17,14 +17,19 @@ PCRE SAMPLE PROGRAM
</b><br> </b><br>
<P> <P>
A simple, complete demonstration program, to get you started with using PCRE, A simple, complete demonstration program, to get you started with using PCRE,
is supplied in the file <i>pcredemo.c</i> in the PCRE distribution. is supplied in the file <i>pcredemo.c</i> in the PCRE distribution. A listing of
this program is given in the
<a href="pcredemo.html"><b>pcredemo</b></a>
documentation. If you do not have a copy of the PCRE distribution, you can save
this listing to re-create <i>pcredemo.c</i>.
</P> </P>
<P> <P>
The program compiles the regular expression that is its first argument, and The demonstration program, which uses the original PCRE 8-bit library, compiles
matches it against the subject string in its second argument. No PCRE options the regular expression that is its first argument, and matches it against the
are set, and default character tables are used. If matching succeeds, the subject string in its second argument. No PCRE options are set, and default
program outputs the portion of the subject that matched, together with the character tables are used. If matching succeeds, the program outputs the
contents of any captured substrings. portion of the subject that matched, together with the contents of any captured
substrings.
</P> </P>
<P> <P>
If the -g option is given on the command line, the program then goes on to If the -g option is given on the command line, the program then goes on to
@ -34,8 +39,8 @@ an empty string. Comments in the code explain what is going on.
</P> </P>
<P> <P>
If PCRE is installed in the standard include and library directories for your If PCRE is installed in the standard include and library directories for your
system, you should be able to compile the demonstration program using this operating system, you should be able to compile the demonstration program using
command: this command:
<pre> <pre>
gcc -o pcredemo pcredemo.c -lpcre gcc -o pcredemo pcredemo.c -lpcre
</pre> </pre>
@ -46,22 +51,31 @@ like this:
<pre> <pre>
gcc -o pcredemo -I/usr/local/include pcredemo.c -L/usr/local/lib -lpcre gcc -o pcredemo -I/usr/local/include pcredemo.c -L/usr/local/lib -lpcre
</pre> </pre>
Once you have compiled the demonstration program, you can run simple tests like In a Windows environment, if you want to statically link the program against a
this: non-dll <b>pcre.a</b> file, you must uncomment the line that defines PCRE_STATIC
before including <b>pcre.h</b>, because otherwise the <b>pcre_malloc()</b> and
<b>pcre_free()</b> exported functions will be declared
<b>__declspec(dllimport)</b>, with unwanted results.
</P>
<P>
Once you have compiled and linked the demonstration program, you can run simple
tests like this:
<pre> <pre>
./pcredemo 'cat|dog' 'the cat sat on the mat' ./pcredemo 'cat|dog' 'the cat sat on the mat'
./pcredemo -g 'cat|dog' 'the dog sat on the cat' ./pcredemo -g 'cat|dog' 'the dog sat on the cat'
</pre> </pre>
Note that there is a much more comprehensive test program, called Note that there is a much more comprehensive test program, called
<a href="pcretest.html"><b>pcretest</b></a>, <a href="pcretest.html"><b>pcretest</b></a>,
which supports many more facilities for testing regular expressions and the which supports many more facilities for testing regular expressions and both
PCRE library. The <b>pcredemo</b> program is provided as a simple coding PCRE libraries. The
example. <a href="pcredemo.html"><b>pcredemo</b></a>
program is provided as a simple coding example.
</P> </P>
<P> <P>
On some operating systems (e.g. Solaris), when PCRE is not installed in the If you try to run
standard library directory, you may get an error like this when you try to run <a href="pcredemo.html"><b>pcredemo</b></a>
<b>pcredemo</b>: when PCRE is not installed in the standard library directory, you may get an
error like this on some operating systems (e.g. Solaris):
<pre> <pre>
ld.so.1: a.out: fatal: libpcre.so.0: open failed: No such file or directory ld.so.1: a.out: fatal: libpcre.so.0: open failed: No such file or directory
</pre> </pre>
@ -87,9 +101,9 @@ Cambridge CB2 3QH, England.
REVISION REVISION
</b><br> </b><br>
<P> <P>
Last updated: 23 January 2008 Last updated: 10 January 2012
<br> <br>
Copyright &copy; 1997-2008 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -16,11 +16,14 @@ man page, in case the conversion went wrong.
PCRE DISCUSSION OF STACK USAGE PCRE DISCUSSION OF STACK USAGE
</b><br> </b><br>
<P> <P>
When you call <b>pcre_exec()</b>, it makes use of an internal function called When you call <b>pcre[16|32]_exec()</b>, it makes use of an internal function
<b>match()</b>. This calls itself recursively at branch points in the pattern, called <b>match()</b>. This calls itself recursively at branch points in the
in order to remember the state of the match so that it can back up and try a pattern, in order to remember the state of the match so that it can back up and
different alternative if the first one fails. As matching proceeds deeper and try a different alternative if the first one fails. As matching proceeds deeper
deeper into the tree of possibilities, the recursion depth increases. and deeper into the tree of possibilities, the recursion depth increases. The
<b>match()</b> function is also called in other circumstances, for example,
whenever a parenthesized sub-pattern is entered, and in certain cases of
repetition.
</P> </P>
<P> <P>
Not all calls of <b>match()</b> increase the recursion depth; for an item such Not all calls of <b>match()</b> increase the recursion depth; for an item such
@ -30,21 +33,34 @@ the recursive call would immediately be passed back as the result of the
current call (a "tail recursion"), the function is just restarted instead. current call (a "tail recursion"), the function is just restarted instead.
</P> </P>
<P> <P>
The <b>pcre_dfa_exec()</b> function operates in an entirely different way, and The above comments apply when <b>pcre[16|32]_exec()</b> is run in its normal
hardly uses recursion at all. The limit on its complexity is the amount of interpretive manner. If the pattern was studied with the
workspace it is given. The comments that follow do NOT apply to PCRE_STUDY_JIT_COMPILE option, and just-in-time compiling was successful, and
<b>pcre_dfa_exec()</b>; they are relevant only for <b>pcre_exec()</b>. the options passed to <b>pcre[16|32]_exec()</b> were not incompatible, the matching
process uses the JIT-compiled code instead of the <b>match()</b> function. In
this case, the memory requirements are handled entirely differently. See the
<a href="pcrejit.html"><b>pcrejit</b></a>
documentation for details.
</P> </P>
<P> <P>
You can set limits on the number of times that <b>match()</b> is called, both in The <b>pcre[16|32]_dfa_exec()</b> function operates in an entirely different way,
total and recursively. If the limit is exceeded, an error occurs. For details, and uses recursion only when there is a regular expression recursion or
see the subroutine call in the pattern. This includes the processing of assertion and
<a href="pcreapi.html#extradata">section on extra data for <b>pcre_exec()</b></a> "once-only" subpatterns, which are handled like subroutine calls. Normally,
in the these are never very deep, and the limit on the complexity of
<a href="pcreapi.html"><b>pcreapi</b></a> <b>pcre[16|32]_dfa_exec()</b> is controlled by the amount of workspace it is given.
documentation. However, it is possible to write patterns with runaway infinite recursions;
such patterns will cause <b>pcre[16|32]_dfa_exec()</b> to run out of stack. At
present, there is no protection against this.
</P> </P>
<P> <P>
The comments that follow do NOT apply to <b>pcre[16|32]_dfa_exec()</b>; they are
relevant only for <b>pcre[16|32]_exec()</b> without the JIT optimization.
</P>
<br><b>
Reducing <b>pcre[16|32]_exec()</b>'s stack usage
</b><br>
<P>
Each time that <b>match()</b> is actually called recursively, it uses memory Each time that <b>match()</b> is actually called recursively, it uses memory
from the process stack. For certain kinds of pattern and data, very large from the process stack. For certain kinds of pattern and data, very large
amounts of stack may be needed, despite the recognition of "tail recursion". amounts of stack may be needed, despite the recognition of "tail recursion".
@ -78,42 +94,79 @@ subject strings is to write repeated parenthesized subpatterns to match more
than one character whenever possible. than one character whenever possible.
</P> </P>
<br><b> <br><b>
Compiling PCRE to use heap instead of stack Compiling PCRE to use heap instead of stack for <b>pcre[16|32]_exec()</b>
</b><br> </b><br>
<P> <P>
In environments where stack memory is constrained, you might want to compile In environments where stack memory is constrained, you might want to compile
PCRE to use heap memory instead of stack for remembering back-up points. This PCRE to use heap memory instead of stack for remembering back-up points when
makes it run a lot more slowly, however. Details of how to do this are given in <b>pcre[16|32]_exec()</b> is running. This makes it run a lot more slowly, however.
the Details of how to do this are given in the
<a href="pcrebuild.html"><b>pcrebuild</b></a> <a href="pcrebuild.html"><b>pcrebuild</b></a>
documentation. When built in this way, instead of using the stack, PCRE obtains documentation. When built in this way, instead of using the stack, PCRE obtains
and frees memory by calling the functions that are pointed to by the and frees memory by calling the functions that are pointed to by the
<b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables. By default, these <b>pcre[16|32]_stack_malloc</b> and <b>pcre[16|32]_stack_free</b> variables. By
point to <b>malloc()</b> and <b>free()</b>, but you can replace the pointers to default, these point to <b>malloc()</b> and <b>free()</b>, but you can replace
cause PCRE to use your own functions. Since the block sizes are always the the pointers to cause PCRE to use your own functions. Since the block sizes are
same, and are always freed in reverse order, it may be possible to implement always the same, and are always freed in reverse order, it may be possible to
customized memory handlers that are more efficient than the standard functions. implement customized memory handlers that are more efficient than the standard
functions.
</P> </P>
<br><b> <br><b>
Limiting PCRE's stack usage Limiting <b>pcre[16|32]_exec()</b>'s stack usage
</b><br> </b><br>
<P> <P>
PCRE has an internal counter that can be used to limit the depth of recursion, You can set limits on the number of times that <b>match()</b> is called, both in
and thus cause <b>pcre_exec()</b> to give an error code before it runs out of total and recursively. If a limit is exceeded, <b>pcre[16|32]_exec()</b> returns an
stack. By default, the limit is very large, and unlikely ever to operate. It error code. Setting suitable limits should prevent it from running out of
can be changed when PCRE is built, and it can also be set when stack. The default values of the limits are very large, and unlikely ever to
<b>pcre_exec()</b> is called. For details of these interfaces, see the operate. They can be changed when PCRE is built, and they can also be set when
<b>pcre[16|32]_exec()</b> is called. For details of these interfaces, see the
<a href="pcrebuild.html"><b>pcrebuild</b></a> <a href="pcrebuild.html"><b>pcrebuild</b></a>
and documentation and the
<a href="pcreapi.html#extradata">section on extra data for <b>pcre[16|32]_exec()</b></a>
in the
<a href="pcreapi.html"><b>pcreapi</b></a> <a href="pcreapi.html"><b>pcreapi</b></a>
documentation. documentation.
</P> </P>
<P> <P>
As a very rough rule of thumb, you should reckon on about 500 bytes per As a very rough rule of thumb, you should reckon on about 500 bytes per
recursion. Thus, if you want to limit your stack usage to 8Mb, you recursion. Thus, if you want to limit your stack usage to 8Mb, you should set
should set the limit at 16000 recursions. A 64Mb stack, on the other hand, can the limit at 16000 recursions. A 64Mb stack, on the other hand, can support
support around 128000 recursions. The <b>pcretest</b> test program has a command around 128000 recursions.
line option (<b>-S</b>) that can be used to increase the size of its stack. </P>
<P>
In Unix-like environments, the <b>pcretest</b> test program has a command line
option (<b>-S</b>) that can be used to increase the size of its stack. As long
as the stack is large enough, another option (<b>-M</b>) can be used to find the
smallest limits that allow a particular pattern to match a given subject
string. This is done by calling <b>pcre[16|32]_exec()</b> repeatedly with different
limits.
</P>
<br><b>
Obtaining an estimate of stack usage
</b><br>
<P>
The actual amount of stack used per recursion can vary quite a lot, depending
on the compiler that was used to build PCRE and the optimization or debugging
options that were set for it. The rule of thumb value of 500 bytes mentioned
above may be larger or smaller than what is actually needed. A better
approximation can be obtained by running this command:
<pre>
pcretest -m -C
</pre>
The <b>-C</b> option causes <b>pcretest</b> to output information about the
options with which PCRE was compiled. When <b>-m</b> is also given (before
<b>-C</b>), information about stack use is given in a line like this:
<pre>
Match recursion uses stack: approximate frame size = 640 bytes
</pre>
The value is approximate because some recursions need a bit more (up to perhaps
16 more bytes).
</P>
<P>
If the above command is given when PCRE is compiled to use the heap instead of
the stack for recursion, the value that is output is the size of each block
that is obtained from the heap.
</P> </P>
<br><b> <br><b>
Changing stack size in Unix-like systems Changing stack size in Unix-like systems
@ -137,7 +190,7 @@ limit on stack size by code such as this:
</pre> </pre>
This reads the current limits (soft and hard) using <b>getrlimit()</b>, then This reads the current limits (soft and hard) using <b>getrlimit()</b>, then
attempts to increase the soft limit to 100Mb using <b>setrlimit()</b>. You must attempts to increase the soft limit to 100Mb using <b>setrlimit()</b>. You must
do this before calling <b>pcre_exec()</b>. do this before calling <b>pcre[16|32]_exec()</b>.
</P> </P>
<br><b> <br><b>
Changing stack size in Mac OS X Changing stack size in Mac OS X
@ -163,9 +216,9 @@ Cambridge CB2 3QH, England.
REVISION REVISION
</b><br> </b><br>
<P> <P>
Last updated: 09 July 2008 Last updated: 24 June 2012
<br> <br>
Copyright &copy; 1997-2008 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

View File

@ -17,36 +17,36 @@ man page, in case the conversion went wrong.
<li><a name="TOC2" href="#SEC2">QUOTING</a> <li><a name="TOC2" href="#SEC2">QUOTING</a>
<li><a name="TOC3" href="#SEC3">CHARACTERS</a> <li><a name="TOC3" href="#SEC3">CHARACTERS</a>
<li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a> <li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a>
<li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTY CODES FOR \p and \P</a> <li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTIES FOR \p and \P</a>
<li><a name="TOC6" href="#SEC6">SCRIPT NAMES FOR \p AND \P</a> <li><a name="TOC6" href="#SEC6">PCRE SPECIAL CATEGORY PROPERTIES FOR \p and \P</a>
<li><a name="TOC7" href="#SEC7">CHARACTER CLASSES</a> <li><a name="TOC7" href="#SEC7">SCRIPT NAMES FOR \p AND \P</a>
<li><a name="TOC8" href="#SEC8">QUANTIFIERS</a> <li><a name="TOC8" href="#SEC8">CHARACTER CLASSES</a>
<li><a name="TOC9" href="#SEC9">ANCHORS AND SIMPLE ASSERTIONS</a> <li><a name="TOC9" href="#SEC9">QUANTIFIERS</a>
<li><a name="TOC10" href="#SEC10">MATCH POINT RESET</a> <li><a name="TOC10" href="#SEC10">ANCHORS AND SIMPLE ASSERTIONS</a>
<li><a name="TOC11" href="#SEC11">ALTERNATION</a> <li><a name="TOC11" href="#SEC11">MATCH POINT RESET</a>
<li><a name="TOC12" href="#SEC12">CAPTURING</a> <li><a name="TOC12" href="#SEC12">ALTERNATION</a>
<li><a name="TOC13" href="#SEC13">ATOMIC GROUPS</a> <li><a name="TOC13" href="#SEC13">CAPTURING</a>
<li><a name="TOC14" href="#SEC14">COMMENT</a> <li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
<li><a name="TOC15" href="#SEC15">OPTION SETTING</a> <li><a name="TOC15" href="#SEC15">COMMENT</a>
<li><a name="TOC16" href="#SEC16">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a> <li><a name="TOC16" href="#SEC16">OPTION SETTING</a>
<li><a name="TOC17" href="#SEC17">BACKREFERENCES</a> <li><a name="TOC17" href="#SEC17">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
<li><a name="TOC18" href="#SEC18">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a> <li><a name="TOC18" href="#SEC18">BACKREFERENCES</a>
<li><a name="TOC19" href="#SEC19">CONDITIONAL PATTERNS</a> <li><a name="TOC19" href="#SEC19">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
<li><a name="TOC20" href="#SEC20">BACKTRACKING CONTROL</a> <li><a name="TOC20" href="#SEC20">CONDITIONAL PATTERNS</a>
<li><a name="TOC21" href="#SEC21">NEWLINE CONVENTIONS</a> <li><a name="TOC21" href="#SEC21">BACKTRACKING CONTROL</a>
<li><a name="TOC22" href="#SEC22">WHAT \R MATCHES</a> <li><a name="TOC22" href="#SEC22">NEWLINE CONVENTIONS</a>
<li><a name="TOC23" href="#SEC23">CALLOUTS</a> <li><a name="TOC23" href="#SEC23">WHAT \R MATCHES</a>
<li><a name="TOC24" href="#SEC24">SEE ALSO</a> <li><a name="TOC24" href="#SEC24">CALLOUTS</a>
<li><a name="TOC25" href="#SEC25">AUTHOR</a> <li><a name="TOC25" href="#SEC25">SEE ALSO</a>
<li><a name="TOC26" href="#SEC26">REVISION</a> <li><a name="TOC26" href="#SEC26">AUTHOR</a>
<li><a name="TOC27" href="#SEC27">REVISION</a>
</ul> </ul>
<br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION SYNTAX SUMMARY</a><br> <br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
<P> <P>
The full syntax and semantics of the regular expressions that are supported by The full syntax and semantics of the regular expressions that are supported by
PCRE are described in the PCRE are described in the
<a href="pcrepattern.html"><b>pcrepattern</b></a> <a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation. This document contains just a quick-reference summary of the documentation. This document contains a quick-reference summary of the syntax.
syntax.
</P> </P>
<br><a name="SEC2" href="#TOC1">QUOTING</a><br> <br><a name="SEC2" href="#TOC1">QUOTING</a><br>
<P> <P>
@ -59,9 +59,9 @@ syntax.
<P> <P>
<pre> <pre>
\a alarm, that is, the BEL character (hex 07) \a alarm, that is, the BEL character (hex 07)
\cx "control-x", where x is any character \cx "control-x", where x is any ASCII character
\e escape (hex 1B) \e escape (hex 1B)
\f formfeed (hex 0C) \f form feed (hex 0C)
\n newline (hex 0A) \n newline (hex 0A)
\r carriage return (hex 0D) \r carriage return (hex 0D)
\t tab (hex 09) \t tab (hex 09)
@ -75,25 +75,28 @@ syntax.
<pre> <pre>
. any character except newline; . any character except newline;
in dotall mode, any character whatsoever in dotall mode, any character whatsoever
\C one byte, even in UTF-8 mode (best avoided) \C one data unit, even in UTF mode (best avoided)
\d a decimal digit \d a decimal digit
\D a character that is not a decimal digit \D a character that is not a decimal digit
\h a horizontal whitespace character \h a horizontal white space character
\H a character that is not a horizontal whitespace character \H a character that is not a horizontal white space character
\N a character that is not a newline
\p{<i>xx</i>} a character with the <i>xx</i> property \p{<i>xx</i>} a character with the <i>xx</i> property
\P{<i>xx</i>} a character without the <i>xx</i> property \P{<i>xx</i>} a character without the <i>xx</i> property
\R a newline sequence \R a newline sequence
\s a whitespace character \s a white space character
\S a character that is not a whitespace character \S a character that is not a white space character
\v a vertical whitespace character \v a vertical white space character
\V a character that is not a vertical whitespace character \V a character that is not a vertical white space character
\w a "word" character \w a "word" character
\W a "non-word" character \W a "non-word" character
\X an extended Unicode sequence \X a Unicode extended grapheme cluster
</pre> </pre>
In PCRE, \d, \D, \s, \S, \w, and \W recognize only ASCII characters. In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
characters, even in a UTF mode. However, this can be changed by setting the
PCRE_UCP option.
</P> </P>
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTY CODES FOR \p and \P</a><br> <br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
<P> <P>
<pre> <pre>
C Other C Other
@ -142,18 +145,32 @@ In PCRE, \d, \D, \s, \S, \w, and \W recognize only ASCII characters.
Zs Space separator Zs Space separator
</PRE> </PRE>
</P> </P>
<br><a name="SEC6" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br> <br><a name="SEC6" href="#TOC1">PCRE SPECIAL CATEGORY PROPERTIES FOR \p and \P</a><br>
<P>
<pre>
Xan Alphanumeric: union of properties L and N
Xps POSIX space: property Z or tab, NL, VT, FF, CR
Xsp Perl space: property Z or tab, NL, FF, CR
Xwd Perl word: property Xan or underscore
</PRE>
</P>
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
<P> <P>
Arabic, Arabic,
Armenian, Armenian,
Avestan,
Balinese, Balinese,
Bamum,
Batak,
Bengali, Bengali,
Bopomofo, Bopomofo,
Brahmi,
Braille, Braille,
Buginese, Buginese,
Buhid, Buhid,
Canadian_Aboriginal, Canadian_Aboriginal,
Carian, Carian,
Chakma,
Cham, Cham,
Cherokee, Cherokee,
Common, Common,
@ -163,6 +180,7 @@ Cypriot,
Cyrillic, Cyrillic,
Deseret, Deseret,
Devanagari, Devanagari,
Egyptian_Hieroglyphs,
Ethiopic, Ethiopic,
Georgian, Georgian,
Glagolitic, Glagolitic,
@ -175,7 +193,12 @@ Hangul,
Hanunoo, Hanunoo,
Hebrew, Hebrew,
Hiragana, Hiragana,
Imperial_Aramaic,
Inherited, Inherited,
Inscriptional_Pahlavi,
Inscriptional_Parthian,
Javanese,
Kaithi,
Kannada, Kannada,
Katakana, Katakana,
Kayah_Li, Kayah_Li,
@ -186,9 +209,15 @@ Latin,
Lepcha, Lepcha,
Limbu, Limbu,
Linear_B, Linear_B,
Lisu,
Lycian, Lycian,
Lydian, Lydian,
Malayalam, Malayalam,
Mandaic,
Meetei_Mayek,
Meroitic_Cursive,
Meroitic_Hieroglyphs,
Miao,
Mongolian, Mongolian,
Myanmar, Myanmar,
New_Tai_Lue, New_Tai_Lue,
@ -196,6 +225,8 @@ Nko,
Ogham, Ogham,
Old_Italic, Old_Italic,
Old_Persian, Old_Persian,
Old_South_Arabian,
Old_Turkic,
Ol_Chiki, Ol_Chiki,
Oriya, Oriya,
Osmanya, Osmanya,
@ -203,15 +234,21 @@ Phags_Pa,
Phoenician, Phoenician,
Rejang, Rejang,
Runic, Runic,
Samaritan,
Saurashtra, Saurashtra,
Sharada,
Shavian, Shavian,
Sinhala, Sinhala,
Sudanese, Sora_Sompeng,
Sundanese,
Syloti_Nagri, Syloti_Nagri,
Syriac, Syriac,
Tagalog, Tagalog,
Tagbanwa, Tagbanwa,
Tai_Le, Tai_Le,
Tai_Tham,
Tai_Viet,
Takri,
Tamil, Tamil,
Telugu, Telugu,
Thaana, Thaana,
@ -222,7 +259,7 @@ Ugaritic,
Vai, Vai,
Yi. Yi.
</P> </P>
<br><a name="SEC7" href="#TOC1">CHARACTER CLASSES</a><br> <br><a name="SEC8" href="#TOC1">CHARACTER CLASSES</a><br>
<P> <P>
<pre> <pre>
[...] positive character class [...] positive character class
@ -241,15 +278,16 @@ Yi.
lower lower case letter lower lower case letter
print printing, including space print printing, including space
punct printing, excluding alphanumeric punct printing, excluding alphanumeric
space whitespace space white space
upper upper case letter upper upper case letter
word same as \w word same as \w
xdigit hexadecimal digit xdigit hexadecimal digit
</pre> </pre>
In PCRE, POSIX character set names recognize only ASCII characters. You can use In PCRE, POSIX character set names recognize only ASCII characters by default,
but some of them use Unicode properties if PCRE_UCP is set. You can use
\Q...\E inside a character class. \Q...\E inside a character class.
</P> </P>
<br><a name="SEC8" href="#TOC1">QUANTIFIERS</a><br> <br><a name="SEC9" href="#TOC1">QUANTIFIERS</a><br>
<P> <P>
<pre> <pre>
? 0 or 1, greedy ? 0 or 1, greedy
@ -270,10 +308,10 @@ In PCRE, POSIX character set names recognize only ASCII characters. You can use
{n,}? n or more, lazy {n,}? n or more, lazy
</PRE> </PRE>
</P> </P>
<br><a name="SEC9" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br> <br><a name="SEC10" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
<P> <P>
<pre> <pre>
\b word boundary (only ASCII letters recognized) \b word boundary
\B not a word boundary \B not a word boundary
^ start of subject ^ start of subject
also after internal newline in multiline mode also after internal newline in multiline mode
@ -287,19 +325,19 @@ In PCRE, POSIX character set names recognize only ASCII characters. You can use
\G first matching position in subject \G first matching position in subject
</PRE> </PRE>
</P> </P>
<br><a name="SEC10" href="#TOC1">MATCH POINT RESET</a><br> <br><a name="SEC11" href="#TOC1">MATCH POINT RESET</a><br>
<P> <P>
<pre> <pre>
\K reset start of match \K reset start of match
</PRE> </PRE>
</P> </P>
<br><a name="SEC11" href="#TOC1">ALTERNATION</a><br> <br><a name="SEC12" href="#TOC1">ALTERNATION</a><br>
<P> <P>
<pre> <pre>
expr|expr|expr... expr|expr|expr...
</PRE> </PRE>
</P> </P>
<br><a name="SEC12" href="#TOC1">CAPTURING</a><br> <br><a name="SEC13" href="#TOC1">CAPTURING</a><br>
<P> <P>
<pre> <pre>
(...) capturing group (...) capturing group
@ -311,19 +349,19 @@ In PCRE, POSIX character set names recognize only ASCII characters. You can use
capturing groups in each alternative capturing groups in each alternative
</PRE> </PRE>
</P> </P>
<br><a name="SEC13" href="#TOC1">ATOMIC GROUPS</a><br> <br><a name="SEC14" href="#TOC1">ATOMIC GROUPS</a><br>
<P> <P>
<pre> <pre>
(?&#62;...) atomic, non-capturing group (?&#62;...) atomic, non-capturing group
</PRE> </PRE>
</P> </P>
<br><a name="SEC14" href="#TOC1">COMMENT</a><br> <br><a name="SEC15" href="#TOC1">COMMENT</a><br>
<P> <P>
<pre> <pre>
(?#....) comment (not nestable) (?#....) comment (not nestable)
</PRE> </PRE>
</P> </P>
<br><a name="SEC15" href="#TOC1">OPTION SETTING</a><br> <br><a name="SEC16" href="#TOC1">OPTION SETTING</a><br>
<P> <P>
<pre> <pre>
(?i) caseless (?i) caseless
@ -334,13 +372,18 @@ In PCRE, POSIX character set names recognize only ASCII characters. You can use
(?x) extended (ignore white space) (?x) extended (ignore white space)
(?-...) unset option(s) (?-...) unset option(s)
</pre> </pre>
The following is recognized only at the start of a pattern or after one of the The following are recognized only at the start of a pattern or after one of the
newline-setting options with similar syntax: newline-setting options with similar syntax:
<pre> <pre>
(*UTF8) set UTF-8 mode (*NO_START_OPT) no start-match optimization (PCRE_NO_START_OPTIMIZE)
(*UTF8) set UTF-8 mode: 8-bit library (PCRE_UTF8)
(*UTF16) set UTF-16 mode: 16-bit library (PCRE_UTF16)
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
(*UTF) set appropriate UTF mode for the library in use
(*UCP) set PCRE_UCP (use Unicode properties for \d etc)
</PRE> </PRE>
</P> </P>
<br><a name="SEC16" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br> <br><a name="SEC17" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
<P> <P>
<pre> <pre>
(?=...) positive look ahead (?=...) positive look ahead
@ -350,7 +393,7 @@ newline-setting options with similar syntax:
</pre> </pre>
Each top-level branch of a look behind must be of a fixed length. Each top-level branch of a look behind must be of a fixed length.
</P> </P>
<br><a name="SEC17" href="#TOC1">BACKREFERENCES</a><br> <br><a name="SEC18" href="#TOC1">BACKREFERENCES</a><br>
<P> <P>
<pre> <pre>
\n reference by number (can be ambiguous) \n reference by number (can be ambiguous)
@ -364,7 +407,7 @@ Each top-level branch of a look behind must be of a fixed length.
(?P=name) reference by name (Python) (?P=name) reference by name (Python)
</PRE> </PRE>
</P> </P>
<br><a name="SEC18" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br> <br><a name="SEC19" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
<P> <P>
<pre> <pre>
(?R) recurse whole pattern (?R) recurse whole pattern
@ -383,7 +426,7 @@ Each top-level branch of a look behind must be of a fixed length.
\g'-n' call subpattern by relative number (PCRE extension) \g'-n' call subpattern by relative number (PCRE extension)
</PRE> </PRE>
</P> </P>
<br><a name="SEC19" href="#TOC1">CONDITIONAL PATTERNS</a><br> <br><a name="SEC20" href="#TOC1">CONDITIONAL PATTERNS</a><br>
<P> <P>
<pre> <pre>
(?(condition)yes-pattern) (?(condition)yes-pattern)
@ -402,12 +445,13 @@ Each top-level branch of a look behind must be of a fixed length.
(?(assert)... assertion condition (?(assert)... assertion condition
</PRE> </PRE>
</P> </P>
<br><a name="SEC20" href="#TOC1">BACKTRACKING CONTROL</a><br> <br><a name="SEC21" href="#TOC1">BACKTRACKING CONTROL</a><br>
<P> <P>
The following act as soon as they are reached: The following act as soon as they are reached:
<pre> <pre>
(*ACCEPT) force successful match (*ACCEPT) force successful match
(*FAIL) force backtrack; synonym (*F) (*FAIL) force backtrack; synonym (*F)
(*MARK:NAME) set name to be passed back; synonym (*:NAME)
</pre> </pre>
The following act only when a subsequent match failure causes a backtrack to The following act only when a subsequent match failure causes a backtrack to
reach them. They all force a match failure, but they differ in what happens reach them. They all force a match failure, but they differ in what happens
@ -416,14 +460,18 @@ pattern is not anchored.
<pre> <pre>
(*COMMIT) overall failure, no advance of starting point (*COMMIT) overall failure, no advance of starting point
(*PRUNE) advance to next starting character (*PRUNE) advance to next starting character
(*SKIP) advance start to current matching position (*PRUNE:NAME) equivalent to (*MARK:NAME)(*PRUNE)
(*SKIP) advance to current matching position
(*SKIP:NAME) advance to position corresponding to an earlier
(*MARK:NAME); if not found, the (*SKIP) is ignored
(*THEN) local failure, backtrack to next alternation (*THEN) local failure, backtrack to next alternation
(*THEN:NAME) equivalent to (*MARK:NAME)(*THEN)
</PRE> </PRE>
</P> </P>
<br><a name="SEC21" href="#TOC1">NEWLINE CONVENTIONS</a><br> <br><a name="SEC22" href="#TOC1">NEWLINE CONVENTIONS</a><br>
<P> <P>
These are recognized only at the very start of the pattern or after a These are recognized only at the very start of the pattern or after a
(*BSR_...) or (*UTF8) option. (*BSR_...), (*UTF8), (*UTF16), (*UTF32) or (*UCP) option.
<pre> <pre>
(*CR) carriage return only (*CR) carriage return only
(*LF) linefeed only (*LF) linefeed only
@ -432,28 +480,28 @@ These are recognized only at the very start of the pattern or after a
(*ANY) any Unicode newline sequence (*ANY) any Unicode newline sequence
</PRE> </PRE>
</P> </P>
<br><a name="SEC22" href="#TOC1">WHAT \R MATCHES</a><br> <br><a name="SEC23" href="#TOC1">WHAT \R MATCHES</a><br>
<P> <P>
These are recognized only at the very start of the pattern or after a These are recognized only at the very start of the pattern or after a
(*...) option that sets the newline convention or UTF-8 mode. (*...) option that sets the newline convention or a UTF or UCP mode.
<pre> <pre>
(*BSR_ANYCRLF) CR, LF, or CRLF (*BSR_ANYCRLF) CR, LF, or CRLF
(*BSR_UNICODE) any Unicode newline sequence (*BSR_UNICODE) any Unicode newline sequence
</PRE> </PRE>
</P> </P>
<br><a name="SEC23" href="#TOC1">CALLOUTS</a><br> <br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
<P> <P>
<pre> <pre>
(?C) callout (?C) callout
(?Cn) callout with data n (?Cn) callout with data n
</PRE> </PRE>
</P> </P>
<br><a name="SEC24" href="#TOC1">SEE ALSO</a><br> <br><a name="SEC25" href="#TOC1">SEE ALSO</a><br>
<P> <P>
<b>pcrepattern</b>(3), <b>pcreapi</b>(3), <b>pcrecallout</b>(3), <b>pcrepattern</b>(3), <b>pcreapi</b>(3), <b>pcrecallout</b>(3),
<b>pcrematching</b>(3), <b>pcre</b>(3). <b>pcrematching</b>(3), <b>pcre</b>(3).
</P> </P>
<br><a name="SEC25" href="#TOC1">AUTHOR</a><br> <br><a name="SEC26" href="#TOC1">AUTHOR</a><br>
<P> <P>
Philip Hazel Philip Hazel
<br> <br>
@ -462,11 +510,11 @@ University Computing Service
Cambridge CB2 3QH, England. Cambridge CB2 3QH, England.
<br> <br>
</P> </P>
<br><a name="SEC26" href="#TOC1">REVISION</a><br> <br><a name="SEC27" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 11 April 2009 Last updated: 11 November 2012
<br> <br>
Copyright &copy; 1997-2009 University of Cambridge. Copyright &copy; 1997-2012 University of Cambridge.
<br> <br>
<p> <p>
Return to the <a href="index.html">PCRE index page</a>. Return to the <a href="index.html">PCRE index page</a>.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,270 @@
<html>
<head>
<title>pcreunicode specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcreunicode man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
UTF-8, UTF-16, UTF-32, AND UNICODE PROPERTY SUPPORT
</b><br>
<P>
As well as UTF-8 support, PCRE also supports UTF-16 (from release 8.30) and
UTF-32 (from release 8.32), by means of two additional libraries. They can be
built as well as, or instead of, the 8-bit library.
</P>
<br><b>
UTF-8 SUPPORT
</b><br>
<P>
In order to process UTF-8 strings, you must build PCRE's 8-bit library with UTF
support, and, in addition, you must call
<a href="pcre_compile.html"><b>pcre_compile()</b></a>
with the PCRE_UTF8 option flag, or the pattern must start with the sequence
(*UTF8) or (*UTF). When either of these is the case, both the pattern and any
subject strings that are matched against it are treated as UTF-8 strings
instead of strings of individual 1-byte characters.
</P>
<br><b>
UTF-16 AND UTF-32 SUPPORT
</b><br>
<P>
In order to process UTF-16 or UTF-32 strings, you must build PCRE's 16-bit or
32-bit library with UTF support, and, in addition, you must call
<a href="pcre16_compile.html"><b>pcre16_compile()</b></a>
or
<a href="pcre32_compile.html"><b>pcre32_compile()</b></a>
with the PCRE_UTF16 or PCRE_UTF32 option flag, as appropriate. Alternatively,
the pattern must start with the sequence (*UTF16), (*UTF32), as appropriate, or
(*UTF), which can be used with either library. When UTF mode is set, both the
pattern and any subject strings that are matched against it are treated as
UTF-16 or UTF-32 strings instead of strings of individual 16-bit or 32-bit
characters.
</P>
<br><b>
UTF SUPPORT OVERHEAD
</b><br>
<P>
If you compile PCRE with UTF support, but do not use it at run time, the
library will be a bit bigger, but the additional run time overhead is limited
to testing the PCRE_UTF[8|16|32] flag occasionally, so should not be very big.
</P>
<br><b>
UNICODE PROPERTY SUPPORT
</b><br>
<P>
If PCRE is built with Unicode character property support (which implies UTF
support), the escape sequences \p{..}, \P{..}, and \X can be used.
The available properties that can be tested are limited to the general
category properties such as Lu for an upper case letter or Nd for a decimal
number, the Unicode script names such as Arabic or Han, and the derived
properties Any and L&amp;. Full lists are given in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
and
<a href="pcresyntax.html"><b>pcresyntax</b></a>
documentation. Only the short names for properties are supported. For example,
\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
compatibility with Perl 5.6. PCRE does not support this.
<a name="utf8strings"></a></P>
<br><b>
Validity of UTF-8 strings
</b><br>
<P>
When you set the PCRE_UTF8 flag, the byte strings passed as patterns and
subjects are (by default) checked for validity on entry to the relevant
functions. The entire string is checked before any other processing takes
place. From release 7.3 of PCRE, the check is according to the rules of RFC 3629,
which are themselves derived from the Unicode specification. Earlier releases
of PCRE followed the rules of RFC 2279, which allows the full range of 31-bit
values (0 to 0x7FFFFFFF). The current check allows only values in the range U+0
to U+10FFFF, excluding the surrogate area and the non-characters.
</P>
<P>
Characters in the "Surrogate Area" of Unicode are reserved for use by UTF-16,
where they are used in pairs to encode codepoints with values greater than
0xFFFF. The code points that are encoded by UTF-16 pairs are available
independently in the UTF-8 and UTF-32 encodings. (In other words, the whole
surrogate thing is a fudge for UTF-16 which unfortunately messes up UTF-8 and
UTF-32.)
</P>
<P>
Also excluded are the "Non-Character" code points, which are U+FDD0 to U+FDEF
and the last two code points in each plane, U+??FFFE and U+??FFFF.
</P>
<P>
If an invalid UTF-8 string is passed to PCRE, an error return is given. At
compile time, the only additional information is the offset to the first byte
of the failing character. The run-time functions <b>pcre_exec()</b> and
<b>pcre_dfa_exec()</b> also pass back this information, as well as a more
detailed reason code if the caller has provided memory in which to do this.
</P>
<P>
In some situations, you may already know that your strings are valid, and
therefore want to skip these checks in order to improve performance, for
example in the case of a long subject string that is being scanned repeatedly.
If you set the PCRE_NO_UTF8_CHECK flag at compile time or at run time, PCRE
assumes that the pattern or subject it is given (respectively) contains only
valid UTF-8 codes. In this case, it does not diagnose an invalid UTF-8 string.
</P>
<P>
Note that passing PCRE_NO_UTF8_CHECK to <b>pcre_compile()</b> just disables the
check for the pattern; it does not also apply to subject strings. If you want
to disable the check for a subject string you must pass this option to
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>.
</P>
<P>
If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, the result
is undefined and your program may crash.
<a name="utf16strings"></a></P>
<br><b>
Validity of UTF-16 strings
</b><br>
<P>
When you set the PCRE_UTF16 flag, the strings of 16-bit data units that are
passed as patterns and subjects are (by default) checked for validity on entry
to the relevant functions. Values other than those in the surrogate range
U+D800 to U+DFFF are independent code points. Values in the surrogate range
must be used in pairs in the correct manner.
</P>
<P>
Excluded are the "Non-Character" code points, which are U+FDD0 to U+FDEF
and the last two code points in each plane, U+??FFFE and U+??FFFF.
</P>
<P>
If an invalid UTF-16 string is passed to PCRE, an error return is given. At
compile time, the only additional information is the offset to the first data
unit of the failing character. The run-time functions <b>pcre16_exec()</b> and
<b>pcre16_dfa_exec()</b> also pass back this information, as well as a more
detailed reason code if the caller has provided memory in which to do this.
</P>
<P>
In some situations, you may already know that your strings are valid, and
therefore want to skip these checks in order to improve performance. If you set
the PCRE_NO_UTF16_CHECK flag at compile time or at run time, PCRE assumes that
the pattern or subject it is given (respectively) contains only valid UTF-16
sequences. In this case, it does not diagnose an invalid UTF-16 string.
However, if an invalid string is passed, the result is undefined.
<a name="utf32strings"></a></P>
<br><b>
Validity of UTF-32 strings
</b><br>
<P>
When you set the PCRE_UTF32 flag, the strings of 32-bit data units that are
passed as patterns and subjects are (by default) checked for validity on entry
to the relevant functions. This check allows only values in the range U+0
to U+10FFFF, excluding the surrogate area U+D800 to U+DFFF, and the
"Non-Character" code points, which are U+FDD0 to U+FDEF and the last two
characters in each plane, U+??FFFE and U+??FFFF.
</P>
<P>
If an invalid UTF-32 string is passed to PCRE, an error return is given. At
compile time, the only additional information is the offset to the first data
unit of the failing character. The run-time functions <b>pcre32_exec()</b> and
<b>pcre32_dfa_exec()</b> also pass back this information, as well as a more
detailed reason code if the caller has provided memory in which to do this.
</P>
<P>
In some situations, you may already know that your strings are valid, and
therefore want to skip these checks in order to improve performance. If you set
the PCRE_NO_UTF32_CHECK flag at compile time or at run time, PCRE assumes that
the pattern or subject it is given (respectively) contains only valid UTF-32
sequences. In this case, it does not diagnose an invalid UTF-32 string.
However, if an invalid string is passed, the result is undefined.
</P>
<br><b>
General comments about UTF modes
</b><br>
<P>
1. Codepoints less than 256 can be specified in patterns by either braced or
unbraced hexadecimal escape sequences (for example, \x{b3} or \xb3). Larger
values have to use braced sequences.
</P>
<P>
2. Octal numbers up to \777 are recognized, and in UTF-8 mode they match
two-byte characters for values greater than \177.
</P>
<P>
3. Repeat quantifiers apply to complete UTF characters, not to individual
data units, for example: \x{100}{3}.
</P>
<P>
4. The dot metacharacter matches one UTF character instead of a single data
unit.
</P>
<P>
5. The escape sequence \C can be used to match a single byte in UTF-8 mode, or
a single 16-bit data unit in UTF-16 mode, or a single 32-bit data unit in
UTF-32 mode, but its use can lead to some strange effects because it breaks up
multi-unit characters (see the description of \C in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation). The use of \C is not supported in the alternative matching
function <b>pcre[16|32]_dfa_exec()</b>, nor is it supported in UTF mode by the
JIT optimization of <b>pcre[16|32]_exec()</b>. If JIT optimization is requested
for a UTF pattern that contains \C, it will not succeed, and so the matching
will be carried out by the normal interpretive function.
</P>
<P>
6. The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
test characters of any code value, but, by default, the characters that PCRE
recognizes as digits, spaces, or word characters remain the same set as in
non-UTF mode, all with values less than 256. This remains true even when PCRE
is built to include Unicode property support, because to do otherwise would
slow down PCRE in many common cases. Note in particular that this applies to
\b and \B, because they are defined in terms of \w and \W. If you really
want to test for a wider sense of, say, "digit", you can use explicit Unicode
property tests such as \p{Nd}. Alternatively, if you set the PCRE_UCP option,
the way that the character escapes work is changed so that Unicode properties
are used to determine which characters match. There are more details in the
section on
<a href="pcrepattern.html#genericchartypes">generic character types</a>
in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation.
</P>
<P>
7. Similarly, characters that match the POSIX named character classes are all
low-valued characters, unless the PCRE_UCP option is set.
</P>
<P>
8. However, the horizontal and vertical white space matching escapes (\h, \H,
\v, and \V) do match all the appropriate Unicode characters, whether or not
PCRE_UCP is set.
</P>
<P>
9. Case-insensitive matching applies only to characters whose values are less
than 128, unless PCRE is built with Unicode property support. A few Unicode
characters such as Greek sigma have more than two codepoints that are
case-equivalent. Up to and including PCRE release 8.31, only one-to-one case
mappings were supported, but later releases (with Unicode property support) do
treat as case-equivalent all versions of characters such as Greek sigma.
</P>
<br><b>
AUTHOR
</b><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><b>
REVISION
</b><br>
<P>
Last updated: 11 November 2012
<br>
Copyright &copy; 1997-2012 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>

View File

@ -18,6 +18,12 @@ The HTML documentation for PCRE comprises the following pages:
<tr><td><a href="pcre.html">pcre</a></td> <tr><td><a href="pcre.html">pcre</a></td>
<td>&nbsp;&nbsp;Introductory page</td></tr> <td>&nbsp;&nbsp;Introductory page</td></tr>
<tr><td><a href="pcre16.html">pcre16</a></td>
<td>&nbsp;&nbsp;Discussion of the 16-bit PCRE library</td></tr>
<tr><td><a href="pcre32.html">pcre32</a></td>
<td>&nbsp;&nbsp;Discussion of the 32-bit PCRE library</td></tr>
<tr><td><a href="pcre-config.html">pcre-config</a></td> <tr><td><a href="pcre-config.html">pcre-config</a></td>
<td>&nbsp;&nbsp;Information about the installation configuration</td></tr> <td>&nbsp;&nbsp;Information about the installation configuration</td></tr>
@ -36,9 +42,18 @@ The HTML documentation for PCRE comprises the following pages:
<tr><td><a href="pcrecpp.html">pcrecpp</a></td> <tr><td><a href="pcrecpp.html">pcrecpp</a></td>
<td>&nbsp;&nbsp;The C++ wrapper for the PCRE library</td></tr> <td>&nbsp;&nbsp;The C++ wrapper for the PCRE library</td></tr>
<tr><td><a href="pcredemo.html">pcredemo</a></td>
<td>&nbsp;&nbsp;A demonstration C program that uses the PCRE library</td></tr>
<tr><td><a href="pcregrep.html">pcregrep</a></td> <tr><td><a href="pcregrep.html">pcregrep</a></td>
<td>&nbsp;&nbsp;The <b>pcregrep</b> command</td></tr> <td>&nbsp;&nbsp;The <b>pcregrep</b> command</td></tr>
<tr><td><a href="pcrejit.html">pcrejit</a></td>
<td>&nbsp;&nbsp;Discussion of the just-in-time optimization support</td></tr>
<tr><td><a href="pcrelimits.html">pcrelimits</a></td>
<td>&nbsp;&nbsp;Details of size and other limits</td></tr>
<tr><td><a href="pcrematching.html">pcrematching</a></td> <tr><td><a href="pcrematching.html">pcrematching</a></td>
<td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr> <td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>
@ -58,7 +73,7 @@ The HTML documentation for PCRE comprises the following pages:
<td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr> <td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr>
<tr><td><a href="pcresample.html">pcresample</a></td> <tr><td><a href="pcresample.html">pcresample</a></td>
<td>&nbsp;&nbsp;Description of the sample program</td></tr> <td>&nbsp;&nbsp;Discussion of the pcredemo program</td></tr>
<tr><td><a href="pcrestack.html">pcrestack</a></td> <tr><td><a href="pcrestack.html">pcrestack</a></td>
<td>&nbsp;&nbsp;Discussion of PCRE's stack usage</td></tr> <td>&nbsp;&nbsp;Discussion of PCRE's stack usage</td></tr>
@ -68,15 +83,22 @@ The HTML documentation for PCRE comprises the following pages:
<tr><td><a href="pcretest.html">pcretest</a></td> <tr><td><a href="pcretest.html">pcretest</a></td>
<td>&nbsp;&nbsp;The <b>pcretest</b> command for testing PCRE</td></tr> <td>&nbsp;&nbsp;The <b>pcretest</b> command for testing PCRE</td></tr>
<tr><td><a href="pcreunicode.html">pcreunicode</a></td>
<td>&nbsp;&nbsp;Discussion of Unicode and UTF-8/UTF-16/UTF-32 support</td></tr>
</table> </table>
<p> <p>
There are also individual pages that summarize the interface for each function There are also individual pages that summarize the interface for each function
in the library: in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
functions.
</p> </p>
<table> <table>
<tr><td><a href="pcre_assign_jit_stack.html">pcre_assign_jit_stack</a></td>
<td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
<tr><td><a href="pcre_compile.html">pcre_compile</a></td> <tr><td><a href="pcre_compile.html">pcre_compile</a></td>
<td>&nbsp;&nbsp;Compile a regular expression</td></tr> <td>&nbsp;&nbsp;Compile a regular expression</td></tr>
@ -96,6 +118,9 @@ in the library:
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(DFA algorithm; <i>not</i> Perl compatible)</td></tr> (DFA algorithm; <i>not</i> Perl compatible)</td></tr>
<tr><td><a href="pcre_free_study.html">pcre_free_study</a></td>
<td>&nbsp;&nbsp;Free study data</td></tr>
<tr><td><a href="pcre_exec.html">pcre_exec</a></td> <tr><td><a href="pcre_exec.html">pcre_exec</a></td>
<td>&nbsp;&nbsp;Match a compiled pattern to a subject string <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
(Perl compatible)</td></tr> (Perl compatible)</td></tr>
@ -124,15 +149,30 @@ in the library:
<tr><td><a href="pcre_info.html">pcre_info</a></td> <tr><td><a href="pcre_info.html">pcre_info</a></td>
<td>&nbsp;&nbsp;Obsolete information extraction function</td></tr> <td>&nbsp;&nbsp;Obsolete information extraction function</td></tr>
<tr><td><a href="pcre_jit_stack_alloc.html">pcre_jit_stack_alloc</a></td>
<td>&nbsp;&nbsp;Create a stack for JIT matching</td></tr>
<tr><td><a href="pcre_jit_stack_free.html">pcre_jit_stack_free</a></td>
<td>&nbsp;&nbsp;Free a JIT matching stack</td></tr>
<tr><td><a href="pcre_maketables.html">pcre_maketables</a></td> <tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
<td>&nbsp;&nbsp;Build character tables in current locale</td></tr> <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
<tr><td><a href="pcre_pattern_to_host_byte_order.html">pcre_pattern_to_host_byte_order</a></td>
<td>&nbsp;&nbsp;Convert compiled pattern to host byte order if necessary</td></tr>
<tr><td><a href="pcre_refcount.html">pcre_refcount</a></td> <tr><td><a href="pcre_refcount.html">pcre_refcount</a></td>
<td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr> <td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr>
<tr><td><a href="pcre_study.html">pcre_study</a></td> <tr><td><a href="pcre_study.html">pcre_study</a></td>
<td>&nbsp;&nbsp;Study a compiled pattern</td></tr> <td>&nbsp;&nbsp;Study a compiled pattern</td></tr>
<tr><td><a href="pcre_utf16_to_host_byte_order.html">pcre_utf16_to_host_byte_order</a></td>
<td>&nbsp;&nbsp;Convert UTF-16 string to host byte order if necessary</td></tr>
<tr><td><a href="pcre_utf32_to_host_byte_order.html">pcre_utf32_to_host_byte_order</a></td>
<td>&nbsp;&nbsp;Convert UTF-32 string to host byte order if necessary</td></tr>
<tr><td><a href="pcre_version.html">pcre_version</a></td> <tr><td><a href="pcre_version.html">pcre_version</a></td>
<td>&nbsp;&nbsp;Return PCRE version and release date</td></tr> <td>&nbsp;&nbsp;Return PCRE version and release date</td></tr>
</table> </table>

View File

@ -1,4 +1,4 @@
.TH PCRE-CONFIG 1 .TH PCRE-CONFIG 1 "01 January 2012" "PCRE 8.30"
.SH NAME .SH NAME
pcre-config - program to return PCRE configuration pcre-config - program to return PCRE configuration
.SH SYNOPSIS .SH SYNOPSIS
@ -6,14 +6,20 @@ pcre-config - program to return PCRE configuration
.sp .sp
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs] .B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
.ti +5n .ti +5n
.B [--libs-posix] [--cflags] [--cflags-posix] .B [--libs16] [--libs32] [--libs-cpp] [--libs-posix]
.ti +5n
.B [--cflags] [--cflags-posix]
. .
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
.sp .sp
\fBpcre-config\fP returns the configuration of the installed PCRE \fBpcre-config\fP returns the configuration of the installed PCRE
libraries and the options required to compile a program to use them. libraries and the options required to compile a program to use them. Some of
the options apply only to the 8-bit, or 16-bit, or 32-bit libraries,
respectively, and are
not available if only one of those libraries has been built. If an unavailable
option is encountered, the "usage" information is output.
. .
. .
.SH OPTIONS .SH OPTIONS
@ -34,11 +40,24 @@ output.
.TP 10 .TP 10
\fB--libs\fP \fB--libs\fP
Writes to the standard output the command line options required to link Writes to the standard output the command line options required to link
with PCRE (\fB-lpcre\fP on many systems). with the 8-bit PCRE library (\fB-lpcre\fP on many systems).
.TP 10
\fB--libs16\fP
Writes to the standard output the command line options required to link
with the 16-bit PCRE library (\fB-lpcre16\fP on many systems).
.TP 10
\fB--libs32\fP
Writes to the standard output the command line options required to link
with the 32-bit PCRE library (\fB-lpcre32\fP on many systems).
.TP 10
\fB--libs-cpp\fP
Writes to the standard output the command line options required to link with
PCRE's C++ wrapper library (\fB-lpcrecpp\fP \fB-lpcre\fP on many
systems).
.TP 10 .TP 10
\fB--libs-posix\fP \fB--libs-posix\fP
Writes to the standard output the command line options required to link with Writes to the standard output the command line options required to link with
the PCRE posix emulation library (\fB-lpcreposix\fP \fB-lpcre\fP on many PCRE's POSIX API wrapper library (\fB-lpcreposix\fP \fB-lpcre\fP on many
systems). systems).
.TP 10 .TP 10
\fB--cflags\fP \fB--cflags\fP
@ -48,7 +67,7 @@ many systems).
.TP 10 .TP 10
\fB--cflags-posix\fP \fB--cflags-posix\fP
Writes to the standard output the command line options required to compile Writes to the standard output the command line options required to compile
files that use the PCRE posix emulation library (this may include some \fB-I\fP files that use PCRE's POSIX API wrapper library (this may include some \fB-I\fP
options, but is blank on many systems). options, but is blank on many systems).
. .
. .
@ -62,12 +81,12 @@ options, but is blank on many systems).
.rs .rs
.sp .sp
This manual page was originally written by Mark Baker for the Debian GNU/Linux This manual page was originally written by Mark Baker for the Debian GNU/Linux
system. It has been slightly revised as a generic PCRE man page. system. It has been subsequently revised as a generic PCRE man page.
. .
. .
.SH REVISION .SH REVISION
.rs .rs
.sp .sp
.nf .nf
Last updated: 18 April 2007 Last updated: 24 June 2012
.fi .fi

View File

@ -8,13 +8,18 @@ NAME
SYNOPSIS SYNOPSIS
pcre-config [--prefix] [--exec-prefix] [--version] [--libs] pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
[--libs-posix] [--cflags] [--cflags-posix] [--libs16] [--libs32] [--libs-cpp] [--libs-posix]
[--cflags] [--cflags-posix]
DESCRIPTION DESCRIPTION
pcre-config returns the configuration of the installed PCRE libraries pcre-config returns the configuration of the installed PCRE libraries
and the options required to compile a program to use them. and the options required to compile a program to use them. Some of the
options apply only to the 8-bit, or 16-bit, or 32-bit libraries,
respectively, and are not available if only one of those libraries has
been built. If an unavailable option is encountered, the "usage" infor-
mation is output.
OPTIONS OPTIONS
@ -32,21 +37,35 @@ OPTIONS
the standard output. the standard output.
--libs Writes to the standard output the command line options --libs Writes to the standard output the command line options
required to link with PCRE (-lpcre on many systems). required to link with the 8-bit PCRE library (-lpcre on many
systems).
--libs16 Writes to the standard output the command line options
required to link with the 16-bit PCRE library (-lpcre16 on
many systems).
--libs32 Writes to the standard output the command line options
required to link with the 32-bit PCRE library (-lpcre32 on
many systems).
--libs-cpp
Writes to the standard output the command line options
required to link with PCRE's C++ wrapper library (-lpcrecpp
-lpcre on many systems).
--libs-posix --libs-posix
Writes to the standard output the command line options Writes to the standard output the command line options
required to link with the PCRE posix emulation library required to link with PCRE's POSIX API wrapper library
(-lpcreposix -lpcre on many systems). (-lpcreposix -lpcre on many systems).
--cflags Writes to the standard output the command line options --cflags Writes to the standard output the command line options
required to compile files that use PCRE (this may include required to compile files that use PCRE (this may include
some -I options, but is blank on many systems). some -I options, but is blank on many systems).
--cflags-posix --cflags-posix
Writes to the standard output the command line options Writes to the standard output the command line options
required to compile files that use the PCRE posix emulation required to compile files that use PCRE's POSIX API wrapper
library (this may include some -I options, but is blank on library (this may include some -I options, but is blank on
many systems). many systems).
@ -57,11 +76,11 @@ SEE ALSO
AUTHOR AUTHOR
This manual page was originally written by Mark Baker for the Debian This manual page was originally written by Mark Baker for the Debian
GNU/Linux system. It has been slightly revised as a generic PCRE man GNU/Linux system. It has been subsequently revised as a generic PCRE
page. man page.
REVISION REVISION
Last updated: 18 April 2007 Last updated: 24 June 2012

View File

@ -1,4 +1,4 @@
.TH PCRE 3 .TH PCRE 3 "11 November 2012" "PCRE 8.32"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH INTRODUCTION .SH INTRODUCTION
@ -6,21 +6,50 @@ PCRE - Perl-compatible regular expressions
.sp .sp
The PCRE library is a set of functions that implement regular expression The PCRE library is a set of functions that implement regular expression
pattern matching using the same syntax and semantics as Perl, with just a few pattern matching using the same syntax and semantics as Perl, with just a few
differences. Certain features that appeared in Python and PCRE before they differences. Some features that appeared in Python and PCRE before they
appeared in Perl are also available using the Python syntax. There is also some appeared in Perl are also available using the Python syntax, there is some
support for certain .NET and Oniguruma syntax items, and there is an option for support for one or two .NET and Oniguruma syntax items, and there is an option
requesting some minor changes that give better JavaScript compatibility. for requesting some minor changes that give better JavaScript compatibility.
.P .P
The current implementation of PCRE (release 7.x) corresponds approximately with Starting with release 8.30, it is possible to compile two separate PCRE
Perl 5.10, including support for UTF-8 encoded strings and Unicode general libraries: the original, which supports 8-bit character strings (including
category properties. However, UTF-8 and Unicode support has to be explicitly UTF-8 strings), and a second library that supports 16-bit character strings
(including UTF-16 strings). The build process allows either one or both to be
built. The majority of the work to make this possible was done by Zoltan
Herczeg.
.P
Starting with release 8.32 it is possible to compile a third separate PCRE
library, which supports 32-bit character strings (including
UTF-32 strings). The build process allows any set of the 8-, 16- and 32-bit
libraries. The work to make this possible was done by Christian Persch.
.P
The three libraries contain identical sets of functions, except that the names
in the 16-bit library start with \fBpcre16_\fP instead of \fBpcre_\fP, and the
names in the 32-bit library start with \fBpcre32_\fP instead of \fBpcre_\fP. To
avoid over-complication and reduce the documentation maintenance load, most of
the documentation describes the 8-bit library, with the differences for the
16-bit and 32-bit libraries described separately in the
.\" HREF
\fBpcre16\fP
and
.\" HREF
\fBpcre32\fP
.\"
pages. References to functions or structures of the form \fIpcre[16|32]_xxx\fP
should be read as meaning "\fIpcre_xxx\fP when using the 8-bit library,
\fIpcre16_xxx\fP when using the 16-bit library, or \fIpcre32_xxx\fP when using
the 32-bit library".
.P
The current implementation of PCRE corresponds approximately with Perl 5.12,
including support for UTF-8/16/32 encoded strings and Unicode general category
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
enabled; it is not the default. The Unicode tables correspond to Unicode enabled; it is not the default. The Unicode tables correspond to Unicode
release 5.1. release 6.2.0.
.P .P
In addition to the Perl-compatible matching function, PCRE contains an In addition to the Perl-compatible matching function, PCRE contains an
alternative matching function that matches the same compiled patterns in a alternative function that matches the same compiled patterns in a different
different way. In certain circumstances, the alternative function has some way. In certain circumstances, the alternative function has some advantages.
advantages. For a discussion of the two matching algorithms, see the For a discussion of the two matching algorithms, see the
.\" HREF .\" HREF
\fBpcrematching\fP \fBpcrematching\fP
.\" .\"
@ -28,13 +57,13 @@ page.
.P .P
PCRE is written in C and released as a C library. A number of people have PCRE is written in C and released as a C library. A number of people have
written wrappers and interfaces of various kinds. In particular, Google Inc. written wrappers and interfaces of various kinds. In particular, Google Inc.
have provided a comprehensive C++ wrapper. This is now included as part of the have provided a comprehensive C++ wrapper for the 8-bit library. This is now
PCRE distribution. The included as part of the PCRE distribution. The
.\" HREF .\" HREF
\fBpcrecpp\fP \fBpcrecpp\fP
.\" .\"
page has details of this interface. Other people's contributions can be found page has details of this interface. Other people's contributions can be found
in the \fIContrib\fR directory at the primary FTP site, which is: in the \fIContrib\fP directory at the primary FTP site, which is:
.sp .sp
.\" HTML <a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre"> .\" HTML <a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">
.\" </a> .\" </a>
@ -43,22 +72,22 @@ ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
Details of exactly which Perl regular expression features are and are not Details of exactly which Perl regular expression features are and are not
supported by PCRE are given in separate documents. See the supported by PCRE are given in separate documents. See the
.\" HREF .\" HREF
\fBpcrepattern\fR \fBpcrepattern\fP
.\" .\"
and and
.\" HREF .\" HREF
\fBpcrecompat\fR \fBpcrecompat\fP
.\" .\"
pages. There is a syntax summary in the pages. There is a syntax summary in the
.\" HREF .\" HREF
\fBpcresyntax\fR \fBpcresyntax\fP
.\" .\"
page. page.
.P .P
Some features of PCRE can be included, excluded, or changed when the library is Some features of PCRE can be included, excluded, or changed when the library is
built. The built. The
.\" HREF .\" HREF
\fBpcre_config()\fR \fBpcre_config()\fP
.\" .\"
function makes it possible for a client to discover which features are function makes it possible for a client to discover which features are
available. The features themselves are described in the available. The features themselves are described in the
@ -66,15 +95,48 @@ available. The features themselves are described in the
\fBpcrebuild\fP \fBpcrebuild\fP
.\" .\"
page. Documentation about building PCRE for various operating systems can be page. Documentation about building PCRE for various operating systems can be
found in the \fBREADME\fP file in the source distribution. found in the \fBREADME\fP and \fBNON-AUTOTOOLS_BUILD\fP files in the source
distribution.
.P .P
The library contains a number of undocumented internal functions and data The libraries contain a number of undocumented internal functions and data
tables that are used by more than one of the exported external functions, but tables that are used by more than one of the exported external functions, but
which are not intended for use by external callers. Their names all begin with which are not intended for use by external callers. Their names all begin with
"_pcre_", which hopefully will not provoke any name clashes. In some "_pcre_" or "_pcre16_" or "_pcre32_", which hopefully will not provoke any name
environments, it is possible to control which external symbols are exported clashes. In some environments, it is possible to control which external symbols
when a shared library is built, and in these cases the undocumented symbols are are exported when a shared library is built, and in these cases the
not exported. undocumented symbols are not exported.
.
.
.SH "SECURITY CONSIDERATIONS"
.rs
.sp
If you are using PCRE in a non-UTF application that permits users to supply
arbitrary patterns for compilation, you should be aware of a feature that
allows users to turn on UTF support from within a pattern, provided that PCRE
was built with UTF support. For example, an 8-bit pattern that begins with
"(*UTF8)" or "(*UTF)" turns on UTF-8 mode, which interprets patterns and
subjects as strings of UTF-8 characters instead of individual 8-bit characters.
This causes both the pattern and any data against which it is matched to be
checked for UTF-8 validity. If the data string is very long, such a check might
use sufficiently many resources as to cause your application to lose
performance.
.P
The best way of guarding against this possibility is to use the
\fBpcre_fullinfo()\fP function to check the compiled pattern's options for UTF.
.P
If your application is one that supports UTF, be aware that validity checking
can take time. If the same data string is to be matched many times, you can use
the PCRE_NO_UTF[8|16|32]_CHECK option for the second and subsequent matches to
save redundant checks.
.P
Another way that performance can be hit is by running a pattern that has a very
large search tree against a string that will never match. Nested unlimited
repeats in a pattern are a common example. PCRE provides some protection
against this: see the PCRE_EXTRA_MATCH_LIMIT feature in the
.\" HREF
\fBpcreapi\fP
.\"
page.
. .
. .
.SH "USER DOCUMENTATION" .SH "USER DOCUMENTATION"
@ -83,196 +145,40 @@ not exported.
The user documentation for PCRE comprises a number of different sections. In The user documentation for PCRE comprises a number of different sections. In
the "man" format, each of these is a separate "man page". In the HTML format, the "man" format, each of these is a separate "man page". In the HTML format,
each is a separate page, linked from the index page. In the plain text format, each is a separate page, linked from the index page. In the plain text format,
all the sections are concatenated, for ease of searching. The sections are as all the sections, except the \fBpcredemo\fP section, are concatenated, for ease
follows: of searching. The sections are as follows:
.sp .sp
pcre this document pcre this document
pcre16 details of the 16-bit library
pcre32 details of the 32-bit library
pcre-config show PCRE installation configuration information pcre-config show PCRE installation configuration information
pcreapi details of PCRE's native C API pcreapi details of PCRE's native C API
pcrebuild options for building PCRE pcrebuild options for building PCRE
pcrecallout details of the callout feature pcrecallout details of the callout feature
pcrecompat discussion of Perl compatibility pcrecompat discussion of Perl compatibility
pcrecpp details of the C++ wrapper pcrecpp details of the C++ wrapper for the 8-bit library
pcregrep description of the \fBpcregrep\fP command pcredemo a demonstration C program that uses PCRE
pcregrep description of the \fBpcregrep\fP command (8-bit only)
pcrejit discussion of the just-in-time optimization support
pcrelimits details of size and other limits
pcrematching discussion of the two matching algorithms pcrematching discussion of the two matching algorithms
pcrepartial details of the partial matching facility pcrepartial details of the partial matching facility
.\" JOIN .\" JOIN
pcrepattern syntax and semantics of supported pcrepattern syntax and semantics of supported
regular expressions regular expressions
pcresyntax quick syntax reference
pcreperform discussion of performance issues pcreperform discussion of performance issues
pcreposix the POSIX-compatible C API pcreposix the POSIX-compatible C API for the 8-bit library
pcreprecompile details of saving and re-using precompiled patterns pcreprecompile details of saving and re-using precompiled patterns
pcresample discussion of the sample program pcresample discussion of the pcredemo program
pcrestack discussion of stack usage pcrestack discussion of stack usage
pcresyntax quick syntax reference
pcretest description of the \fBpcretest\fP testing command pcretest description of the \fBpcretest\fP testing command
pcreunicode discussion of Unicode and UTF-8/16/32 support
.sp .sp
In addition, in the "man" and HTML formats, there is a short page for each In addition, in the "man" and HTML formats, there is a short page for each
C library function, listing its arguments and results. C library function, listing its arguments and results.
. .
. .
.SH LIMITATIONS
.rs
.sp
There are some size limitations in PCRE but it is hoped that they will never in
practice be relevant.
.P
The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
compiled with the default internal linkage size of 2. If you want to process
regular expressions that are truly enormous, you can compile PCRE with an
internal linkage size of 3 or 4 (see the \fBREADME\fP file in the source
distribution and the
.\" HREF
\fBpcrebuild\fP
.\"
documentation for details). In these cases the limit is substantially larger.
However, the speed of execution is slower.
.P
All values in repeating quantifiers must be less than 65536.
.P
There is no limit to the number of parenthesized subpatterns, but there can be
no more than 65535 capturing subpatterns.
.P
The maximum length of name for a named subpattern is 32 characters, and the
maximum number of named subpatterns is 10000.
.P
The maximum length of a subject string is the largest positive number that an
integer variable can hold. However, when using the traditional matching
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
This means that the available stack space may limit the size of a subject
string that can be processed by certain patterns. For a discussion of stack
issues, see the
.\" HREF
\fBpcrestack\fP
.\"
documentation.
.
.\" HTML <a name="utf8support"></a>
.
.
.SH "UTF-8 AND UNICODE PROPERTY SUPPORT"
.rs
.sp
From release 3.3, PCRE has had some support for character strings encoded in
the UTF-8 format. For release 4.0 this was greatly extended to cover most
common requirements, and in release 5.0 additional support for Unicode general
category properties was added.
.P
In order to process UTF-8 strings, you must build PCRE to include UTF-8 support in
the code, and, in addition, you must call
.\" HREF
\fBpcre_compile()\fP
.\"
with the PCRE_UTF8 option flag, or the pattern must start with the sequence
(*UTF8). When either of these is the case, both the pattern and any subject
strings that are matched against it are treated as UTF-8 strings instead of
just strings of bytes.
.P
If you compile PCRE with UTF-8 support, but do not use it at run time, the
library will be a bit bigger, but the additional run time overhead is limited
to testing the PCRE_UTF8 flag occasionally, so should not be very big.
.P
If PCRE is built with Unicode character property support (which implies UTF-8
support), the escape sequences \ep{..}, \eP{..}, and \eX are supported.
The available properties that can be tested are limited to the general
category properties such as Lu for an upper case letter or Nd for a decimal
number, the Unicode script names such as Arabic or Han, and the derived
properties Any and L&. A full list is given in the
.\" HREF
\fBpcrepattern\fP
.\"
documentation. Only the short names for properties are supported. For example,
\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
compatibility with Perl 5.6. PCRE does not support this.
.
.\" HTML <a name="utf8strings"></a>
.
.SS "Validity of UTF-8 strings"
.rs
.sp
When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
are (by default) checked for validity on entry to the relevant functions. From
release 7.3 of PCRE, the check is according to the rules of RFC 3629, which are
themselves derived from the Unicode specification. Earlier releases of PCRE
followed the rules of RFC 2279, which allows the full range of 31-bit values (0
to 0x7FFFFFFF). The current check allows only values in the range U+0 to
U+10FFFF, excluding U+D800 to U+DFFF.
.P
The excluded code points are the "Low Surrogate Area" of Unicode, of which the
Unicode Standard says this: "The Low Surrogate Area does not contain any
character assignments, consequently no character code charts or namelists are
provided for this area. Surrogates are reserved for use with UTF-16 and then
must be used in pairs." The code points that are encoded by UTF-16 pairs are
available as independent code points in the UTF-8 encoding. (In other words,
the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
UTF-8.)
.P
If an invalid UTF-8 string is passed to PCRE, an error return
(PCRE_ERROR_BADUTF8) is given. In some situations, you may already know that
your strings are valid, and therefore want to skip these checks in order to
improve performance. If you set the PCRE_NO_UTF8_CHECK flag at compile time or
at run time, PCRE assumes that the pattern or subject it is given
(respectively) contains only valid UTF-8 codes. In this case, it does not
diagnose an invalid UTF-8 string.
.P
If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
happens depends on why the string is invalid. If the string conforms to the
"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
rules of RFC 2279. However, if the string does not even conform to RFC 2279,
the result is undefined. Your program may crash.
.P
If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
encoded in a UTF-8-like manner as per the old RFC, you can set
PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
situation, you will have to apply your own validity check.
.
.SS "General comments about UTF-8 mode"
.rs
.sp
1. An unbraced hexadecimal escape sequence (such as \exb3) matches a two-byte
UTF-8 character if the value is greater than 127.
.P
2. Octal numbers up to \e777 are recognized, and match two-byte UTF-8
characters for values greater than \e177.
.P
3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
bytes, for example: \ex{100}{3}.
.P
4. The dot metacharacter matches one UTF-8 character instead of a single byte.
.P
5. The escape sequence \eC can be used to match a single byte in UTF-8 mode,
but its use can lead to some strange effects. This facility is not available in
the alternative matching function, \fBpcre_dfa_exec()\fP.
.P
6. The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly
test characters of any code value, but the characters that PCRE recognizes as
digits, spaces, or word characters remain the same set as before, all with
values less than 256. This remains true even when PCRE includes Unicode
property support, because to do otherwise would slow down PCRE in many common
cases. If you really want to test for a wider sense of, say, "digit", you
must use Unicode property tests such as \ep{Nd}. Note that this also applies to
\eb, because it is defined in terms of \ew and \eW.
.P
7. Similarly, characters that match the POSIX named character classes are all
low-valued characters.
.P
8. However, the Perl 5.10 horizontal and vertical whitespace matching escapes
(\eh, \eH, \ev, and \eV) do match all the appropriate Unicode characters.
.P
9. Case-insensitive matching applies only to characters whose values are less
than 128, unless PCRE is built with Unicode property support. Even when Unicode
property support is available, PCRE still uses its own character tables when
checking the case of low-valued characters, so as not to degrade performance.
The Unicode property information is used only for characters with higher
values. Even when Unicode property support is available, PCRE supports
case-insensitive matching only when there is a one-to-one mapping between a
letter's cases. There are a small number of many-to-one mappings in Unicode;
these are not supported by PCRE.
.
.
.SH AUTHOR .SH AUTHOR
.rs .rs
.sp .sp
@ -291,6 +197,6 @@ two digits 10, at the domain cam.ac.uk.
.rs .rs
.sp .sp
.nf .nf
Last updated: 11 April 2009 Last updated: 11 November 2012
Copyright (c) 1997-2009 University of Cambridge. Copyright (c) 1997-2012 University of Cambridge.
.fi .fi

File diff suppressed because it is too large Load Diff

390
tools/pcre/doc/pcre16.3 Normal file
View File

@ -0,0 +1,390 @@
.TH PCRE 3 "08 November 2012" "PCRE 8.32"
.SH NAME
PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.
.
.SH "PCRE 16-BIT API BASIC FUNCTIONS"
.rs
.sp
.SM
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP);
.PP
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
.PP
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.PP
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
.
.
.SH "PCRE 16-BIT API STRING EXTRACTION FUNCTIONS"
.rs
.sp
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
.ti +5n
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
.PP
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
.PP
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
.ti +5n
.B PCRE_SPTR16 *\fIstringptr\fP);
.PP
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIname\fP);
.PP
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
.PP
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP,
.ti +5n
.B PCRE_SPTR16 *\fIstringptr\fP);
.PP
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
.ti +5n
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
.PP
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
.PP
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
.
.
.SH "PCRE 16-BIT API AUXILIARY FUNCTIONS"
.rs
.sp
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
.PP
.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP);
.PP
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
.ti +5n
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
.PP
.B const unsigned char *pcre16_maketables(void);
.PP
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP);
.PP
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B const char *pcre16_version(void);
.PP
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
.ti +5n
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
.
.
.SH "PCRE 16-BIT API INDIRECTED FUNCTIONS"
.rs
.sp
.B void *(*pcre16_malloc)(size_t);
.PP
.B void (*pcre16_free)(void *);
.PP
.B void *(*pcre16_stack_malloc)(size_t);
.PP
.B void (*pcre16_stack_free)(void *);
.PP
.B int (*pcre16_callout)(pcre16_callout_block *);
.
.
.SH "PCRE 16-BIT API 16-BIT-ONLY FUNCTION"
.rs
.sp
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
.ti +5n
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
.ti +5n
.B int \fIkeep_boms\fP);
.
.
.SH "THE PCRE 16-BIT LIBRARY"
.rs
.sp
Starting with release 8.30, it is possible to compile a PCRE library that
supports 16-bit character strings, including UTF-16 strings, as well as or
instead of the original 8-bit library. The majority of the work to make this
possible was done by Zoltan Herczeg. The two libraries contain identical sets
of functions, used in exactly the same way. Only the names of the functions and
the data types of their arguments and results are different. To avoid
over-complication and reduce the documentation maintenance load, most of the
PCRE documentation describes the 8-bit library, with only occasional references
to the 16-bit library. This page describes what is different when you use the
16-bit library.
.P
WARNING: A single application can be linked with both libraries, but you must
take care when processing any particular pattern to use functions from just one
library. For example, if you want to study a pattern that was compiled with
\fBpcre16_compile()\fP, you must do so with \fBpcre16_study()\fP, not
\fBpcre_study()\fP, and you must free the study data with
\fBpcre16_free_study()\fP.
.
.
.SH "THE HEADER FILE"
.rs
.sp
There is only one header file, \fBpcre.h\fP. It contains prototypes for all the
functions in all libraries, as well as definitions of flags, structures, error
codes, etc.
.
.
.SH "THE LIBRARY NAME"
.rs
.sp
In Unix-like systems, the 16-bit library is called \fBlibpcre16\fP, and can
normally be accessed by adding \fB-lpcre16\fP to the command for linking an
application that uses PCRE.
.
.
.SH "STRING TYPES"
.rs
.sp
In the 8-bit library, strings are passed to PCRE library functions as vectors
of bytes with the C type "char *". In the 16-bit library, strings are passed as
vectors of unsigned 16-bit quantities. The macro PCRE_UCHAR16 specifies an
appropriate data type, and PCRE_SPTR16 is defined as "const PCRE_UCHAR16 *". In
very many environments, "short int" is a 16-bit data type. When PCRE is built,
it defines PCRE_UCHAR16 as "unsigned short int", but checks that it really is a
16-bit data type. If it is not, the build fails with an error message telling
the maintainer to modify the definition appropriately.
.
.
.SH "STRUCTURE TYPES"
.rs
.sp
The types of the opaque structures that are used for compiled 16-bit patterns
and JIT stacks are \fBpcre16\fP and \fBpcre16_jit_stack\fP respectively. The
type of the user-accessible structure that is returned by \fBpcre16_study()\fP
is \fBpcre16_extra\fP, and the type of the structure that is used for passing
data to a callout function is \fBpcre16_callout_block\fP. These structures
contain the same fields, with the same names, as their 8-bit counterparts. The
only difference is that pointers to character strings are 16-bit instead of
8-bit types.
.
.
.SH "16-BIT FUNCTIONS"
.rs
.sp
For every function in the 8-bit library there is a corresponding function in
the 16-bit library with a name that starts with \fBpcre16_\fP instead of
\fBpcre_\fP. The prototypes are listed above. In addition, there is one extra
function, \fBpcre16_utf16_to_host_byte_order()\fP. This is a utility function
that converts a UTF-16 character string to host byte order if necessary. The
other 16-bit functions expect the strings they are passed to be in host byte
order.
.P
The \fIinput\fP and \fIoutput\fP arguments of
\fBpcre16_utf16_to_host_byte_order()\fP may point to the same address, that is,
conversion in place is supported. The output buffer must be at least as long as
the input.
.P
The \fIlength\fP argument specifies the number of 16-bit data units in the
input string; a negative value specifies a zero-terminated string.
.P
If \fIbyte_order\fP is NULL, it is assumed that the string starts off in host
byte order. This may be changed by byte-order marks (BOMs) anywhere in the
string (commonly as the first character).
.P
If \fIbyte_order\fP is not NULL, a non-zero value of the integer to which it
points means that the input starts off in host byte order, otherwise the
opposite order is assumed. Again, BOMs in the string can change this. The final
byte order is passed back at the end of processing.
.P
If \fIkeep_boms\fP is not zero, byte-order mark characters (0xfeff) are copied
into the output string. Otherwise they are discarded.
.P
The result of the function is the number of 16-bit units placed into the output
buffer, including the zero terminator if the string was zero-terminated.
.
.
.SH "SUBJECT STRING OFFSETS"
.rs
.sp
The offsets within subject strings that are returned by the matching functions
are in 16-bit units rather than bytes.
.
.
.SH "NAMED SUBPATTERNS"
.rs
.sp
The name-to-number translation table that is maintained for named subpatterns
uses 16-bit characters. The \fBpcre16_get_stringtable_entries()\fP function
returns the length of each entry in the table as the number of 16-bit data
units.
.
.
.SH "OPTION NAMES"
.rs
.sp
There are two new general option names, PCRE_UTF16 and PCRE_NO_UTF16_CHECK,
which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
fact, these new options define the same bits in the options word. There is a
discussion about the
.\" HTML <a href="pcreunicode.html#utf16strings">
.\" </a>
validity of UTF-16 strings
.\"
in the
.\" HREF
\fBpcreunicode\fP
.\"
page.
.P
For the \fBpcre16_config()\fP function there is an option PCRE_CONFIG_UTF16
that returns 1 if UTF-16 support is configured, otherwise 0. If this option is
given to \fBpcre_config()\fP or \fBpcre32_config()\fP, or if the
PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 option is given to \fBpcre16_config()\fP,
the result is the PCRE_ERROR_BADOPTION error.
.
.
.SH "CHARACTER CODES"
.rs
.sp
In 16-bit mode, when PCRE_UTF16 is not set, character values are treated in the
same way as in 8-bit, non-UTF-8 mode, except, of course, that they can range
from 0 to 0xffff instead of 0 to 0xff. Character types for characters less than
0xff can therefore be influenced by the locale in the same way as before.
Characters greater than 0xff have only one case, and no "type" (such as letter
or digit).
.P
In UTF-16 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
the exception of values in the range 0xd800 to 0xdfff because those are
"surrogate" values that are used in pairs to encode values greater than 0xffff.
.P
A UTF-16 string can indicate its endianness by a special code known as a
byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
to be in host byte order. A utility function called
\fBpcre16_utf16_to_host_byte_order()\fP is provided to help with this (see
above).
.
.
.SH "ERROR NAMES"
.rs
.sp
The errors PCRE_ERROR_BADUTF16_OFFSET and PCRE_ERROR_SHORTUTF16 correspond to
their 8-bit counterparts. The error PCRE_ERROR_BADMODE is given when a compiled
pattern is passed to a function that processes patterns in the other
mode, for example, if a pattern compiled with \fBpcre_compile()\fP is passed to
\fBpcre16_exec()\fP.
.P
There are new error codes whose names begin with PCRE_UTF16_ERR for invalid
UTF-16 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
are described in the section entitled
.\" HTML <a href="pcreapi.html#badutf8reasons">
.\" </a>
"Reason codes for invalid UTF-8 strings"
.\"
in the main
.\" HREF
\fBpcreapi\fP
.\"
page. The UTF-16 errors are:
.sp
PCRE_UTF16_ERR1 Missing low surrogate at end of string
PCRE_UTF16_ERR2 Invalid low surrogate follows high surrogate
PCRE_UTF16_ERR3 Isolated low surrogate
PCRE_UTF16_ERR4 Non-character
.
.
.SH "ERROR TEXTS"
.rs
.sp
If there is an error while compiling a pattern, the error text that is passed
back by \fBpcre16_compile()\fP or \fBpcre16_compile2()\fP is still an 8-bit
character string, zero-terminated.
.
.
.SH "CALLOUTS"
.rs
.sp
The \fIsubject\fP and \fImark\fP fields in the callout block that is passed to
a callout function point to 16-bit vectors.
.
.
.SH "TESTING"
.rs
.sp
The \fBpcretest\fP program continues to operate with 8-bit input and output
files, but it can be used for testing the 16-bit library. If it is run with the
command line option \fB-16\fP, patterns and subject strings are converted from
8-bit to 16-bit before being passed to PCRE, and the 16-bit library functions
are used instead of the 8-bit ones. Returned 16-bit strings are converted to
8-bit for output. If neither the 8-bit nor the 32-bit library was compiled,
\fBpcretest\fP defaults to 16-bit and the \fB-16\fP option is ignored.
.P
When PCRE is being built, the \fBRunTest\fP script that is called by "make
check" uses the \fBpcretest\fP \fB-C\fP option to discover which of the 8-bit,
16-bit and 32-bit libraries has been built, and runs the tests appropriately.
.
.
.SH "NOT SUPPORTED IN 16-BIT MODE"
.rs
.sp
Not all the features of the 8-bit library are available with the 16-bit
library. The C++ and POSIX wrapper functions support only the 8-bit library,
and the \fBpcregrep\fP program is at present 8-bit only.
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 08 November 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi

389
tools/pcre/doc/pcre32.3 Normal file
View File

@ -0,0 +1,389 @@
.TH PCRE 3 "08 November 2012" "PCRE 8.32"
.SH NAME
PCRE - Perl-compatible regular expressions
.sp
.B #include <pcre.h>
.
.
.SH "PCRE 32-BIT API BASIC FUNCTIONS"
.rs
.sp
.SM
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP);
.PP
.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
.PP
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.PP
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
.
.
.SH "PCRE 32-BIT API STRING EXTRACTION FUNCTIONS"
.rs
.sp
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
.ti +5n
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
.PP
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
.PP
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
.ti +5n
.B PCRE_SPTR32 *\fIstringptr\fP);
.PP
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIname\fP);
.PP
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
.PP
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP,
.ti +5n
.B PCRE_SPTR32 *\fIstringptr\fP);
.PP
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
.ti +5n
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
.PP
.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
.PP
.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
.
.
.SH "PCRE 32-BIT API AUXILIARY FUNCTIONS"
.rs
.sp
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
.PP
.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP);
.PP
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
.ti +5n
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
.PP
.B const unsigned char *pcre32_maketables(void);
.PP
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
.PP
.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B const char *pcre32_version(void);
.PP
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
.ti +5n
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
.
.
.SH "PCRE 32-BIT API INDIRECTED FUNCTIONS"
.rs
.sp
.B void *(*pcre32_malloc)(size_t);
.PP
.B void (*pcre32_free)(void *);
.PP
.B void *(*pcre32_stack_malloc)(size_t);
.PP
.B void (*pcre32_stack_free)(void *);
.PP
.B int (*pcre32_callout)(pcre32_callout_block *);
.
.
.SH "PCRE 32-BIT API 32-BIT-ONLY FUNCTION"
.rs
.sp
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
.ti +5n
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
.ti +5n
.B int \fIkeep_boms\fP);
.
.
.SH "THE PCRE 32-BIT LIBRARY"
.rs
.sp
Starting with release 8.32, it is possible to compile a PCRE library that
supports 32-bit character strings, including UTF-32 strings, as well as or
instead of the original 8-bit library. This work was done by Christian Persch,
based on the work done by Zoltan Herczeg for the 16-bit library. All three
libraries contain identical sets of functions, used in exactly the same way.
Only the names of the functions and the data types of their arguments and
results are different. To avoid over-complication and reduce the documentation
maintenance load, most of the PCRE documentation describes the 8-bit library,
with only occasional references to the 16-bit and 32-bit libraries. This page
describes what is different when you use the 32-bit library.
.P
WARNING: A single application can be linked with all or any of the three
libraries, but you must take care when processing any particular pattern
to use functions from just one library. For example, if you want to study
a pattern that was compiled with \fBpcre32_compile()\fP, you must do so
with \fBpcre32_study()\fP, not \fBpcre_study()\fP, and you must free the
study data with \fBpcre32_free_study()\fP.
.
.
.SH "THE HEADER FILE"
.rs
.sp
There is only one header file, \fBpcre.h\fP. It contains prototypes for all the
functions in all libraries, as well as definitions of flags, structures, error
codes, etc.
.
.
.SH "THE LIBRARY NAME"
.rs
.sp
In Unix-like systems, the 32-bit library is called \fBlibpcre32\fP, and can
normally be accessed by adding \fB-lpcre32\fP to the command for linking an
application that uses PCRE.
.
.
.SH "STRING TYPES"
.rs
.sp
In the 8-bit library, strings are passed to PCRE library functions as vectors
of bytes with the C type "char *". In the 32-bit library, strings are passed as
vectors of unsigned 32-bit quantities. The macro PCRE_UCHAR32 specifies an
appropriate data type, and PCRE_SPTR32 is defined as "const PCRE_UCHAR32 *". In
very many environments, "unsigned int" is a 32-bit data type. When PCRE is
built, it defines PCRE_UCHAR32 as "unsigned int", but checks that it really is
a 32-bit data type. If it is not, the build fails with an error message telling
the maintainer to modify the definition appropriately.
.
.
.SH "STRUCTURE TYPES"
.rs
.sp
The types of the opaque structures that are used for compiled 32-bit patterns
and JIT stacks are \fBpcre32\fP and \fBpcre32_jit_stack\fP respectively. The
type of the user-accessible structure that is returned by \fBpcre32_study()\fP
is \fBpcre32_extra\fP, and the type of the structure that is used for passing
data to a callout function is \fBpcre32_callout_block\fP. These structures
contain the same fields, with the same names, as their 8-bit counterparts. The
only difference is that pointers to character strings are 32-bit instead of
8-bit types.
.
.
.SH "32-BIT FUNCTIONS"
.rs
.sp
For every function in the 8-bit library there is a corresponding function in
the 32-bit library with a name that starts with \fBpcre32_\fP instead of
\fBpcre_\fP. The prototypes are listed above. In addition, there is one extra
function, \fBpcre32_utf32_to_host_byte_order()\fP. This is a utility function
that converts a UTF-32 character string to host byte order if necessary. The
other 32-bit functions expect the strings they are passed to be in host byte
order.
.P
The \fIinput\fP and \fIoutput\fP arguments of
\fBpcre32_utf32_to_host_byte_order()\fP may point to the same address, that is,
conversion in place is supported. The output buffer must be at least as long as
the input.
.P
The \fIlength\fP argument specifies the number of 32-bit data units in the
input string; a negative value specifies a zero-terminated string.
.P
If \fIbyte_order\fP is NULL, it is assumed that the string starts off in host
byte order. This may be changed by byte-order marks (BOMs) anywhere in the
string (commonly as the first character).
.P
If \fIbyte_order\fP is not NULL, a non-zero value of the integer to which it
points means that the input starts off in host byte order, otherwise the
opposite order is assumed. Again, BOMs in the string can change this. The final
byte order is passed back at the end of processing.
.P
If \fIkeep_boms\fP is not zero, byte-order mark characters (0xfeff) are copied
into the output string. Otherwise they are discarded.
.P
The result of the function is the number of 32-bit units placed into the output
buffer, including the zero terminator if the string was zero-terminated.
.
.
.SH "SUBJECT STRING OFFSETS"
.rs
.sp
The offsets within subject strings that are returned by the matching functions
are in 32-bit units rather than bytes.
.
.
.SH "NAMED SUBPATTERNS"
.rs
.sp
The name-to-number translation table that is maintained for named subpatterns
uses 32-bit characters. The \fBpcre32_get_stringtable_entries()\fP function
returns the length of each entry in the table as the number of 32-bit data
units.
.
.
.SH "OPTION NAMES"
.rs
.sp
There are two new general option names, PCRE_UTF32 and PCRE_NO_UTF32_CHECK,
which correspond to PCRE_UTF8 and PCRE_NO_UTF8_CHECK in the 8-bit library. In
fact, these new options define the same bits in the options word. There is a
discussion about the
.\" HTML <a href="pcreunicode.html#utf32strings">
.\" </a>
validity of UTF-32 strings
.\"
in the
.\" HREF
\fBpcreunicode\fP
.\"
page.
.P
For the \fBpcre32_config()\fP function there is an option PCRE_CONFIG_UTF32
that returns 1 if UTF-32 support is configured, otherwise 0. If this option is
given to \fBpcre_config()\fP or \fBpcre16_config()\fP, or if the
PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 option is given to \fBpcre32_config()\fP,
the result is the PCRE_ERROR_BADOPTION error.
.
.
.SH "CHARACTER CODES"
.rs
.sp
In 32-bit mode, when PCRE_UTF32 is not set, character values are treated in the
same way as in 8-bit, non-UTF-8 mode, except, of course, that they can range
from 0 to 0x7fffffff instead of 0 to 0xff. Character types for characters less
than 0xff can therefore be influenced by the locale in the same way as before.
Characters greater than 0xff have only one case, and no "type" (such as letter
or digit).
.P
In UTF-32 mode, the character code is Unicode, in the range 0 to 0x10ffff, with
the exception of values in the range 0xd800 to 0xdfff because those are
"surrogate" values that are ill-formed in UTF-32.
.P
A UTF-32 string can indicate its endianness by a special code known as a
byte-order mark (BOM). The PCRE functions do not handle this, expecting strings
to be in host byte order. A utility function called
\fBpcre32_utf32_to_host_byte_order()\fP is provided to help with this (see
above).
.
.
.SH "ERROR NAMES"
.rs
.sp
The error PCRE_ERROR_BADUTF32 corresponds to its 8-bit counterpart.
The error PCRE_ERROR_BADMODE is given when a compiled
pattern is passed to a function that processes patterns in the other
mode, for example, if a pattern compiled with \fBpcre_compile()\fP is passed to
\fBpcre32_exec()\fP.
.P
There are new error codes whose names begin with PCRE_UTF32_ERR for invalid
UTF-32 strings, corresponding to the PCRE_UTF8_ERR codes for UTF-8 strings that
are described in the section entitled
.\" HTML <a href="pcreapi.html#badutf8reasons">
.\" </a>
"Reason codes for invalid UTF-8 strings"
.\"
in the main
.\" HREF
\fBpcreapi\fP
.\"
page. The UTF-32 errors are:
.sp
PCRE_UTF32_ERR1 Surrogate character (range from 0xd800 to 0xdfff)
PCRE_UTF32_ERR2 Non-character
PCRE_UTF32_ERR3 Character > 0x10ffff
.
.
.SH "ERROR TEXTS"
.rs
.sp
If there is an error while compiling a pattern, the error text that is passed
back by \fBpcre32_compile()\fP or \fBpcre32_compile2()\fP is still an 8-bit
character string, zero-terminated.
.
.
.SH "CALLOUTS"
.rs
.sp
The \fIsubject\fP and \fImark\fP fields in the callout block that is passed to
a callout function point to 32-bit vectors.
.
.
.SH "TESTING"
.rs
.sp
The \fBpcretest\fP program continues to operate with 8-bit input and output
files, but it can be used for testing the 32-bit library. If it is run with the
command line option \fB-32\fP, patterns and subject strings are converted from
8-bit to 32-bit before being passed to PCRE, and the 32-bit library functions
are used instead of the 8-bit ones. Returned 32-bit strings are converted to
8-bit for output. If neither the 8-bit nor the 16-bit library was compiled,
\fBpcretest\fP defaults to 32-bit and the \fB-32\fP option is ignored.
.P
When PCRE is being built, the \fBRunTest\fP script that is called by "make
check" uses the \fBpcretest\fP \fB-C\fP option to discover which of the 8-bit,
16-bit and 32-bit libraries has been built, and runs the tests appropriately.
.
.
.SH "NOT SUPPORTED IN 32-BIT MODE"
.rs
.sp
Not all the features of the 8-bit library are available with the 32-bit
library. The C++ and POSIX wrapper functions support only the 8-bit library,
and the \fBpcregrep\fP program is at present 8-bit only.
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 08 November 2012
Copyright (c) 1997-2012 University of Cambridge.
.fi

View File

@ -0,0 +1,61 @@
.TH PCRE_ASSIGN_JIT_STACK 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
.ti +5n
.B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
.PP
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
.ti +5n
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
.PP
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
.ti +5n
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
.
.SH DESCRIPTION
.rs
.sp
This function provides control over the memory used as a stack at run-time by a
call to \fBpcre[16|32]_exec()\fP with a pattern that has been successfully
compiled with JIT optimization. The arguments are:
.sp
extra the data pointer returned by \fBpcre[16|32]_study()\fP
callback a callback function
data a JIT stack or a value to be passed to the callback
function
.P
If \fIcallback\fP is NULL and \fIdata\fP is NULL, an internal 32K block on
the machine stack is used.
.P
If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must
be a valid JIT stack, the result of calling \fBpcre[16|32]_jit_stack_alloc()\fP.
.P
If \fIcallback\fP is not NULL, it is called with \fIdata\fP as an argument at
the start of matching, in order to set up a JIT stack. If the result is NULL,
the internal 32K stack is used; otherwise the return value must be a valid JIT
stack, the result of calling \fBpcre[16|32]_jit_stack_alloc()\fP.
.P
You may safely assign the same JIT stack to multiple patterns, as long as they
are all matched in the same thread. In a multithread application, each thread
must use its own JIT stack. For more details, see the
.\" HREF
\fBpcrejit\fP
.\"
page.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.

View File

@ -1,4 +1,4 @@
.TH PCRE_COMPILE 3 .TH PCRE_COMPILE 3 "24 June 2012" "PCRE 8.30"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH SYNOPSIS .SH SYNOPSIS
@ -12,20 +12,32 @@ PCRE - Perl-compatible regular expressions
.B const char **\fIerrptr\fP, int *\fIerroffset\fP, .B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n .ti +5n
.B const unsigned char *\fItableptr\fP); .B const unsigned char *\fItableptr\fP);
.PP
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
.sp .sp
This function compiles a regular expression into an internal form. It is the This function compiles a regular expression into an internal form. It is the
same as \fBpcre_compile2()\fP, except for the absence of the \fIerrorcodeptr\fP same as \fBpcre[16|32]_compile2()\fP, except for the absence of the
argument. Its arguments are: \fIerrorcodeptr\fP argument. Its arguments are:
.sp .sp
\fIpattern\fR A zero-terminated string containing the \fIpattern\fP A zero-terminated string containing the
regular expression to be compiled regular expression to be compiled
\fIoptions\fR Zero or more option bits \fIoptions\fP Zero or more option bits
\fIerrptr\fR Where to put an error message \fIerrptr\fP Where to put an error message
\fIerroffset\fR Offset in pattern where error was found \fIerroffset\fP Offset in pattern where error was found
\fItableptr\fR Pointer to character tables, or NULL to \fItableptr\fP Pointer to character tables, or NULL to
use the built-in default use the built-in default
.sp .sp
The option bits are: The option bits are:
@ -38,7 +50,7 @@ The option bits are:
PCRE_DOLLAR_ENDONLY $ not to match newline at end PCRE_DOLLAR_ENDONLY $ not to match newline at end
PCRE_DOTALL . matches anything including NL PCRE_DOTALL . matches anything including NL
PCRE_DUPNAMES Allow duplicate names for subpatterns PCRE_DUPNAMES Allow duplicate names for subpatterns
PCRE_EXTENDED Ignore whitespace and # comments PCRE_EXTENDED Ignore white space and # comments
PCRE_EXTRA PCRE extra features PCRE_EXTRA PCRE extra features
(not much use currently) (not much use currently)
PCRE_FIRSTLINE Force matching to be before newline PCRE_FIRSTLINE Force matching to be before newline
@ -52,14 +64,23 @@ The option bits are:
PCRE_NEWLINE_LF Set LF as the newline sequence PCRE_NEWLINE_LF Set LF as the newline sequence
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren- PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
theses (named ones available) theses (named ones available)
PCRE_UNGREEDY Invert greediness of quantifiers PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
PCRE_UTF8 Run in UTF-8 mode validity (only relevant if
PCRE_UTF16 is set)
PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32
validity (only relevant if
PCRE_UTF32 is set)
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8 PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
validity (only relevant if validity (only relevant if
PCRE_UTF8 is set) PCRE_UTF8 is set)
PCRE_UCP Use Unicode properties for \ed, \ew, etc.
PCRE_UNGREEDY Invert greediness of quantifiers
PCRE_UTF16 Run \fBpcre16_compile()\fP in UTF-16 mode
PCRE_UTF32 Run \fBpcre32_compile()\fP in UTF-32 mode
PCRE_UTF8 Run \fBpcre_compile()\fP in UTF-8 mode
.sp .sp
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
PCRE_NO_UTF8_CHECK. PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
.P .P
The yield of the function is a pointer to a private data structure that The yield of the function is a pointer to a private data structure that
contains the compiled pattern, or NULL if an error was detected. Note that contains the compiled pattern, or NULL if an error was detected. Note that
@ -68,10 +89,10 @@ version is not guaranteed to work and may cause crashes.
.P .P
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the
.\" HREF .\" HREF
\fBpcreapi\fR \fBpcreapi\fP
.\" .\"
page and a description of the POSIX API in the page and a description of the POSIX API in the
.\" HREF .\" HREF
\fBpcreposix\fR \fBpcreposix\fP
.\" .\"
page. page.

View File

@ -1,4 +1,4 @@
.TH PCRE_COMPILE2 3 .TH PCRE_COMPILE2 3 "24 June 2012" "PCRE 8.30"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH SYNOPSIS .SH SYNOPSIS
@ -14,52 +14,81 @@ PCRE - Perl-compatible regular expressions
.B const char **\fIerrptr\fP, int *\fIerroffset\fP, .B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n .ti +5n
.B const unsigned char *\fItableptr\fP); .B const unsigned char *\fItableptr\fP);
.PP
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
.sp .sp
This function compiles a regular expression into an internal form. It is the This function compiles a regular expression into an internal form. It is the
same as \fBpcre_compile()\fP, except for the addition of the \fIerrorcodeptr\fP same as \fBpcre[16|32]_compile()\fP, except for the addition of the
argument. The arguments are: \fIerrorcodeptr\fP argument. The arguments are:
.
.sp .sp
\fIpattern\fR A zero-terminated string containing the \fIpattern\fP A zero-terminated string containing the
regular expression to be compiled regular expression to be compiled
\fIoptions\fR Zero or more option bits \fIoptions\fP Zero or more option bits
\fIerrorcodeptr\fP Where to put an error code \fIerrorcodeptr\fP Where to put an error code
\fIerrptr\fR Where to put an error message \fIerrptr\fP Where to put an error message
\fIerroffset\fR Offset in pattern where error was found \fIerroffset\fP Offset in pattern where error was found
\fItableptr\fR Pointer to character tables, or NULL to \fItableptr\fP Pointer to character tables, or NULL to
use the built-in default use the built-in default
.sp .sp
The option bits are: The option bits are:
.sp .sp
PCRE_ANCHORED Force pattern anchoring PCRE_ANCHORED Force pattern anchoring
PCRE_AUTO_CALLOUT Compile automatic callouts PCRE_AUTO_CALLOUT Compile automatic callouts
PCRE_CASELESS Do caseless matching PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
PCRE_DOLLAR_ENDONLY $ not to match newline at end PCRE_BSR_UNICODE \eR matches all Unicode line endings
PCRE_DOTALL . matches anything including NL PCRE_CASELESS Do caseless matching
PCRE_DUPNAMES Allow duplicate names for subpatterns PCRE_DOLLAR_ENDONLY $ not to match newline at end
PCRE_EXTENDED Ignore whitespace and # comments PCRE_DOTALL . matches anything including NL
PCRE_EXTRA PCRE extra features PCRE_DUPNAMES Allow duplicate names for subpatterns
(not much use currently) PCRE_EXTENDED Ignore white space and # comments
PCRE_FIRSTLINE Force matching to be before newline PCRE_EXTRA PCRE extra features
PCRE_MULTILINE ^ and $ match newlines within data (not much use currently)
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence PCRE_FIRSTLINE Force matching to be before newline
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
PCRE_NEWLINE_CR Set CR as the newline sequence PCRE_MULTILINE ^ and $ match newlines within data
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
PCRE_NEWLINE_LF Set LF as the newline sequence PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren- sequences
theses (named ones available) PCRE_NEWLINE_CR Set CR as the newline sequence
PCRE_UNGREEDY Invert greediness of quantifiers PCRE_NEWLINE_CRLF Set CRLF as the newline sequence
PCRE_UTF8 Run in UTF-8 mode PCRE_NEWLINE_LF Set LF as the newline sequence
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8 PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
validity (only relevant if theses (named ones available)
PCRE_UTF8 is set) PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
validity (only relevant if
PCRE_UTF16 is set)
PCRE_NO_UTF32_CHECK Do not check the pattern for UTF-32
validity (only relevant if
PCRE_UTF32 is set)
PCRE_NO_UTF8_CHECK Do not check the pattern for UTF-8
validity (only relevant if
PCRE_UTF8 is set)
PCRE_UCP Use Unicode properties for \ed, \ew, etc.
PCRE_UNGREEDY Invert greediness of quantifiers
PCRE_UTF16 Run \fBpcre16_compile()\fP in UTF-16 mode
PCRE_UTF32 Run \fBpcre32_compile()\fP in UTF-32 mode
PCRE_UTF8 Run \fBpcre_compile()\fP in UTF-8 mode
.sp .sp
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and PCRE must be built with UTF support in order to use PCRE_UTF8/16/32 and
PCRE_NO_UTF8_CHECK. PCRE_NO_UTF8/16/32_CHECK, and with UCP support if PCRE_UCP is used.
.P .P
The yield of the function is a pointer to a private data structure that The yield of the function is a pointer to a private data structure that
contains the compiled pattern, or NULL if an error was detected. Note that contains the compiled pattern, or NULL if an error was detected. Note that
@ -68,10 +97,10 @@ version is not guaranteed to work and may cause crashes.
.P .P
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the
.\" HREF .\" HREF
\fBpcreapi\fR \fBpcreapi\fP
.\" .\"
page and a description of the POSIX API in the page and a description of the POSIX API in the
.\" HREF .\" HREF
\fBpcreposix\fR \fBpcreposix\fP
.\" .\"
page. page.

View File

@ -1,4 +1,4 @@
.TH PCRE_CONFIG 3 .TH PCRE_CONFIG 3 "24 June 2012" "PCRE 8.30"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH SYNOPSIS .SH SYNOPSIS
@ -8,19 +8,30 @@ PCRE - Perl-compatible regular expressions
.PP .PP
.SM .SM
.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP); .B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
.sp .sp
This function makes it possible for a client program to find out which optional This function makes it possible for a client program to find out which optional
features are available in the version of the PCRE library it is using. Its features are available in the version of the PCRE library it is using. The
arguments are as follows: arguments are as follows:
.sp .sp
\fIwhat\fR A code specifying what information is required \fIwhat\fP A code specifying what information is required
\fIwhere\fR Points to where to put the data \fIwhere\fP Points to where to put the data
.sp .sp
The available codes are: The \fIwhere\fP argument must point to an integer variable, except for
PCRE_CONFIG_MATCH_LIMIT and PCRE_CONFIG_MATCH_LIMIT_RECURSION, when it must
point to an unsigned long integer. The available codes are:
.sp .sp
PCRE_CONFIG_JIT Availability of just-in-time compiler
support (1=yes 0=no)
PCRE_CONFIG_JITTARGET String containing information about the
target architecture for the JIT compiler,
or NULL if there is no JIT support
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4 PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
PCRE_CONFIG_MATCH_LIMIT Internal resource limit PCRE_CONFIG_MATCH_LIMIT Internal resource limit
PCRE_CONFIG_MATCH_LIMIT_RECURSION PCRE_CONFIG_MATCH_LIMIT_RECURSION
@ -35,23 +46,31 @@ The available codes are:
0 all Unicode line endings 0 all Unicode line endings
1 CR, LF, or CRLF only 1 CR, LF, or CRLF only
PCRE_CONFIG_POSIX_MALLOC_THRESHOLD PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
Threshold of return slots, above Threshold of return slots, above which
which \fBmalloc()\fR is used by \fBmalloc()\fP is used by the POSIX API
the POSIX API
PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap) PCRE_CONFIG_STACKRECURSE Recursion implementation (1=stack 0=heap)
PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no) PCRE_CONFIG_UTF16 Availability of UTF-16 support (1=yes
0=no); option for \fBpcre16_config()\fP
PCRE_CONFIG_UTF32 Availability of UTF-32 support (1=yes
0=no); option for \fBpcre32_config()\fP
PCRE_CONFIG_UTF8 Availability of UTF-8 support (1=yes 0=no);
option for \fBpcre_config()\fP
PCRE_CONFIG_UNICODE_PROPERTIES PCRE_CONFIG_UNICODE_PROPERTIES
Availability of Unicode property support Availability of Unicode property support
(1=yes 0=no) (1=yes 0=no)
.sp .sp
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise. The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise. That error
is also given if PCRE_CONFIG_UTF16 or PCRE_CONFIG_UTF32 is passed to
\fBpcre_config()\fP, if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF32 is passed to
\fBpcre16_config()\fP, or if PCRE_CONFIG_UTF8 or PCRE_CONFIG_UTF16 is passed to
\fBpcre32_config()\fP.
.P .P
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the
.\" HREF .\" HREF
\fBpcreapi\fR \fBpcreapi\fP
.\" .\"
page and a description of the POSIX API in the page and a description of the POSIX API in the
.\" HREF .\" HREF
\fBpcreposix\fR \fBpcreposix\fP
.\" .\"
page. page.

View File

@ -1,4 +1,4 @@
.TH PCRE_COPY_NAMED_SUBSTRING 3 .TH PCRE_COPY_NAMED_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH SYNOPSIS .SH SYNOPSIS
@ -14,6 +14,22 @@ PCRE - Perl-compatible regular expressions
.B int \fIstringcount\fP, const char *\fIstringname\fP, .B int \fIstringcount\fP, const char *\fIstringname\fP,
.ti +5n .ti +5n
.B char *\fIbuffer\fP, int \fIbuffersize\fP); .B char *\fIbuffer\fP, int \fIbuffersize\fP);
.PP
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
.ti +5n
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
.PP
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
.ti +5n
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
@ -23,8 +39,8 @@ by name, into a given buffer. The arguments are:
.sp .sp
\fIcode\fP Pattern that was successfully matched \fIcode\fP Pattern that was successfully matched
\fIsubject\fP Subject that has been successfully matched \fIsubject\fP Subject that has been successfully matched
\fIovector\fP Offset vector that \fBpcre_exec()\fP used \fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used
\fIstringcount\fP Value returned by \fBpcre_exec()\fP \fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP
\fIstringname\fP Name of the required substring \fIstringname\fP Name of the required substring
\fIbuffer\fP Buffer to receive the string \fIbuffer\fP Buffer to receive the string
\fIbuffersize\fP Size of buffer \fIbuffersize\fP Size of buffer

View File

@ -1,4 +1,4 @@
.TH PCRE_COPY_SUBSTRING 3 .TH PCRE_COPY_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH SYNOPSIS .SH SYNOPSIS
@ -12,6 +12,18 @@ PCRE - Perl-compatible regular expressions
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP, .B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
.ti +5n .ti +5n
.B int \fIbuffersize\fP); .B int \fIbuffersize\fP);
.PP
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
.PP
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
@ -20,8 +32,8 @@ This is a convenience function for extracting a captured substring into a given
buffer. The arguments are: buffer. The arguments are:
.sp .sp
\fIsubject\fP Subject that has been successfully matched \fIsubject\fP Subject that has been successfully matched
\fIovector\fP Offset vector that \fBpcre_exec()\fP used \fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used
\fIstringcount\fP Value returned by \fBpcre_exec()\fP \fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP
\fIstringnumber\fP Number of the required substring \fIstringnumber\fP Number of the required substring
\fIbuffer\fP Buffer to receive the string \fIbuffer\fP Buffer to receive the string
\fIbuffersize\fP Size of buffer \fIbuffersize\fP Size of buffer

View File

@ -1,4 +1,4 @@
.TH PCRE_DFA_EXEC 3 .TH PCRE_DFA_EXEC 3 "24 June 2012" "PCRE 8.30"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH SYNOPSIS .SH SYNOPSIS
@ -14,6 +14,22 @@ PCRE - Perl-compatible regular expressions
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP, .B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n .ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP); .B int *\fIworkspace\fP, int \fIwscount\fP);
.PP
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
.PP
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
@ -21,10 +37,11 @@ PCRE - Perl-compatible regular expressions
This function matches a compiled regular expression against a given subject This function matches a compiled regular expression against a given subject
string, using an alternative matching algorithm that scans the subject string string, using an alternative matching algorithm that scans the subject string
just once (\fInot\fP Perl-compatible). Note that the main, Perl-compatible, just once (\fInot\fP Perl-compatible). Note that the main, Perl-compatible,
matching function is \fBpcre_exec()\fP. The arguments for this function are: matching function is \fBpcre[16|32]_exec()\fP. The arguments for this function
are:
.sp .sp
\fIcode\fP Points to the compiled pattern \fIcode\fP Points to the compiled pattern
\fIextra\fP Points to an associated \fBpcre_extra\fP structure, \fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
or is NULL or is NULL
\fIsubject\fP Points to the subject string \fIsubject\fP Points to the subject string
\fIlength\fP Length of the subject string, in bytes \fIlength\fP Length of the subject string, in bytes
@ -38,45 +55,64 @@ matching function is \fBpcre_exec()\fP. The arguments for this function are:
.sp .sp
The options are: The options are:
.sp .sp
PCRE_ANCHORED Match only at the first position PCRE_ANCHORED Match only at the first position
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
PCRE_BSR_UNICODE \eR matches all Unicode line endings PCRE_BSR_UNICODE \eR matches all Unicode line endings
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences PCRE_NEWLINE_ANYCRLF Recognize CR, LF, & CRLF as newline sequences
PCRE_NEWLINE_CR Set CR as the newline sequence PCRE_NEWLINE_CR Recognize CR as the only newline sequence
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence PCRE_NEWLINE_CRLF Recognize CRLF as the only newline sequence
PCRE_NEWLINE_LF Set LF as the newline sequence PCRE_NEWLINE_LF Recognize LF as the only newline sequence
PCRE_NOTBOL Subject is not the beginning of a line PCRE_NOTBOL Subject is not the beginning of a line
PCRE_NOTEOL Subject is not the end of a line PCRE_NOTEOL Subject is not the end of a line
PCRE_NOTEMPTY An empty string is not a valid match PCRE_NOTEMPTY An empty string is not a valid match
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8 is not a valid match
validity (only relevant if PCRE_UTF8 PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
was set at compile time) PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match validity (only relevant if PCRE_UTF16
PCRE_DFA_SHORTEST Return only the shortest match was set at compile time)
PCRE_DFA_RESTART This is a restart after a partial match PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
validity (only relevant if PCRE_UTF32
was set at compile time)
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
validity (only relevant if PCRE_UTF8
was set at compile time)
PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial
PCRE_PARTIAL_SOFT ) match if no full matches are found
PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match
even if there is a full match as well
PCRE_DFA_SHORTEST Return only the shortest match
PCRE_DFA_RESTART Restart after a partial match
.sp .sp
There are restrictions on what may appear in a pattern when using this matching There are restrictions on what may appear in a pattern when using this matching
function. Details are given in the function. Details are given in the
.\" HREF .\" HREF
\fBpcrematching\fP \fBpcrematching\fP
.\" .\"
documentation. documentation. For details of partial matching, see the
.\" HREF
\fBpcrepartial\fP
.\"
page.
.P .P
A \fBpcre_extra\fP structure contains the following fields: A \fBpcre[16|32]_extra\fP structure contains the following fields:
.sp .sp
\fIflags\fP Bits indicating which fields are set \fIflags\fP Bits indicating which fields are set
\fIstudy_data\fP Opaque data from \fBpcre_study()\fP \fIstudy_data\fP Opaque data from \fBpcre[16|32]_study()\fP
\fImatch_limit\fP Limit on internal resource use \fImatch_limit\fP Limit on internal resource use
\fImatch_limit_recursion\fP Limit on internal recursion depth \fImatch_limit_recursion\fP Limit on internal recursion depth
\fIcallout_data\fP Opaque data passed back to callouts \fIcallout_data\fP Opaque data passed back to callouts
\fItables\fP Points to character tables or is NULL \fItables\fP Points to character tables or is NULL
\fImark\fP For passing back a *MARK pointer
\fIexecutable_jit\fP Opaque data from JIT compilation
.sp .sp
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT, The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
PCRE_EXTRA_TABLES. For this matching function, the \fImatch_limit\fP and PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT. For this
\fImatch_limit_recursion\fP fields are not used, and must not be set. matching function, the \fImatch_limit\fP and \fImatch_limit_recursion\fP fields
are not used, and must not be set. The PCRE_EXTRA_EXECUTABLE_JIT flag and
the corresponding variable are ignored.
.P .P
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the
.\" HREF .\" HREF

View File

@ -1,4 +1,4 @@
.TH PCRE_EXEC 3 .TH PCRE_EXEC 3 "24 June 2012" "PCRE 8.30"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH SYNOPSIS .SH SYNOPSIS
@ -12,6 +12,18 @@ PCRE - Perl-compatible regular expressions
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP, .B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n .ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP); .B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.PP
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.PP
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
@ -21,7 +33,7 @@ string, using a matching algorithm that is similar to Perl's. It returns
offsets to captured substrings. Its arguments are: offsets to captured substrings. Its arguments are:
.sp .sp
\fIcode\fP Points to the compiled pattern \fIcode\fP Points to the compiled pattern
\fIextra\fP Points to an associated \fBpcre_extra\fP structure, \fIextra\fP Points to an associated \fBpcre[16|32]_extra\fP structure,
or is NULL or is NULL
\fIsubject\fP Points to the subject string \fIsubject\fP Points to the subject string
\fIlength\fP Length of the subject string, in bytes \fIlength\fP Length of the subject string, in bytes
@ -33,42 +45,52 @@ offsets to captured substrings. Its arguments are:
.sp .sp
The options are: The options are:
.sp .sp
PCRE_ANCHORED Match only at the first position PCRE_ANCHORED Match only at the first position
PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF PCRE_BSR_ANYCRLF \eR matches only CR, LF, or CRLF
PCRE_BSR_UNICODE \eR matches all Unicode line endings PCRE_BSR_UNICODE \eR matches all Unicode line endings
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline sequences PCRE_NEWLINE_ANYCRLF Recognize CR, LF, & CRLF as newline sequences
PCRE_NEWLINE_CR Set CR as the newline sequence PCRE_NEWLINE_CR Recognize CR as the only newline sequence
PCRE_NEWLINE_CRLF Set CRLF as the newline sequence PCRE_NEWLINE_CRLF Recognize CRLF as the only newline sequence
PCRE_NEWLINE_LF Set LF as the newline sequence PCRE_NEWLINE_LF Recognize LF as the only newline sequence
PCRE_NOTBOL Subject is not the beginning of a line PCRE_NOTBOL Subject string is not the beginning of a line
PCRE_NOTEOL Subject is not the end of a line PCRE_NOTEOL Subject string is not the end of a line
PCRE_NOTEMPTY An empty string is not a valid match PCRE_NOTEMPTY An empty string is not a valid match
PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations PCRE_NOTEMPTY_ATSTART An empty string at the start of the subject
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8 is not a valid match
validity (only relevant if PCRE_UTF8 PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
was set at compile time) PCRE_NO_UTF16_CHECK Do not check the subject for UTF-16
PCRE_PARTIAL Return PCRE_ERROR_PARTIAL for a partial match validity (only relevant if PCRE_UTF16
was set at compile time)
PCRE_NO_UTF32_CHECK Do not check the subject for UTF-32
validity (only relevant if PCRE_UTF32
was set at compile time)
PCRE_NO_UTF8_CHECK Do not check the subject for UTF-8
validity (only relevant if PCRE_UTF8
was set at compile time)
PCRE_PARTIAL ) Return PCRE_ERROR_PARTIAL for a partial
PCRE_PARTIAL_SOFT ) match if no full matches are found
PCRE_PARTIAL_HARD Return PCRE_ERROR_PARTIAL for a partial match
if that is found before a full match
.sp .sp
There are restrictions on what may appear in a pattern when partial matching is For details of partial matching, see the
requested. For details, see the
.\" HREF .\" HREF
\fBpcrepartial\fP \fBpcrepartial\fP
.\" .\"
page. page. A \fBpcre[16|32]_extra\fP structure contains the following fields:
.P
A \fBpcre_extra\fP structure contains the following fields:
.sp .sp
\fIflags\fP Bits indicating which fields are set \fIflags\fP Bits indicating which fields are set
\fIstudy_data\fP Opaque data from \fBpcre_study()\fP \fIstudy_data\fP Opaque data from \fBpcre[16|32]_study()\fP
\fImatch_limit\fP Limit on internal resource use \fImatch_limit\fP Limit on internal resource use
\fImatch_limit_recursion\fP Limit on internal recursion depth \fImatch_limit_recursion\fP Limit on internal recursion depth
\fIcallout_data\fP Opaque data passed back to callouts \fIcallout_data\fP Opaque data passed back to callouts
\fItables\fP Points to character tables or is NULL \fItables\fP Points to character tables or is NULL
\fImark\fP For passing back a *MARK pointer
\fIexecutable_jit\fP Opaque data from JIT compilation
.sp .sp
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT, The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA,
PCRE_EXTRA_TABLES. PCRE_EXTRA_TABLES, PCRE_EXTRA_MARK and PCRE_EXTRA_EXECUTABLE_JIT.
.P .P
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the
.\" HREF .\" HREF

View File

@ -0,0 +1,31 @@
.TH PCRE_FREE_STUDY 3 "24 June 2012" "PCRE 8.30"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B void pcre_free_study(pcre_extra *\fIextra\fP);
.PP
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
.PP
.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
.
.SH DESCRIPTION
.rs
.sp
This function is used to free the memory used for the data generated by a call
to \fBpcre[16|32]_study()\fP when it is no longer needed. The argument must be the
result of such a call.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.

View File

@ -1,4 +1,4 @@
.TH PCRE_FREE_SUBSTRING 3 .TH PCRE_FREE_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH SYNOPSIS .SH SYNOPSIS
@ -8,13 +8,17 @@ PCRE - Perl-compatible regular expressions
.PP .PP
.SM .SM
.B void pcre_free_substring(const char *\fIstringptr\fP); .B void pcre_free_substring(const char *\fIstringptr\fP);
.PP
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
.PP
.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
.sp .sp
This is a convenience function for freeing the store obtained by a previous This is a convenience function for freeing the store obtained by a previous
call to \fBpcre_get_substring()\fP or \fBpcre_get_named_substring()\fP. Its call to \fBpcre[16|32]_get_substring()\fP or \fBpcre[16|32]_get_named_substring()\fP.
only argument is a pointer to the string. Its only argument is a pointer to the string.
.P .P
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the
.\" HREF .\" HREF

View File

@ -1,4 +1,4 @@
.TH PCRE_FREE_SUBSTRING_LIST 3 .TH PCRE_FREE_SUBSTRING_LIST 3 "24 June 2012" "PCRE 8.30"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH SYNOPSIS .SH SYNOPSIS
@ -8,13 +8,17 @@ PCRE - Perl-compatible regular expressions
.PP .PP
.SM .SM
.B void pcre_free_substring_list(const char **\fIstringptr\fP); .B void pcre_free_substring_list(const char **\fIstringptr\fP);
.PP
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
.PP
.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
.sp .sp
This is a convenience function for freeing the store obtained by a previous This is a convenience function for freeing the store obtained by a previous
call to \fBpcre_get_substring_list()\fP. Its only argument is a pointer to the call to \fBpcre[16|32]_get_substring_list()\fP. Its only argument is a pointer to
list of string pointers. the list of string pointers.
.P .P
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the
.\" HREF .\" HREF

View File

@ -1,4 +1,4 @@
.TH PCRE_FULLINFO 3 .TH PCRE_FULLINFO 3 "24 June 2012" "PCRE 8.30"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH SYNOPSIS .SH SYNOPSIS
@ -10,6 +10,14 @@ PCRE - Perl-compatible regular expressions
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP," .B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n .ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP); .B int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
@ -17,7 +25,7 @@ PCRE - Perl-compatible regular expressions
This function returns information about a compiled pattern. Its arguments are: This function returns information about a compiled pattern. Its arguments are:
.sp .sp
\fIcode\fP Compiled regular expression \fIcode\fP Compiled regular expression
\fIextra\fP Result of \fBpcre_study()\fP or NULL \fIextra\fP Result of \fBpcre[16|32]_study()\fP or NULL
\fIwhat\fP What information is required \fIwhat\fP What information is required
\fIwhere\fP Where to put the information \fIwhere\fP Where to put the information
.sp .sp
@ -26,20 +34,48 @@ The following information is available:
PCRE_INFO_BACKREFMAX Number of highest back reference PCRE_INFO_BACKREFMAX Number of highest back reference
PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns PCRE_INFO_CAPTURECOUNT Number of capturing subpatterns
PCRE_INFO_DEFAULT_TABLES Pointer to default tables PCRE_INFO_DEFAULT_TABLES Pointer to default tables
PCRE_INFO_FIRSTBYTE Fixed first byte for a match, or PCRE_INFO_FIRSTBYTE Fixed first data unit for a match, or
-1 for start of string -1 for start of string
or after newline, or or after newline, or
-2 otherwise -2 otherwise
PCRE_INFO_FIRSTTABLE Table of first bytes (after studying) PCRE_INFO_FIRSTTABLE Table of first data units (after studying)
PCRE_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist
PCRE_INFO_JCHANGED Return 1 if (?J) or (?-J) was used PCRE_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
PCRE_INFO_LASTLITERAL Literal last byte required PCRE_INFO_JIT Return 1 after successful JIT compilation
PCRE_INFO_JITSIZE Size of JIT compiled code
PCRE_INFO_LASTLITERAL Literal last data unit required
PCRE_INFO_MINLENGTH Lower bound length of matching strings
PCRE_INFO_NAMECOUNT Number of named subpatterns PCRE_INFO_NAMECOUNT Number of named subpatterns
PCRE_INFO_NAMEENTRYSIZE Size of name table entry PCRE_INFO_NAMEENTRYSIZE Size of name table entry
PCRE_INFO_NAMETABLE Pointer to name table PCRE_INFO_NAMETABLE Pointer to name table
PCRE_INFO_OKPARTIAL Return 1 if partial matching can be tried PCRE_INFO_OKPARTIAL Return 1 if partial matching can be tried
(always returns 1 after release 8.00)
PCRE_INFO_OPTIONS Option bits used for compilation PCRE_INFO_OPTIONS Option bits used for compilation
PCRE_INFO_SIZE Size of compiled pattern PCRE_INFO_SIZE Size of compiled pattern
PCRE_INFO_STUDYSIZE Size of study data PCRE_INFO_STUDYSIZE Size of study data
PCRE_INFO_FIRSTCHARACTER Fixed first data unit for a match
PCRE_INFO_FIRSTCHARACTERFLAGS Returns
1 if a first data character is set, which can
then be retrieved using PCRE_INFO_FIRSTCHARACTER,
2 if the first character is at the start of the data
string or after a newline, and
0 otherwise
PCRE_INFO_REQUIREDCHAR Literal last data unit required
PCRE_INFO_REQUIREDCHARFLAGS Returns 1 if the last data character is set (which can then
be retrieved using PCRE_INFO_REQUIREDCHAR); 0 otherwise
.sp
The \fIwhere\fP argument must point to an integer variable, except for the
following \fIwhat\fP values:
.sp
PCRE_INFO_DEFAULT_TABLES const unsigned char *
PCRE_INFO_FIRSTTABLE const unsigned char *
PCRE_INFO_NAMETABLE PCRE_SPTR16 (16-bit library)
PCRE_INFO_NAMETABLE PCRE_SPTR32 (32-bit library)
PCRE_INFO_NAMETABLE const unsigned char * (8-bit library)
PCRE_INFO_OPTIONS unsigned long int
PCRE_INFO_SIZE size_t
PCRE_INFO_FIRSTCHARACTER uint32_t
PCRE_INFO_REQUIREDCHAR uint32_t
.sp .sp
The yield of the function is zero on success or: The yield of the function is zero on success or:
.sp .sp

View File

@ -1,4 +1,4 @@
.TH PCRE_GET_NAMED_SUBSTRING 3 .TH PCRE_GET_NAMED_SUBSTRING 3 "24 June 2012" "PCRE 8.30"
.SH NAME .SH NAME
PCRE - Perl-compatible regular expressions PCRE - Perl-compatible regular expressions
.SH SYNOPSIS .SH SYNOPSIS
@ -14,6 +14,22 @@ PCRE - Perl-compatible regular expressions
.B int \fIstringcount\fP, const char *\fIstringname\fP, .B int \fIstringcount\fP, const char *\fIstringname\fP,
.ti +5n .ti +5n
.B const char **\fIstringptr\fP); .B const char **\fIstringptr\fP);
.PP
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
.ti +5n
.B PCRE_SPTR16 *\fIstringptr\fP);
.PP
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
.ti +5n
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
.ti +5n
.B PCRE_SPTR32 *\fIstringptr\fP);
. .
.SH DESCRIPTION .SH DESCRIPTION
.rs .rs
@ -23,16 +39,17 @@ arguments are:
.sp .sp
\fIcode\fP Compiled pattern \fIcode\fP Compiled pattern
\fIsubject\fP Subject that has been successfully matched \fIsubject\fP Subject that has been successfully matched
\fIovector\fP Offset vector that \fBpcre_exec()\fP used \fIovector\fP Offset vector that \fBpcre[16|32]_exec()\fP used
\fIstringcount\fP Value returned by \fBpcre_exec()\fP \fIstringcount\fP Value returned by \fBpcre[16|32]_exec()\fP
\fIstringname\fP Name of the required substring \fIstringname\fP Name of the required substring
\fIstringptr\fP Where to put the string pointer \fIstringptr\fP Where to put the string pointer
.sp .sp
The memory in which the substring is placed is obtained by calling The memory in which the substring is placed is obtained by calling
\fBpcre_malloc()\fP. The convenience function \fBpcre_free_substring()\fP can \fBpcre[16|32]_malloc()\fP. The convenience function
be used to free it when it is no longer needed. The yield of the function is \fBpcre[16|32]_free_substring()\fP can be used to free it when it is no longer
the length of the extracted substring, PCRE_ERROR_NOMEMORY if sufficient memory needed. The yield of the function is the length of the extracted substring,
could not be obtained, or PCRE_ERROR_NOSUBSTRING if the string name is invalid. PCRE_ERROR_NOMEMORY if sufficient memory could not be obtained, or
PCRE_ERROR_NOSUBSTRING if the string name is invalid.
.P .P
There is a complete description of the PCRE native API in the There is a complete description of the PCRE native API in the
.\" HREF .\" HREF

Some files were not shown because too many files have changed in this diff Show More