parent
93e9a29353
commit
cdb9851da6
@ -33,22 +33,23 @@
|
|||||||
|
|
||||||
#include "pcre.h"
|
#include "pcre.h"
|
||||||
#include "CRegEx.h"
|
#include "CRegEx.h"
|
||||||
#include <sh_string.h>
|
|
||||||
#include "extension.h"
|
#include "extension.h"
|
||||||
|
|
||||||
RegEx::RegEx()
|
RegEx::RegEx()
|
||||||
{
|
{
|
||||||
mErrorOffset = 0;
|
mErrorOffset = 0;
|
||||||
|
mErrorCode = 0;
|
||||||
mError = NULL;
|
mError = NULL;
|
||||||
re = NULL;
|
re = NULL;
|
||||||
mFree = true;
|
mFree = true;
|
||||||
subject = NULL;
|
subject = NULL;
|
||||||
mSubStrings = 0;
|
mMatchCount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegEx::Clear ()
|
void RegEx::Clear ()
|
||||||
{
|
{
|
||||||
mErrorOffset = 0;
|
mErrorOffset = 0;
|
||||||
|
mErrorCode = 0;
|
||||||
mError = NULL;
|
mError = NULL;
|
||||||
if (re)
|
if (re)
|
||||||
pcre_free(re);
|
pcre_free(re);
|
||||||
@ -57,7 +58,7 @@ void RegEx::Clear ()
|
|||||||
if (subject)
|
if (subject)
|
||||||
delete [] subject;
|
delete [] subject;
|
||||||
subject = NULL;
|
subject = NULL;
|
||||||
mSubStrings = 0;
|
mMatchCount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
RegEx::~RegEx()
|
RegEx::~RegEx()
|
||||||
@ -81,7 +82,7 @@ int RegEx::Compile(const char *pattern, int iFlags)
|
|||||||
if (!mFree)
|
if (!mFree)
|
||||||
Clear();
|
Clear();
|
||||||
|
|
||||||
re = pcre_compile(pattern, iFlags, &mError, &mErrorOffset, NULL);
|
re = pcre_compile2(pattern, iFlags, &mErrorCode, &mError, &mErrorOffset, NULL);
|
||||||
|
|
||||||
if (re == NULL)
|
if (re == NULL)
|
||||||
{
|
{
|
||||||
@ -93,7 +94,7 @@ int RegEx::Compile(const char *pattern, int iFlags)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int RegEx::Match(const char *str)
|
int RegEx::Match(const char *str, unsigned int offset)
|
||||||
{
|
{
|
||||||
int rc = 0;
|
int rc = 0;
|
||||||
|
|
||||||
@ -106,7 +107,9 @@ int RegEx::Match(const char *str)
|
|||||||
subject = new char[strlen(str)+1];
|
subject = new char[strlen(str)+1];
|
||||||
strcpy(subject, str);
|
strcpy(subject, str);
|
||||||
|
|
||||||
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);
|
unsigned int len = strlen(subject);
|
||||||
|
|
||||||
|
rc = pcre_exec(re, NULL, subject, len, offset, 0, mMatches[0].mVector, MAX_CAPTURES);
|
||||||
|
|
||||||
if (rc < 0)
|
if (rc < 0)
|
||||||
{
|
{
|
||||||
@ -114,34 +117,80 @@ int RegEx::Match(const char *str)
|
|||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
mErrorOffset = rc;
|
mErrorCode = rc;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mSubStrings = rc;
|
mMatches[0].mSubStringCount = rc;
|
||||||
|
mMatchCount = 1;
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int RegEx::MatchAll(const char *str)
|
||||||
|
{
|
||||||
|
int rc = 0;
|
||||||
|
|
||||||
|
if (mFree || re == NULL)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
this->ClearMatch();
|
||||||
|
|
||||||
|
//save str
|
||||||
|
subject = new char[strlen(str) + 1];
|
||||||
|
strcpy(subject, str);
|
||||||
|
|
||||||
|
unsigned int offset = 0;
|
||||||
|
unsigned int len = strlen(subject);
|
||||||
|
unsigned int matches = 0;
|
||||||
|
|
||||||
|
while (matches < MAX_MATCHES && offset < len && (rc = pcre_exec(re, 0, subject, len, offset, 0, mMatches[matches].mVector, MAX_CAPTURES)) >= 0)
|
||||||
|
{
|
||||||
|
offset = mMatches[matches].mVector[1];
|
||||||
|
mMatches[matches].mSubStringCount = rc;
|
||||||
|
|
||||||
|
matches++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rc < PCRE_ERROR_NOMATCH || (rc == PCRE_ERROR_NOMATCH && matches == 0))
|
||||||
|
{
|
||||||
|
if (rc == PCRE_ERROR_NOMATCH)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
mErrorCode = rc;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mMatchCount = matches;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
void RegEx::ClearMatch()
|
void RegEx::ClearMatch()
|
||||||
{
|
{
|
||||||
// Clears match results
|
// Clears match results
|
||||||
mErrorOffset = 0;
|
mErrorOffset = 0;
|
||||||
|
mErrorCode = 0;
|
||||||
mError = NULL;
|
mError = NULL;
|
||||||
if (subject)
|
if (subject)
|
||||||
delete [] subject;
|
delete [] subject;
|
||||||
subject = NULL;
|
subject = NULL;
|
||||||
mSubStrings = 0;
|
mMatchCount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
bool RegEx::GetSubstring(int s, char buffer[], int max, int match)
|
||||||
{
|
{
|
||||||
int i = 0;
|
int i = 0;
|
||||||
if (s >= mSubStrings || s < 0)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
char *substr_a = subject + ovector[2*s];
|
if (s >= mMatches[match].mSubStringCount || s < 0)
|
||||||
int substr_l = ovector[2*s+1] - ovector[2*s];
|
return false;
|
||||||
|
|
||||||
|
char *substr_a = subject + mMatches[match].mVector[2 * s];
|
||||||
|
int substr_l = mMatches[match].mVector[2 * s + 1] - mMatches[match].mVector[2 * s];
|
||||||
|
|
||||||
for (i = 0; i<substr_l; i++)
|
for (i = 0; i<substr_l; i++)
|
||||||
{
|
{
|
||||||
@ -152,6 +201,6 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
|||||||
|
|
||||||
buffer[i] = '\0';
|
buffer[i] = '\0';
|
||||||
|
|
||||||
return buffer;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,10 +28,20 @@
|
|||||||
*
|
*
|
||||||
* Version: $Id$
|
* Version: $Id$
|
||||||
*/
|
*/
|
||||||
|
#include <am-string.h>
|
||||||
|
|
||||||
#ifndef _INCLUDE_CREGEX_H
|
#ifndef _INCLUDE_CREGEX_H
|
||||||
#define _INCLUDE_CREGEX_H
|
#define _INCLUDE_CREGEX_H
|
||||||
|
|
||||||
|
// Number of RegexMatch slots kept by RegEx (size of RegEx::mMatches and the
// upper bound on matches stored by RegEx::MatchAll).
#define MAX_MATCHES 20

// Number of ints in each match's offset vector; this is the size handed to
// pcre_exec. Parenthesized so the macro expands safely inside larger
// expressions such as `n / MAX_CAPTURES`.
#define MAX_CAPTURES (MAX_MATCHES * 3)

// Result of a single pcre_exec call.
struct RegexMatch
{
	// Value returned by pcre_exec for this match.
	int mSubStringCount;

	// Offset vector filled by pcre_exec. Substring s starts at mVector[2 * s]
	// and ends just before mVector[2 * s + 1], both relative to the subject.
	int mVector[MAX_CAPTURES];
};
|
||||||
|
|
||||||
class RegEx
|
class RegEx
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -41,17 +51,19 @@ public:
|
|||||||
void Clear();
|
void Clear();
|
||||||
|
|
||||||
int Compile(const char *pattern, int iFlags);
|
int Compile(const char *pattern, int iFlags);
|
||||||
int Match(const char *str);
|
int Match(const char *str, unsigned int offset);
|
||||||
|
int MatchAll(const char *str);
|
||||||
void ClearMatch();
|
void ClearMatch();
|
||||||
const char *GetSubstring(int s, char buffer[], int max);
|
bool GetSubstring(int s, char buffer[], int max, int match);
|
||||||
public:
|
public:
|
||||||
int mErrorOffset;
|
int mErrorOffset;
|
||||||
|
int mErrorCode;
|
||||||
const char *mError;
|
const char *mError;
|
||||||
int mSubStrings;
|
int mMatchCount;
|
||||||
|
RegexMatch mMatches[MAX_MATCHES];
|
||||||
private:
|
private:
|
||||||
pcre *re;
|
pcre *re;
|
||||||
bool mFree;
|
bool mFree;
|
||||||
int ovector[30];
|
|
||||||
char *subject;
|
char *subject;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -33,6 +33,7 @@
|
|||||||
#include "extension.h"
|
#include "extension.h"
|
||||||
#include <sh_string.h>
|
#include <sh_string.h>
|
||||||
#include "pcre.h"
|
#include "pcre.h"
|
||||||
|
#include "posix_map.h"
|
||||||
#include "CRegEx.h"
|
#include "CRegEx.h"
|
||||||
using namespace SourceHook;
|
using namespace SourceHook;
|
||||||
|
|
||||||
@ -82,10 +83,11 @@ static cell_t CompileRegex(IPluginContext *pCtx, const cell_t *params)
|
|||||||
|
|
||||||
if (x->Compile(regex, params[2]) == 0)
|
if (x->Compile(regex, params[2]) == 0)
|
||||||
{
|
{
|
||||||
cell_t *eOff;
|
cell_t *eError;
|
||||||
pCtx->LocalToPhysAddr(params[5], &eOff);
|
pCtx->LocalToPhysAddr(params[5], &eError);
|
||||||
const char *err = x->mError;
|
const char *err = x->mError;
|
||||||
*eOff = x->mErrorOffset;
|
// Convert error code to posix error code but use pcre's error string since it is more detailed.
|
||||||
|
*eError = pcre_posix_compile_error_map[x->mErrorCode];
|
||||||
pCtx->StringToLocal(params[3], params[4], err ? err:"unknown");
|
pCtx->StringToLocal(params[3], params[4], err ? err:"unknown");
|
||||||
delete x;
|
delete x;
|
||||||
return 0;
|
return 0;
|
||||||
@ -112,6 +114,13 @@ static cell_t MatchRegex(IPluginContext *pCtx, const cell_t *params)
|
|||||||
sec.pOwner = NULL;
|
sec.pOwner = NULL;
|
||||||
sec.pIdentity = myself->GetIdentity();
|
sec.pIdentity = myself->GetIdentity();
|
||||||
|
|
||||||
|
unsigned int offset = 0;
|
||||||
|
|
||||||
|
if (params[0] >= 4)
|
||||||
|
{
|
||||||
|
offset = (unsigned int)params[4];
|
||||||
|
}
|
||||||
|
|
||||||
RegEx *x;
|
RegEx *x;
|
||||||
|
|
||||||
if ((err=g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None)
|
if ((err=g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None)
|
||||||
@ -129,14 +138,17 @@ static cell_t MatchRegex(IPluginContext *pCtx, const cell_t *params)
|
|||||||
char *str;
|
char *str;
|
||||||
pCtx->LocalToString(params[2], &str);
|
pCtx->LocalToString(params[2], &str);
|
||||||
|
|
||||||
int e = x->Match(str);
|
if(offset >= strlen(str))
|
||||||
|
return pCtx->ThrowNativeError("Invalid string index\n");
|
||||||
|
|
||||||
|
int e = x->Match(str, offset);
|
||||||
|
|
||||||
if (e == -1)
|
if (e == -1)
|
||||||
{
|
{
|
||||||
/* there was a match error. move on. */
|
/* there was a match error. move on. */
|
||||||
cell_t *res;
|
cell_t *res;
|
||||||
pCtx->LocalToPhysAddr(params[3], &res);
|
pCtx->LocalToPhysAddr(params[3], &res);
|
||||||
*res = x->mErrorOffset;
|
*res = x->mErrorCode;
|
||||||
/* only clear the match results, since the regex object
|
/* only clear the match results, since the regex object
|
||||||
may still be referenced later */
|
may still be referenced later */
|
||||||
x->ClearMatch();
|
x->ClearMatch();
|
||||||
@ -153,7 +165,60 @@ static cell_t MatchRegex(IPluginContext *pCtx, const cell_t *params)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return x->mSubStrings;
|
return x->mMatches[0].mSubStringCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Native backing Regex.MatchAll.
 *
 * params[1] is the regex handle, params[2] the subject string and params[3]
 * the by-reference cell that receives the error code when matching fails.
 *
 * @return	The number of matches stored on the regex object, 0 when nothing
 *			matched, or -1 on a match error.
 */
static cell_t MatchRegexAll(IPluginContext *pCtx, const cell_t *params)
{
	const Handle_t handle = static_cast<Handle_t>(params[1]);

	HandleSecurity security;
	security.pOwner = NULL;
	security.pIdentity = myself->GetIdentity();

	RegEx *regex;
	const HandleError status = g_pHandleSys->ReadHandle(handle, g_RegexHandle, &security, (void **)&regex);
	if (status != HandleError_None)
	{
		return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", handle, status);
	}

	if (regex == NULL)
	{
		pCtx->ThrowNativeError("Regex data not found\n");
		return 0;
	}

	char *subjectText;
	pCtx->LocalToString(params[2], &subjectText);

	const int result = regex->MatchAll(subjectText);

	// Anything other than "error" (-1) or "no match" (0) means matches were stored.
	if (result != -1 && result != 0)
	{
		return regex->mMatchCount;
	}

	if (result == -1)
	{
		/* Report the error code to the plugin before the state is wiped. */
		cell_t *errorOut;
		pCtx->LocalToPhysAddr(params[3], &errorOut);
		*errorOut = regex->mErrorCode;
	}

	/* Only the match results are cleared; the compiled regex object may
	   still be referenced later. */
	regex->ClearMatch();

	return result;
}
|
||||||
|
|
||||||
@ -165,6 +230,8 @@ static cell_t GetRegexSubString(IPluginContext *pCtx, const cell_t *params)
|
|||||||
sec.pOwner=NULL;
|
sec.pOwner=NULL;
|
||||||
sec.pIdentity=myself->GetIdentity();
|
sec.pIdentity=myself->GetIdentity();
|
||||||
|
|
||||||
|
int match = 0;
|
||||||
|
|
||||||
RegEx *x;
|
RegEx *x;
|
||||||
|
|
||||||
if ((err=g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None)
|
if ((err=g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None)
|
||||||
@ -178,17 +245,93 @@ static cell_t GetRegexSubString(IPluginContext *pCtx, const cell_t *params)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char buffer[4096];
|
if (params[0] >= 5)
|
||||||
const char *ret=x->GetSubstring(params[2], buffer, sizeof(buffer));
|
|
||||||
|
|
||||||
if(!ret)
|
|
||||||
{
|
{
|
||||||
return 0;
|
match = params[5];
|
||||||
}
|
}
|
||||||
|
|
||||||
pCtx->StringToLocalUTF8(params[3], params[4], ret, NULL);
|
if(match >= x->mMatchCount || match < 0)
|
||||||
|
return pCtx->ThrowNativeError("Invalid match index passed.\n");
|
||||||
|
|
||||||
return 1;
|
char *buffer;
|
||||||
|
pCtx->LocalToString(params[3], &buffer);
|
||||||
|
|
||||||
|
return x->GetSubstring(params[2], buffer, params[4], match);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Native backing Regex.MatchCount.
 *
 * params[1] is the regex handle.
 *
 * @return	The mMatchCount value of the regex object; a native error is
 *			thrown when the handle cannot be read or carries no object.
 */
static cell_t GetRegexMatchCount(IPluginContext *pCtx, const cell_t *params)
{
	const Handle_t handle = static_cast<Handle_t>(params[1]);

	HandleSecurity security;
	security.pOwner = NULL;
	security.pIdentity = myself->GetIdentity();

	RegEx *regex;
	const HandleError status = g_pHandleSys->ReadHandle(handle, g_RegexHandle, &security, (void **)&regex);
	if (status != HandleError_None)
		return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", handle, status);

	if (regex == NULL)
		return pCtx->ThrowNativeError("Regex data not found\n");

	return regex->mMatchCount;
}
|
||||||
|
|
||||||
|
/**
 * Native backing Regex.CaptureCount.
 *
 * params[1] is the regex handle and params[2] the index of the match to query.
 *
 * @return	The mSubStringCount stored for that match; a native error is
 *			thrown for an unreadable handle, a missing object, or a match
 *			index outside 0 .. mMatchCount - 1.
 */
static cell_t GetRegexCaptureCount(IPluginContext *pCtx, const cell_t *params)
{
	const Handle_t handle = static_cast<Handle_t>(params[1]);

	HandleSecurity security;
	security.pOwner = NULL;
	security.pIdentity = myself->GetIdentity();

	RegEx *regex;
	const HandleError status = g_pHandleSys->ReadHandle(handle, g_RegexHandle, &security, (void **)&regex);
	if (status != HandleError_None)
		return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", handle, status);

	if (regex == NULL)
		return pCtx->ThrowNativeError("Regex data not found\n");

	const cell_t matchIndex = params[2];
	const bool inRange = (matchIndex >= 0) && (matchIndex < regex->mMatchCount);
	if (!inRange)
		return pCtx->ThrowNativeError("Invalid match index passed.\n");

	return regex->mMatches[matchIndex].mSubStringCount;
}
|
||||||
|
|
||||||
|
/**
 * Native backing Regex.MatchOffset.
 *
 * params[1] is the regex handle and params[2] the index of the match to query.
 *
 * @return	Element 1 of the selected match's offset vector. NOTE(review):
 *			per the PCRE offset-vector layout this is the position just past
 *			the end of the whole match rather than its start - confirm that
 *			this is the value plugins are expected to receive.
 */
static cell_t GetRegexOffset(IPluginContext *pCtx, const cell_t *params)
{
	const Handle_t handle = static_cast<Handle_t>(params[1]);

	HandleSecurity security;
	security.pOwner = NULL;
	security.pIdentity = myself->GetIdentity();

	RegEx *regex;
	const HandleError status = g_pHandleSys->ReadHandle(handle, g_RegexHandle, &security, (void **)&regex);
	if (status != HandleError_None)
		return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", handle, status);

	if (regex == NULL)
		return pCtx->ThrowNativeError("Regex data not found\n");

	const cell_t matchIndex = params[2];
	const bool inRange = (matchIndex >= 0) && (matchIndex < regex->mMatchCount);
	if (!inRange)
		return pCtx->ThrowNativeError("Invalid match index passed.\n");

	return regex->mMatches[matchIndex].mVector[1];
}
|
||||||
|
|
||||||
void RegexHandler::OnHandleDestroy(HandleType_t type, void *object)
|
void RegexHandler::OnHandleDestroy(HandleType_t type, void *object)
|
||||||
@ -209,5 +352,9 @@ const sp_nativeinfo_t regex_natives[] =
|
|||||||
{"Regex.GetSubString", GetRegexSubString},
|
{"Regex.GetSubString", GetRegexSubString},
|
||||||
{"Regex.Match", MatchRegex},
|
{"Regex.Match", MatchRegex},
|
||||||
{"Regex.Regex", CompileRegex},
|
{"Regex.Regex", CompileRegex},
|
||||||
|
{"Regex.MatchAll", MatchRegexAll},
|
||||||
|
{"Regex.MatchCount", GetRegexMatchCount},
|
||||||
|
{"Regex.CaptureCount", GetRegexCaptureCount},
|
||||||
|
{"Regex.MatchOffset", GetRegexOffset},
|
||||||
{NULL, NULL},
|
{NULL, NULL},
|
||||||
};
|
};
|
||||||
|
136
extensions/regex/posix_map.h
Normal file
136
extensions/regex/posix_map.h
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
/*
|
||||||
|
Maps pcre_compile2 error codes to posix error codes.
|
||||||
|
From pcreposix.c and pcreposix.h
|
||||||
|
*/
|
||||||
|
|
||||||
|
// POSIX-style regex error codes. Values are spelled out so each code can be
// read off directly.
enum {
	REG_ASSERT   = 1,  /* internal error ? */
	REG_BADBR    = 2,  /* invalid repeat counts in {} */
	REG_BADPAT   = 3,  /* pattern error */
	REG_BADRPT   = 4,  /* ? * + invalid */
	REG_EBRACE   = 5,  /* unbalanced {} */
	REG_EBRACK   = 6,  /* unbalanced [] */
	REG_ECOLLATE = 7,  /* collation error - not relevant */
	REG_ECTYPE   = 8,  /* bad class */
	REG_EESCAPE  = 9,  /* bad escape sequence */
	REG_EMPTY    = 10, /* empty expression */
	REG_EPAREN   = 11, /* unbalanced () */
	REG_ERANGE   = 12, /* bad range inside [] */
	REG_ESIZE    = 13, /* expression too big */
	REG_ESPACE   = 14, /* failed to get memory */
	REG_ESUBREG  = 15, /* bad back reference */
	REG_INVARG   = 16  /* bad argument */

	/* REG_NOMATCH (match failed) is deliberately left out: it is not a
	   compile error, is not used by the table below, and omitting it
	   avoids a name conflict. */
};

// Translation table indexed by the pcre_compile2 error code; each entry is the
// POSIX error code reported for it. The leading number is the table index.
const int pcre_posix_compile_error_map[] = {
	/*  0 */ 0,           /* no error */
	/*  1 */ REG_EESCAPE, /* \ at end of pattern */
	/*  2 */ REG_EESCAPE, /* \c at end of pattern */
	/*  3 */ REG_EESCAPE, /* unrecognized character follows \ */
	/*  4 */ REG_BADBR,   /* numbers out of order in {} quantifier */
	/*  5 */ REG_BADBR,   /* number too big in {} quantifier */
	/*  6 */ REG_EBRACK,  /* missing terminating ] for character class */
	/*  7 */ REG_ECTYPE,  /* invalid escape sequence in character class */
	/*  8 */ REG_ERANGE,  /* range out of order in character class */
	/*  9 */ REG_BADRPT,  /* nothing to repeat */
	/* 10 */ REG_BADRPT,  /* operand of unlimited repeat could match the empty string */
	/* 11 */ REG_ASSERT,  /* internal error: unexpected repeat */
	/* 12 */ REG_BADPAT,  /* unrecognized character after (? */
	/* 13 */ REG_BADPAT,  /* POSIX named classes are supported only within a class */
	/* 14 */ REG_EPAREN,  /* missing ) */
	/* 15 */ REG_ESUBREG, /* reference to non-existent subpattern */
	/* 16 */ REG_INVARG,  /* erroffset passed as NULL */
	/* 17 */ REG_INVARG,  /* unknown option bit(s) set */
	/* 18 */ REG_EPAREN,  /* missing ) after comment */
	/* 19 */ REG_ESIZE,   /* parentheses nested too deeply */
	/* 20 */ REG_ESIZE,   /* regular expression too large */
	/* 21 */ REG_ESPACE,  /* failed to get memory */
	/* 22 */ REG_EPAREN,  /* unmatched parentheses */
	/* 23 */ REG_ASSERT,  /* internal error: code overflow */
	/* 24 */ REG_BADPAT,  /* unrecognized character after (?< */
	/* 25 */ REG_BADPAT,  /* lookbehind assertion is not fixed length */
	/* 26 */ REG_BADPAT,  /* malformed number or name after (?( */
	/* 27 */ REG_BADPAT,  /* conditional group contains more than two branches */
	/* 28 */ REG_BADPAT,  /* assertion expected after (?( */
	/* 29 */ REG_BADPAT,  /* (?R or (?[+-]digits must be followed by ) */
	/* 30 */ REG_ECTYPE,  /* unknown POSIX class name */
	/* 31 */ REG_BADPAT,  /* POSIX collating elements are not supported */
	/* 32 */ REG_INVARG,  /* this version of PCRE is not compiled with PCRE_UTF8 support */
	/* 33 */ REG_BADPAT,  /* spare error */
	/* 34 */ REG_BADPAT,  /* character value in \x{} or \o{} is too large */
	/* 35 */ REG_BADPAT,  /* invalid condition (?(0) */
	/* 36 */ REG_BADPAT,  /* \C not allowed in lookbehind assertion */
	/* 37 */ REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */
	/* 38 */ REG_BADPAT,  /* number after (?C is > 255 */
	/* 39 */ REG_BADPAT,  /* closing ) for (?C expected */
	/* 40 */ REG_BADPAT,  /* recursive call could loop indefinitely */
	/* 41 */ REG_BADPAT,  /* unrecognized character after (?P */
	/* 42 */ REG_BADPAT,  /* syntax error in subpattern name (missing terminator) */
	/* 43 */ REG_BADPAT,  /* two named subpatterns have the same name */
	/* 44 */ REG_BADPAT,  /* invalid UTF-8 string */
	/* 45 */ REG_BADPAT,  /* support for \P, \p, and \X has not been compiled */
	/* 46 */ REG_BADPAT,  /* malformed \P or \p sequence */
	/* 47 */ REG_BADPAT,  /* unknown property name after \P or \p */
	/* 48 */ REG_BADPAT,  /* subpattern name is too long (maximum 32 characters) */
	/* 49 */ REG_BADPAT,  /* too many named subpatterns (maximum 10,000) */
	/* 50 */ REG_BADPAT,  /* repeated subpattern is too long */
	/* 51 */ REG_BADPAT,  /* octal value is greater than \377 (not in UTF-8 mode) */
	/* 52 */ REG_BADPAT,  /* internal error: overran compiling workspace */
	/* 53 */ REG_BADPAT,  /* internal error: previously-checked referenced subpattern not found */
	/* 54 */ REG_BADPAT,  /* DEFINE group contains more than one branch */
	/* 55 */ REG_BADPAT,  /* repeating a DEFINE group is not allowed */
	/* 56 */ REG_INVARG,  /* inconsistent NEWLINE options */
	/* 57 */ REG_BADPAT,  /* \g is not followed by an (optionally braced) non-zero number */
	/* 58 */ REG_BADPAT,  /* a numbered reference must not be zero */
	/* 59 */ REG_BADPAT,  /* an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) */
	/* 60 */ REG_BADPAT,  /* (*VERB) not recognized */
	/* 61 */ REG_BADPAT,  /* number is too big */
	/* 62 */ REG_BADPAT,  /* subpattern name expected */
	/* 63 */ REG_BADPAT,  /* digit expected after (?+ */
	/* 64 */ REG_BADPAT,  /* ] is an invalid data character in JavaScript compatibility mode */
	/* 65 */ REG_BADPAT,  /* different names for subpatterns of the same number are not allowed */
	/* 66 */ REG_BADPAT,  /* (*MARK) must have an argument */
	/* 67 */ REG_INVARG,  /* this version of PCRE is not compiled with PCRE_UCP support */
	/* 68 */ REG_BADPAT,  /* \c must be followed by an ASCII character */
	/* 69 */ REG_BADPAT,  /* \k is not followed by a braced, angle-bracketed, or quoted name */
	/* 70 */ REG_BADPAT,  /* internal error: unknown opcode in find_fixedlength() */
	/* 71 */ REG_BADPAT,  /* \N is not supported in a class */
	/* 72 */ REG_BADPAT,  /* too many forward references */
	/* 73 */ REG_BADPAT,  /* disallowed UTF-8/16/32 code point (>= 0xd800 && <= 0xdfff) */
	/* 74 */ REG_BADPAT,  /* invalid UTF-16 string (should not occur) */
	/* 75 */ REG_BADPAT,  /* overlong MARK name */
	/* 76 */ REG_BADPAT,  /* character value in \u.... sequence is too large */
	/* 77 */ REG_BADPAT,  /* invalid UTF-32 string (should not occur) */
	/* 78 */ REG_BADPAT,  /* setting UTF is disabled by the application */
	/* 79 */ REG_BADPAT,  /* non-hex character in \\x{} (closing brace missing?) */
	/* 80 */ REG_BADPAT,  /* non-octal character in \o{} (closing brace missing?) */
	/* 81 */ REG_BADPAT,  /* missing opening brace after \o */
	/* 82 */ REG_BADPAT,  /* parentheses too deeply nested */
	/* 83 */ REG_BADPAT,  /* invalid range in character class */
	/* 84 */ REG_BADPAT,  /* group name must start with a non-digit */
	/* 85 */ REG_BADPAT   /* parentheses too deeply nested (stack check) */
};
|
@ -58,6 +58,24 @@
|
|||||||
enum RegexError
|
enum RegexError
|
||||||
{
|
{
|
||||||
REGEX_ERROR_NONE = 0, /* No error */
|
REGEX_ERROR_NONE = 0, /* No error */
|
||||||
|
|
||||||
|
REGEX_ERROR_ASSERT = 1, /* internal error ? */
|
||||||
|
REGEX_ERROR_BADBR, /* invalid repeat counts in {} */
|
||||||
|
REGEX_ERROR_BADPAT, /* pattern error */
|
||||||
|
REGEX_ERROR_BADRPT, /* ? * + invalid */
|
||||||
|
REGEX_ERROR_EBRACE, /* unbalanced {} */
|
||||||
|
REGEX_ERROR_EBRACK, /* unbalanced [] */
|
||||||
|
REGEX_ERROR_ECOLLATE, /* collation error - not relevant */
|
||||||
|
REGEX_ERROR_ECTYPE, /* bad class */
|
||||||
|
REGEX_ERROR_EESCAPE, /* bad escape sequence */
|
||||||
|
REGEX_ERROR_EMPTY, /* empty expression */
|
||||||
|
REGEX_ERROR_EPAREN, /* unbalanced () */
|
||||||
|
REGEX_ERROR_ERANGE, /* bad range inside [] */
|
||||||
|
REGEX_ERROR_ESIZE, /* expression too big */
|
||||||
|
REGEX_ERROR_ESPACE, /* failed to get memory */
|
||||||
|
REGEX_ERROR_ESUBREG, /* bad back reference */
|
||||||
|
REGEX_ERROR_INVARG, /* bad argument */
|
||||||
|
|
||||||
REGEX_ERROR_NOMATCH = -1, /* No match was found */
|
REGEX_ERROR_NOMATCH = -1, /* No match was found */
|
||||||
REGEX_ERROR_NULL = -2,
|
REGEX_ERROR_NULL = -2,
|
||||||
REGEX_ERROR_BADOPTION = -3,
|
REGEX_ERROR_BADOPTION = -3,
|
||||||
@ -110,23 +128,58 @@ methodmap Regex < Handle
|
|||||||
// @param str The string to check.
|
// @param str The string to check.
|
||||||
// @param regex Regex Handle from CompileRegex()
|
// @param regex Regex Handle from CompileRegex()
|
||||||
// @param ret Error code, if applicable.
|
// @param ret Error code, if applicable.
|
||||||
// @return Number of substrings found or -1 on failure.
|
// @param offset Offset in the string to start searching from. MatchOffset returns the offset of the match.
|
||||||
|
// @return Number of captures found or -1 on failure.
|
||||||
//
|
//
|
||||||
// @note Use the regex handle passed to this function to extract
|
// @note Use the regex handle passed to this function to extract
|
||||||
// matches with GetRegexSubString().
|
// matches with GetSubString().
|
||||||
public native int Match(const char[] str, RegexError &ret = REGEX_ERROR_NONE);
|
public native int Match(const char[] str, RegexError &ret = REGEX_ERROR_NONE, int offset = 0);
|
||||||
|
|
||||||
|
// Gets all matches from a string against a pre-compiled regular expression pattern.
//
// The pattern used is the one held by the Regex handle this method is called on.
//
// @param str The string to check.
// @param ret Error code, if applicable.
// @return Number of matches found or -1 on failure.
//
// @note To read the results, call GetSubString() with its match argument
// going from 0 to (number of matches - 1).
public native int MatchAll(const char[] str, RegexError &ret = REGEX_ERROR_NONE);
|
||||||
|
|
||||||
// Returns a matched substring from a regex handle.
|
// Returns a matched substring from a regex handle.
|
||||||
//
|
//
|
||||||
// Substring ids start at 0 and end at substrings-1, where substrings is the
|
// Substring ids start at 0 and end at captures-1, where captures is the
|
||||||
// number returned by Regex.Match.
|
// number returned by Regex.Match or Regex.CaptureCount.
|
||||||
//
|
//
|
||||||
// @param regex The regex handle to extract data from.
|
// @param regex The regex handle to extract data from.
|
||||||
// @param str_id The index of the expression to get - starts at 0, and ends at substrings - 1.
|
// @param str_id The index of the expression to get - starts at 0, and ends at captures - 1.
|
||||||
// @param buffer The buffer to set to the matching substring.
|
// @param buffer The buffer to set to the matching substring.
|
||||||
// @param maxlen The maximum string length of the buffer.
|
// @param maxlen The maximum string length of the buffer.
|
||||||
|
// @param match Match to get the captures for - starts at 0, and ends at MatchCount() -1
|
||||||
// @return True if a substring was found, False on fail/error
|
// @return True if a substring was found, False on fail/error
|
||||||
public native bool GetSubString(int str_id, char[] buffer, int maxlen);
|
//
|
||||||
|
// @note str_id = 0 is the full captured string, anything else is the capture group index.
|
||||||
|
// if Regex.Match is used match can only be 0
|
||||||
|
public native bool GetSubString(int str_id, char[] buffer, int maxlen, int match = 0);
|
||||||
|
|
||||||
|
// Returns the number of matches.
//
// When using Match this is always 1 or 0 (unless an error occurred).
//
// @return Total number of matches found.
public native int MatchCount();
|
||||||
|
|
||||||
|
// Returns the number of captures for a match.
//
// @param match Match to get the number of captures for. Match starts at 0, and ends at MatchCount() - 1.
// @return Number of captures in the match.
//
// @note To get all captures, call GetSubString() with str_id going from 1 to captures - 1.
public native int CaptureCount(int match = 0);
|
||||||
|
|
||||||
|
// Returns the string offset of a match.
//
// @param match Match to get the offset of. Match starts at 0, and ends at MatchCount() - 1.
// @return Offset of the match in the string.
//
// @note The value can be passed as the offset argument of Match() to continue
// searching after this match.
public native int MatchOffset(int match = 0);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -149,7 +202,7 @@ native Regex CompileRegex(const char[] pattern, int flags = 0, char[] error="",
|
|||||||
* @param str The string to check.
|
* @param str The string to check.
|
||||||
* @param regex Regex Handle from CompileRegex()
|
* @param regex Regex Handle from CompileRegex()
|
||||||
* @param ret Error code, if applicable.
|
* @param ret Error code, if applicable.
|
||||||
* @return Number of substrings found or -1 on failure.
|
* @return Number of captures found or -1 on failure.
|
||||||
*
|
*
|
||||||
* @note Use the regex handle passed to this function to extract
|
* @note Use the regex handle passed to this function to extract
|
||||||
* matches with GetRegexSubString().
|
* matches with GetRegexSubString().
|
||||||
@ -158,14 +211,17 @@ native int MatchRegex(Handle regex, const char[] str, RegexError &ret = REGEX_ER
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a matched substring from a regex handle.
|
* Returns a matched substring from a regex handle.
|
||||||
* Substring ids start at 0 and end at substrings-1, where substrings is the number returned
|
* Substring ids start at 0 and end at captures-1, where captures is the number returned
|
||||||
* by MatchRegex
|
* by MatchRegex.
|
||||||
*
|
*
|
||||||
* @param regex The regex handle to extract data from.
|
* @param regex The regex handle to extract data from.
|
||||||
* @param str_id The index of the expression to get - starts at 0, and ends at substrings - 1.
|
* @param str_id The index of the expression to get - starts at 0, and ends at captures - 1.
|
||||||
* @param buffer The buffer to set to the matching substring.
|
* @param buffer The buffer to set to the matching substring.
|
||||||
* @param maxlen The maximum string length of the buffer.
|
* @param maxlen The maximum string length of the buffer.
|
||||||
* @return True if a substring was found, False on fail/error
|
* @return True if a substring was found, False on fail/error
|
||||||
|
*
|
||||||
|
* @note str_id = 0 is the full captured string, anything else is the capture group index.
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
native bool GetRegexSubString(Handle regex, int str_id, char[] buffer, int maxlen);
|
native bool GetRegexSubString(Handle regex, int str_id, char[] buffer, int maxlen);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user