Add new regex natives. (#767)
Add new regex natives to get multiple/all matches.
This commit is contained in:
parent
f9faf9e48c
commit
5ac3390656
@ -33,7 +33,6 @@
|
||||
|
||||
#include "pcre.h"
|
||||
#include "CRegEx.h"
|
||||
#include <sh_string.h>
|
||||
#include "extension.h"
|
||||
|
||||
RegEx::RegEx()
|
||||
@ -43,7 +42,7 @@ RegEx::RegEx()
|
||||
re = NULL;
|
||||
mFree = true;
|
||||
subject = NULL;
|
||||
mSubStrings = 0;
|
||||
mMatchCount = 0;
|
||||
}
|
||||
|
||||
void RegEx::Clear ()
|
||||
@ -57,7 +56,7 @@ void RegEx::Clear ()
|
||||
if (subject)
|
||||
delete [] subject;
|
||||
subject = NULL;
|
||||
mSubStrings = 0;
|
||||
mMatchCount = 0;
|
||||
}
|
||||
|
||||
RegEx::~RegEx()
|
||||
@ -93,7 +92,7 @@ int RegEx::Compile(const char *pattern, int iFlags)
|
||||
return 1;
|
||||
}
|
||||
|
||||
int RegEx::Match(const char *str)
|
||||
int RegEx::Match(const char *str, unsigned int offset)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
@ -106,7 +105,9 @@ int RegEx::Match(const char *str)
|
||||
subject = new char[strlen(str)+1];
|
||||
strcpy(subject, str);
|
||||
|
||||
rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30);
|
||||
unsigned int len = strlen(subject);
|
||||
|
||||
rc = pcre_exec(re, NULL, subject, len, offset, 0, mMatches[0].mVector, MAX_CAPTURES);
|
||||
|
||||
if (rc < 0)
|
||||
{
|
||||
@ -119,10 +120,54 @@ int RegEx::Match(const char *str)
|
||||
}
|
||||
}
|
||||
|
||||
mSubStrings = rc;
|
||||
mMatches[0].mSubStringCount = rc;
|
||||
mMatchCount = 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int RegEx::MatchAll(const char *str)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
if (mFree || re == NULL)
|
||||
return -1;
|
||||
|
||||
this->ClearMatch();
|
||||
|
||||
//save str
|
||||
subject = new char[strlen(str) + 1];
|
||||
strcpy(subject, str);
|
||||
|
||||
unsigned int offset = 0;
|
||||
unsigned int len = strlen(subject);
|
||||
unsigned int matches = 0;
|
||||
|
||||
while (matches < MAX_MATCHES && offset < len && (rc = pcre_exec(re, 0, subject, len, offset, 0, mMatches[matches].mVector, MAX_CAPTURES)) >= 0)
|
||||
{
|
||||
offset = mMatches[matches].mVector[1];
|
||||
mMatches[matches].mSubStringCount = rc;
|
||||
|
||||
matches++;
|
||||
}
|
||||
|
||||
if (rc < PCRE_ERROR_NOMATCH || (rc == PCRE_ERROR_NOMATCH && matches == 0))
|
||||
{
|
||||
if (rc == PCRE_ERROR_NOMATCH)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
mErrorOffset = rc;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
mMatchCount = matches;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
void RegEx::ClearMatch()
|
||||
{
|
||||
// Clears match results
|
||||
@ -131,17 +176,18 @@ void RegEx::ClearMatch()
|
||||
if (subject)
|
||||
delete [] subject;
|
||||
subject = NULL;
|
||||
mSubStrings = 0;
|
||||
mMatchCount = 0;
|
||||
}
|
||||
|
||||
const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
||||
bool RegEx::GetSubstring(int s, char buffer[], int max, int match)
|
||||
{
|
||||
int i = 0;
|
||||
if (s >= mSubStrings || s < 0)
|
||||
return NULL;
|
||||
|
||||
char *substr_a = subject + ovector[2*s];
|
||||
int substr_l = ovector[2*s+1] - ovector[2*s];
|
||||
if (s >= mMatches[match].mSubStringCount || s < 0)
|
||||
return false;
|
||||
|
||||
char *substr_a = subject + mMatches[match].mVector[2 * s];
|
||||
int substr_l = mMatches[match].mVector[2 * s + 1] - mMatches[match].mVector[2 * s];
|
||||
|
||||
for (i = 0; i<substr_l; i++)
|
||||
{
|
||||
@ -152,6 +198,6 @@ const char *RegEx::GetSubstring(int s, char buffer[], int max)
|
||||
|
||||
buffer[i] = '\0';
|
||||
|
||||
return buffer;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -28,10 +28,20 @@
|
||||
*
|
||||
* Version: $Id$
|
||||
*/
|
||||
#include <am-string.h>
|
||||
|
||||
#ifndef _INCLUDE_CREGEX_H
|
||||
#define _INCLUDE_CREGEX_H
|
||||
|
||||
/* Upper bound on the number of matches stored per RegEx object. */
#define MAX_MATCHES 20
/* Number of ints in each match's offset vector handed to pcre_exec.
 * Parenthesized so the macro expands safely inside larger expressions. */
#define MAX_CAPTURES (MAX_MATCHES * 3)

/**
 * Result of a single pattern match.
 */
struct RegexMatch
{
	/* Value returned by pcre_exec for this match. */
	int mSubStringCount;
	/* Offset vector filled in by pcre_exec; entries 2*s and 2*s+1 delimit substring s. */
	int mVector[MAX_CAPTURES];
};
|
||||
|
||||
class RegEx
|
||||
{
|
||||
public:
|
||||
@ -41,17 +51,18 @@ public:
|
||||
void Clear();
|
||||
|
||||
int Compile(const char *pattern, int iFlags);
|
||||
int Match(const char *str);
|
||||
int Match(const char *str, unsigned int offset);
|
||||
int MatchAll(const char *str);
|
||||
void ClearMatch();
|
||||
const char *GetSubstring(int s, char buffer[], int max);
|
||||
bool GetSubstring(int s, char buffer[], int max, int match);
|
||||
public:
|
||||
int mErrorOffset;
|
||||
const char *mError;
|
||||
int mSubStrings;
|
||||
int mMatchCount;
|
||||
RegexMatch mMatches[MAX_MATCHES];
|
||||
private:
|
||||
pcre *re;
|
||||
bool mFree;
|
||||
int ovector[30];
|
||||
char *subject;
|
||||
};
|
||||
|
||||
|
@ -112,6 +112,13 @@ static cell_t MatchRegex(IPluginContext *pCtx, const cell_t *params)
|
||||
sec.pOwner = NULL;
|
||||
sec.pIdentity = myself->GetIdentity();
|
||||
|
||||
unsigned int offset = 0;
|
||||
|
||||
if (params[0] >= 4)
|
||||
{
|
||||
offset = (unsigned int)params[4];
|
||||
}
|
||||
|
||||
RegEx *x;
|
||||
|
||||
if ((err=g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None)
|
||||
@ -129,7 +136,10 @@ static cell_t MatchRegex(IPluginContext *pCtx, const cell_t *params)
|
||||
char *str;
|
||||
pCtx->LocalToString(params[2], &str);
|
||||
|
||||
int e = x->Match(str);
|
||||
if(offset >= strlen(str))
|
||||
return pCtx->ThrowNativeError("Invalid string index\n");
|
||||
|
||||
int e = x->Match(str, offset);
|
||||
|
||||
if (e == -1)
|
||||
{
|
||||
@ -153,7 +163,60 @@ static cell_t MatchRegex(IPluginContext *pCtx, const cell_t *params)
|
||||
}
|
||||
else
|
||||
{
|
||||
return x->mSubStrings;
|
||||
return x->mMatches[0].mSubStringCount;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Native behind Regex.MatchAll.
 *
 * params[1] is the regex handle, params[2] the subject string and params[3]
 * the address of the cell that receives the error code when matching fails.
 *
 * @return		Number of matches recorded, 0 if nothing matched, -1 on a match error.
 */
static cell_t MatchRegexAll(IPluginContext *pCtx, const cell_t *params)
{
	HandleSecurity sec;
	sec.pOwner = NULL;
	sec.pIdentity = myself->GetIdentity();

	Handle_t hndl = static_cast<Handle_t>(params[1]);
	RegEx *regex;

	HandleError err = g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&regex);
	if (err != HandleError_None)
	{
		return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", hndl, err);
	}

	if (regex == NULL)
	{
		pCtx->ThrowNativeError("Regex data not found\n");

		return 0;
	}

	char *subject;
	pCtx->LocalToString(params[2], &subject);

	switch (regex->MatchAll(subject))
	{
		case -1:
		{
			/* Matching failed: hand the stored error value back to the plugin. */
			cell_t *errorOut;
			pCtx->LocalToPhysAddr(params[3], &errorOut);
			*errorOut = regex->mErrorOffset;

			/* Drop only the match results; the compiled regex object
			   may still be referenced later. */
			regex->ClearMatch();

			return -1;
		}
		case 0:
		{
			/* Nothing matched. Again only the match results are cleared. */
			regex->ClearMatch();

			return 0;
		}
		default:
		{
			return regex->mMatchCount;
		}
	}
}
|
||||
|
||||
@ -165,6 +228,8 @@ static cell_t GetRegexSubString(IPluginContext *pCtx, const cell_t *params)
|
||||
sec.pOwner=NULL;
|
||||
sec.pIdentity=myself->GetIdentity();
|
||||
|
||||
int match = 0;
|
||||
|
||||
RegEx *x;
|
||||
|
||||
if ((err=g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None)
|
||||
@ -178,17 +243,93 @@ static cell_t GetRegexSubString(IPluginContext *pCtx, const cell_t *params)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static char buffer[4096];
|
||||
const char *ret=x->GetSubstring(params[2], buffer, sizeof(buffer));
|
||||
|
||||
if(!ret)
|
||||
if (params[0] >= 5)
|
||||
{
|
||||
return 0;
|
||||
match = params[5];
|
||||
}
|
||||
|
||||
pCtx->StringToLocalUTF8(params[3], params[4], ret, NULL);
|
||||
if(match >= x->mMatchCount || match < 0)
|
||||
return pCtx->ThrowNativeError("Invalid match index passed.\n");
|
||||
|
||||
return 1;
|
||||
char *buffer;
|
||||
pCtx->LocalToString(params[3], &buffer);
|
||||
|
||||
return x->GetSubstring(params[2], buffer, params[4], match);
|
||||
}
|
||||
|
||||
/**
 * Native behind Regex.MatchCount.
 *
 * params[1] is the regex handle.
 *
 * @return		The mMatchCount value of the RegEx object behind the handle.
 */
static cell_t GetRegexMatchCount(IPluginContext *pCtx, const cell_t *params)
{
	HandleSecurity sec;
	sec.pOwner = NULL;
	sec.pIdentity = myself->GetIdentity();

	Handle_t hndl = static_cast<Handle_t>(params[1]);
	RegEx *regex;

	HandleError err = g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&regex);
	if (err != HandleError_None)
	{
		return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", hndl, err);
	}

	if (regex == NULL)
	{
		return pCtx->ThrowNativeError("Regex data not found\n");
	}

	return regex->mMatchCount;
}
|
||||
|
||||
/**
 * Native behind Regex.CaptureCount.
 *
 * params[1] is the regex handle and params[2] the index of the match to query;
 * an index outside 0 .. mMatchCount - 1 raises a native error.
 *
 * @return		The mSubStringCount stored for the requested match.
 */
static cell_t GetRegexCaptureCount(IPluginContext *pCtx, const cell_t *params)
{
	HandleSecurity sec;
	sec.pOwner = NULL;
	sec.pIdentity = myself->GetIdentity();

	Handle_t hndl = static_cast<Handle_t>(params[1]);
	RegEx *regex;

	HandleError err = g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&regex);
	if (err != HandleError_None)
	{
		return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", hndl, err);
	}

	if (regex == NULL)
	{
		return pCtx->ThrowNativeError("Regex data not found\n");
	}

	const cell_t match = params[2];
	if (match < 0 || match >= regex->mMatchCount)
	{
		return pCtx->ThrowNativeError("Invalid match index passed.\n");
	}

	return regex->mMatches[match].mSubStringCount;
}
|
||||
|
||||
/**
 * Native behind Regex.MatchOffset.
 *
 * params[1] is the regex handle and params[2] the index of the match to query;
 * an index outside 0 .. mMatchCount - 1 raises a native error.
 *
 * @return		Element 1 of the requested match's offset vector.
 */
static cell_t GetRegexOffset(IPluginContext *pCtx, const cell_t *params)
{
	HandleSecurity sec;
	sec.pOwner = NULL;
	sec.pIdentity = myself->GetIdentity();

	Handle_t hndl = static_cast<Handle_t>(params[1]);
	RegEx *regex;

	HandleError err = g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&regex);
	if (err != HandleError_None)
	{
		return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", hndl, err);
	}

	if (regex == NULL)
	{
		return pCtx->ThrowNativeError("Regex data not found\n");
	}

	const cell_t match = params[2];
	if (match < 0 || match >= regex->mMatchCount)
	{
		return pCtx->ThrowNativeError("Invalid match index passed.\n");
	}

	/* NOTE(review): in a pcre_exec offset vector, entry 1 is presumably the
	   offset just past the end of the whole match rather than its start -
	   confirm this is the value plugins are meant to receive. */
	return regex->mMatches[match].mVector[1];
}
|
||||
|
||||
void RegexHandler::OnHandleDestroy(HandleType_t type, void *object)
|
||||
@ -209,5 +350,9 @@ const sp_nativeinfo_t regex_natives[] =
|
||||
{"Regex.GetSubString", GetRegexSubString},
|
||||
{"Regex.Match", MatchRegex},
|
||||
{"Regex.Regex", CompileRegex},
|
||||
{"Regex.MatchAll", MatchRegexAll},
|
||||
{"Regex.MatchCount", GetRegexMatchCount},
|
||||
{"Regex.CaptureCount", GetRegexCaptureCount},
|
||||
{"Regex.MatchOffset", GetRegexOffset},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
@ -110,23 +110,58 @@ methodmap Regex < Handle
|
||||
// @param str The string to check.
|
||||
// @param regex Regex Handle from CompileRegex()
|
||||
// @param ret Error code, if applicable.
|
||||
// @return Number of substrings found or -1 on failure.
|
||||
// @param offset Offset in the string to start searching from. MatchOffset returns the offset of the match.
|
||||
// @return Number of captures found or -1 on failure.
|
||||
//
|
||||
// @note Use the regex handle passed to this function to extract
|
||||
// matches with GetRegexSubString().
|
||||
public native int Match(const char[] str, RegexError &ret = REGEX_ERROR_NONE);
|
||||
// matches with GetSubString().
|
||||
public native int Match(const char[] str, RegexError &ret = REGEX_ERROR_NONE, int offset = 0);
|
||||
|
||||
// Gets all matches from a string against a pre-compiled regular expression pattern.
|
||||
//
|
||||
// @param str The string to check.
|
||||
// @param regex Regex Handle from CompileRegex()
|
||||
// @param ret Error code, if applicable.
|
||||
// @return Number of matches found or -1 on failure.
|
||||
//
|
||||
// @note			Use GetSubString() with the match parameter looping from 0 -> MatchCount() - 1 to read every match.
|
||||
public native int MatchAll(const char[] str, RegexError &ret = REGEX_ERROR_NONE);
|
||||
|
||||
// Returns a matched substring from a regex handle.
|
||||
//
|
||||
// Substring ids start at 0 and end at substrings-1, where substrings is the
|
||||
// number returned by Regex.Match.
|
||||
// Substring ids start at 0 and end at captures-1, where captures is the
|
||||
// number returned by Regex.Match or Regex.CaptureCount.
|
||||
//
|
||||
// @param regex The regex handle to extract data from.
|
||||
// @param str_id The index of the expression to get - starts at 0, and ends at substrings - 1.
|
||||
// @param str_id The index of the expression to get - starts at 0, and ends at captures - 1.
|
||||
// @param buffer The buffer to set to the matching substring.
|
||||
// @param maxlen The maximum string length of the buffer.
|
||||
// @param match Match to get the captures for - starts at 0, and ends at MatchCount() -1
|
||||
// @return True if a substring was found, False on fail/error
|
||||
public native bool GetSubString(int str_id, char[] buffer, int maxlen);
|
||||
//
|
||||
// @note str_id = 0 is the full captured string, anything else is the capture group index.
|
||||
// if Regex.Match is used match can only be 0
|
||||
public native bool GetSubString(int str_id, char[] buffer, int maxlen, int match = 0);
|
||||
|
||||
// Returns number of matches
|
||||
//
|
||||
// When using Match this is always 1 or 0 (unless an error occurred)
|
||||
// @return Total number of matches found.
|
||||
public native int MatchCount();
|
||||
|
||||
// Returns number of captures for a match
|
||||
//
|
||||
// @param match Match to get the number of captures for. Match starts at 0, and ends at MatchCount() -1
|
||||
// @return Number of captures in the match.
|
||||
//
|
||||
// @note Use GetSubString() and loop from 1 -> captures -1 for str_id to get all captures
|
||||
public native int CaptureCount(int match = 0);
|
||||
|
||||
// Returns the string offset of a match.
//
// @param match		Match to get the offset of. Match starts at 0, and ends at MatchCount() - 1
// @return			Offset of the match in the string.
public native int MatchOffset(int match = 0);
|
||||
};
|
||||
|
||||
/**
|
||||
@ -149,7 +184,7 @@ native Regex CompileRegex(const char[] pattern, int flags = 0, char[] error="",
|
||||
* @param str The string to check.
|
||||
* @param regex Regex Handle from CompileRegex()
|
||||
* @param ret Error code, if applicable.
|
||||
* @return Number of substrings found or -1 on failure.
|
||||
* @return Number of captures found or -1 on failure.
|
||||
*
|
||||
* @note Use the regex handle passed to this function to extract
|
||||
* matches with GetRegexSubString().
|
||||
@ -158,14 +193,17 @@ native int MatchRegex(Handle regex, const char[] str, RegexError &ret = REGEX_ER
|
||||
|
||||
/**
|
||||
* Returns a matched substring from a regex handle.
|
||||
* Substring ids start at 0 and end at substrings-1, where substrings is the number returned
|
||||
* by MatchRegex
|
||||
* Substring ids start at 0 and end at captures-1, where captures is the number returned
|
||||
* by MatchRegex.
|
||||
*
|
||||
* @param regex The regex handle to extract data from.
|
||||
* @param str_id The index of the expression to get - starts at 0, and ends at substrings - 1.
|
||||
* @param str_id The index of the expression to get - starts at 0, and ends at captures - 1.
|
||||
* @param buffer The buffer to set to the matching substring.
|
||||
* @param maxlen The maximum string length of the buffer.
|
||||
* @return True if a substring was found, False on fail/error
|
||||
*
|
||||
* @note str_id = 0 is the full captured string, anything else is the capture group index.
|
||||
*
|
||||
*/
|
||||
native bool GetRegexSubString(Handle regex, int str_id, char[] buffer, int maxlen);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user