diff --git a/extensions/regex/CRegEx.cpp b/extensions/regex/CRegEx.cpp index 050bda39..3f57d900 100644 --- a/extensions/regex/CRegEx.cpp +++ b/extensions/regex/CRegEx.cpp @@ -33,7 +33,6 @@ #include "pcre.h" #include "CRegEx.h" -#include #include "extension.h" RegEx::RegEx() @@ -43,7 +42,7 @@ RegEx::RegEx() re = NULL; mFree = true; subject = NULL; - mSubStrings = 0; + mMatchCount = 0; } void RegEx::Clear () @@ -57,7 +56,7 @@ void RegEx::Clear () if (subject) delete [] subject; subject = NULL; - mSubStrings = 0; + mMatchCount = 0; } RegEx::~RegEx() @@ -93,7 +92,7 @@ int RegEx::Compile(const char *pattern, int iFlags) return 1; } -int RegEx::Match(const char *str) +int RegEx::Match(const char *str, unsigned int offset) { int rc = 0; @@ -106,7 +105,9 @@ int RegEx::Match(const char *str) subject = new char[strlen(str)+1]; strcpy(subject, str); - rc = pcre_exec(re, NULL, subject, (int)strlen(subject), 0, 0, ovector, 30); + unsigned int len = strlen(subject); + + rc = pcre_exec(re, NULL, subject, len, offset, 0, mMatches[0].mVector, MAX_CAPTURES); if (rc < 0) { @@ -119,10 +120,54 @@ int RegEx::Match(const char *str) } } - mSubStrings = rc; + mMatches[0].mSubStringCount = rc; + mMatchCount = 1; return 1; } + +int RegEx::MatchAll(const char *str) +{ + int rc = 0; + + if (mFree || re == NULL) + return -1; + + this->ClearMatch(); + + //save str + subject = new char[strlen(str) + 1]; + strcpy(subject, str); + + unsigned int offset = 0; + unsigned int len = strlen(subject); + unsigned int matches = 0; + + while (matches < MAX_MATCHES && offset < len && (rc = pcre_exec(re, 0, subject, len, offset, 0, mMatches[matches].mVector, MAX_CAPTURES)) >= 0) + { + offset = mMatches[matches].mVector[1]; + mMatches[matches].mSubStringCount = rc; + + matches++; + } + + if (rc < PCRE_ERROR_NOMATCH || (rc == PCRE_ERROR_NOMATCH && matches == 0)) + { + if (rc == PCRE_ERROR_NOMATCH) + { + return 0; + } + else { + mErrorOffset = rc; + return -1; + } + } + + mMatchCount = matches; + + return 1; +} + void RegEx::ClearMatch() { // Clears match results @@ -131,17 +176,18 @@ void RegEx::ClearMatch() if (subject) delete [] subject; subject = NULL; - mSubStrings = 0; + mMatchCount = 0; } -const char *RegEx::GetSubstring(int s, char buffer[], int max) +bool RegEx::GetSubstring(int s, char buffer[], int max, int match) { int i = 0; - if (s >= mSubStrings || s < 0) - return NULL; - char *substr_a = subject + ovector[2*s]; - int substr_l = ovector[2*s+1] - ovector[2*s]; + if (s >= mMatches[match].mSubStringCount || s < 0) + return false; + + char *substr_a = subject + mMatches[match].mVector[2 * s]; + int substr_l = mMatches[match].mVector[2 * s + 1] - mMatches[match].mVector[2 * s]; for (i = 0; i #ifndef _INCLUDE_CREGEX_H #define _INCLUDE_CREGEX_H +#define MAX_MATCHES 20 +#define MAX_CAPTURES MAX_MATCHES*3 + +struct RegexMatch +{ + int mSubStringCount; + int mVector[MAX_CAPTURES]; +}; + class RegEx { public: @@ -41,17 +51,18 @@ public: void Clear(); int Compile(const char *pattern, int iFlags); - int Match(const char *str); + int Match(const char *str, unsigned int offset); + int MatchAll(const char *str); void ClearMatch(); - const char *GetSubstring(int s, char buffer[], int max); + bool GetSubstring(int s, char buffer[], int max, int match); public: int mErrorOffset; const char *mError; - int mSubStrings; + int mMatchCount; + RegexMatch mMatches[MAX_MATCHES]; private: pcre *re; bool mFree; - int ovector[30]; char *subject; }; diff --git a/extensions/regex/extension.cpp b/extensions/regex/extension.cpp index c35015b8..975d7eed 100644 --- a/extensions/regex/extension.cpp +++ b/extensions/regex/extension.cpp @@ -112,6 +112,13 @@ static cell_t MatchRegex(IPluginContext *pCtx, const cell_t *params) sec.pOwner = NULL; sec.pIdentity = myself->GetIdentity(); + unsigned int offset = 0; + + if (params[0] >= 4) + { + offset = (unsigned int)params[4]; + } + RegEx *x; if ((err=g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None) @@ -129,7 +136,10 @@ static cell_t MatchRegex(IPluginContext *pCtx, const cell_t *params) char *str; pCtx->LocalToString(params[2], &str); - int e = x->Match(str); + if(offset >= strlen(str)) + return pCtx->ThrowNativeError("Invalid string index\n"); + + int e = x->Match(str, offset); if (e == -1) { @@ -153,7 +163,60 @@ static cell_t MatchRegex(IPluginContext *pCtx, const cell_t *params) } else { - return x->mSubStrings; + return x->mMatches[0].mSubStringCount; + } +} + +static cell_t MatchRegexAll(IPluginContext *pCtx, const cell_t *params) +{ + Handle_t hndl = static_cast(params[1]); + HandleError err; + HandleSecurity sec; + sec.pOwner = NULL; + sec.pIdentity = myself->GetIdentity(); + + RegEx *x; + + if ((err = g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None) + { + return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", hndl, err); + } + + if (!x) + { + pCtx->ThrowNativeError("Regex data not found\n"); + + return 0; + } + + char *str; + pCtx->LocalToString(params[2], &str); + + int e = x->MatchAll(str); + + if (e == -1) + { + /* there was a match error. move on. */ + cell_t *res; + pCtx->LocalToPhysAddr(params[3], &res); + *res = x->mErrorOffset; + /* only clear the match results, since the regex object + may still be referenced later */ + x->ClearMatch(); + + return -1; + } + else if (e == 0) + { + /* only clear the match results, since the regex object + may still be referenced later */ + x->ClearMatch(); + + return 0; + } + else + { + return x->mMatchCount; } } @@ -165,6 +228,8 @@ static cell_t GetRegexSubString(IPluginContext *pCtx, const cell_t *params) sec.pOwner=NULL; sec.pIdentity=myself->GetIdentity(); + int match = 0; + RegEx *x; if ((err=g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None) @@ -178,17 +243,93 @@ static cell_t GetRegexSubString(IPluginContext *pCtx, const cell_t *params) return 0; } - static char buffer[4096]; - const char *ret=x->GetSubstring(params[2], buffer, sizeof(buffer)); - - if(!ret) + if (params[0] >= 5) { - return 0; + match = params[5]; } - pCtx->StringToLocalUTF8(params[3], params[4], ret, NULL); + if(match >= x->mMatchCount || match < 0) + return pCtx->ThrowNativeError("Invalid match index passed.\n"); - return 1; + char *buffer; + pCtx->LocalToString(params[3], &buffer); + + return x->GetSubstring(params[2], buffer, params[4], match); +} + +static cell_t GetRegexMatchCount(IPluginContext *pCtx, const cell_t *params) +{ + Handle_t hndl = static_cast(params[1]); + HandleError err; + HandleSecurity sec; + sec.pOwner = NULL; + sec.pIdentity = myself->GetIdentity(); + + RegEx *x; + + if ((err = g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None) + { + return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", hndl, err); + } + + if (!x) + { + return pCtx->ThrowNativeError("Regex data not found\n"); + } + + return x->mMatchCount; +} + +static cell_t GetRegexCaptureCount(IPluginContext *pCtx, const cell_t *params) +{ + Handle_t hndl = static_cast(params[1]); + HandleError err; + HandleSecurity sec; + sec.pOwner = NULL; + sec.pIdentity = myself->GetIdentity(); + + RegEx *x; + + if ((err = g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None) + { + return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", hndl, err); + } + + if (!x) + { + return pCtx->ThrowNativeError("Regex data not found\n"); + } + + if (params[2] >= x->mMatchCount || params[2] < 0) + return pCtx->ThrowNativeError("Invalid match index passed.\n"); + + return x->mMatches[params[2]].mSubStringCount; +} + +static cell_t GetRegexOffset(IPluginContext *pCtx, const cell_t *params) +{ + Handle_t hndl = static_cast(params[1]); + HandleError err; + HandleSecurity sec; + sec.pOwner = NULL; + sec.pIdentity = myself->GetIdentity(); + + RegEx *x; + + if ((err = g_pHandleSys->ReadHandle(hndl, g_RegexHandle, &sec, (void **)&x)) != HandleError_None) + { + return pCtx->ThrowNativeError("Invalid regex handle %x (error %d)", hndl, err); + } + + if (!x) + { + return pCtx->ThrowNativeError("Regex data not found\n"); + } + + if (params[2] >= x->mMatchCount || params[2] < 0) + return pCtx->ThrowNativeError("Invalid match index passed.\n"); + + return x->mMatches[params[2]].mVector[1]; } void RegexHandler::OnHandleDestroy(HandleType_t type, void *object) @@ -209,5 +350,9 @@ const sp_nativeinfo_t regex_natives[] = {"Regex.GetSubString", GetRegexSubString}, {"Regex.Match", MatchRegex}, {"Regex.Regex", CompileRegex}, + {"Regex.MatchAll", MatchRegexAll}, + {"Regex.MatchCount", GetRegexMatchCount}, + {"Regex.CaptureCount", GetRegexCaptureCount}, + {"Regex.MatchOffset", GetRegexOffset}, {NULL, NULL}, }; diff --git a/plugins/include/regex.inc b/plugins/include/regex.inc index 2d711b90..8ebdc4f1 100644 --- a/plugins/include/regex.inc +++ b/plugins/include/regex.inc @@ -110,23 +110,58 @@ methodmap Regex < Handle // @param str The string to check. // @param regex Regex Handle from CompileRegex() // @param ret Error code, if applicable. - // @return Number of substrings found or -1 on failure. + // @param offset Offset in the string to start searching from. MatchOffset returns the offset of the match. + // @return Number of captures found or -1 on failure. // // @note Use the regex handle passed to this function to extract - // matches with GetRegexSubString(). - public native int Match(const char[] str, RegexError &ret = REGEX_ERROR_NONE); + // matches with GetSubString(). + public native int Match(const char[] str, RegexError &ret = REGEX_ERROR_NONE, int offset = 0); + + // Gets all matches from a string against a pre-compiled regular expression pattern. + // + // @param str The string to check. + // @param regex Regex Handle from CompileRegex() + // @param ret Error code, if applicable. + // @return Number of matches found or -1 on failure. + // + // @note Use GetSubString() and loop from 1 -> totalmatches. + public native int MatchAll(const char[] str, RegexError &ret = REGEX_ERROR_NONE); // Returns a matched substring from a regex handle. // - // Substring ids start at 0 and end at substrings-1, where substrings is the - // number returned by Regex.Match. + // Substring ids start at 0 and end at captures-1, where captures is the + // number returned by Regex.Match or Regex.CaptureCount. // // @param regex The regex handle to extract data from. - // @param str_id The index of the expression to get - starts at 0, and ends at substrings - 1. + // @param str_id The index of the expression to get - starts at 0, and ends at captures - 1. // @param buffer The buffer to set to the matching substring. // @param maxlen The maximum string length of the buffer. + // @param match Match to get the captures for - starts at 0, and ends at MatchCount() -1 // @return True if a substring was found, False on fail/error - public native bool GetSubString(int str_id, char[] buffer, int maxlen); + // + // @note str_id = 0 is the full captured string, anything else is the capture group index. + // if Regex.Match is used match can only be 0 + public native bool GetSubString(int str_id, char[] buffer, int maxlen, int match = 0); + + // Returns number of matches + // + // When using Match this is always 1 or 0 (unless an error occured) + // @return Total number of matches found. + public native int MatchCount(); + + // Returns number of captures for a match + // + // @param match Match to get the number of captures for. Match starts at 0, and ends at MatchCount() -1 + // @return Number of captures in the match. + // + // @note Use GetSubString() and loop from 1 -> captures -1 for str_id to get all captures + public native int CaptureCount(int match = 0); + + // Returns the string offset of a match. + // + // @param match Match to get the offset of. Match starts at 0, and ends at MatchCount() -1 + // @return Offset of the match in the string. + public native int MatchOffset(int match = 0) }; /** @@ -149,7 +184,7 @@ native Regex CompileRegex(const char[] pattern, int flags = 0, char[] error="", * @param str The string to check. * @param regex Regex Handle from CompileRegex() * @param ret Error code, if applicable. - * @return Number of substrings found or -1 on failure. + * @return Number of captures found or -1 on failure. * * @note Use the regex handle passed to this function to extract * matches with GetRegexSubString(). @@ -158,14 +193,17 @@ native int MatchRegex(Handle regex, const char[] str, RegexError &ret = REGEX_ER /** * Returns a matched substring from a regex handle. - * Substring ids start at 0 and end at substrings-1, where substrings is the number returned - * by MatchRegex + * Substring ids start at 0 and end at captures-1, where captures is the number returned + * by MatchRegex. * * @param regex The regex handle to extract data from. - * @param str_id The index of the expression to get - starts at 0, and ends at substrings - 1. + * @param str_id The index of the expression to get - starts at 0, and ends at captures - 1. * @param buffer The buffer to set to the matching substring. * @param maxlen The maximum string length of the buffer. * @return True if a substring was found, False on fail/error + * + * @note str_id = 0 is the full captured string, anything else is the capture group index. + * */ native bool GetRegexSubString(Handle regex, int str_id, char[] buffer, int maxlen);