From 5a6b12358d5013f294b962ccd0614dbf4447348b Mon Sep 17 00:00:00 2001
From: mitchell <70453897+667e-11@users.noreply.github.com>
Date: Mon, 4 Jul 2016 21:44:50 -0400
Subject: Replaced Lua pattern matching with Regex via Scintilla and TRE.

As a result, changed `ui.find.lua` to `ui.find.regex`
Also removed luautf8 dependency since it is no longer needed.
Regex replacements cannot contain embedded Lua code.
Jumping to "find in files" results selects those results instead of just
jumping to their respective lines.
---
 src/scintilla.patch | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 232 insertions(+)

(limited to 'src/scintilla.patch')

diff --git a/src/scintilla.patch b/src/scintilla.patch
index a6111696..4731fef2 100644
--- a/src/scintilla.patch
+++ b/src/scintilla.patch
@@ -30,3 +30,235 @@ diff -r eb69b2b4bb85 gtk/ScintillaGTK.cxx
  	object_class->finalize = Destroy;
  #if GTK_CHECK_VERSION(3,0,0)
  	widget_class->get_preferred_width = GetPreferredWidth;
+diff -r bfdfb44eb777 src/Document.cxx
+--- a/src/Document.cxx	Sun May 22 08:57:20 2016 +1000
++++ b/src/Document.cxx	Mon Jul 04 15:23:05 2016 -0400
+@@ -2845,3 +2845,228 @@
+ #endif
+ 
+ #endif
++
++#include "tre.h"
++
++/**
++ * Regular expression search backend that runs TRE over the document buffer.
++ * The most recently compiled pattern is cached, together with the case
++ * sensitivity it was compiled with, so repeated searches skip recompilation.
++ */
++class TreRegex : public RegexSearchBase {
++public:
++	explicit TreRegex() : compiled(false), lastCaseSensitive(false) {
++		ClearMatches(pmatch);
++	}
++	virtual ~TreRegex() { if (compiled) tre_regfree(&preg); }
++	virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
++	    bool caseSensitive, bool word, bool wordStart, int flags,
++	    int *length);
++	virtual const char *SubstituteByPosition(Document *doc, const char *text,
++	    int *length);
++private:
++	enum { maxMatches = 10 }; // whole match plus groups \1 through \9
++	enum { maxBackwardTries = 1000 }; // caps the rescans of a backward search
++	static void ClearMatches(regmatch_t *matches);
++	bool Exec(const char *buffer, int from, int to, int eflags,
++	    regmatch_t *matches);
++	bool compiled; // true while preg holds a successfully compiled regex
++	bool lastCaseSensitive; // case sensitivity preg was compiled with
++	std::string lastPattern; // pattern text preg was compiled from
++	regex_t preg;
++	regmatch_t pmatch[maxMatches]; // absolute positions of the last match
++	std::string substituted; // backing store for SubstituteByPosition's result
++};
++
++/** Marks every entry of matches as not having participated in a match. */
++void TreRegex::ClearMatches(regmatch_t *matches) {
++	for (int i = 0; i < maxMatches; i++)
++		matches[i].rm_so = matches[i].rm_eo = -1;
++}
++
++/**
++ * Runs the compiled regex over buffer[from, to). On success the offsets in
++ * matches are converted from range-relative to absolute buffer positions.
++ */
++bool TreRegex::Exec(const char *buffer, int from, int to, int eflags,
++    regmatch_t *matches) {
++	if (tre_regnexec(&preg, buffer + from, to - from, maxMatches, matches,
++	    eflags) != REG_OK)
++		return false;
++	// An unmatched group (rm_so == -1) can precede a matched one, as in
++	// "(a)|(b)", so every entry is checked rather than stopping at the first.
++	for (int i = 0; i < maxMatches; i++) {
++		if (matches[i].rm_so == -1) continue;
++		matches[i].rm_so += from;
++		matches[i].rm_eo += from;
++	}
++	return true;
++}
++
++/**
++ * Searches the document between minPos and maxPos for the regex s.
++ * When minPos > maxPos the search runs backwards and reports the last match
++ * found in the range (within maxBackwardTries rescans).
++ * @param s regex source; *length is its length in bytes on entry.
++ * @param caseSensitive when false the regex is compiled with REG_ICASE.
++ * @param length receives the length of the match, or 0 if nothing matched.
++ *   It is left untouched when the regex fails to compile.
++ * @return position of the match, or -1 on no match or an invalid regex.
++ */
++long TreRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
++    bool caseSensitive, bool, bool, int,
++    int *length) {
++	// Determine the search range. (From Document.cxx::RESearchRange.)
++	int increment, startPos, endPos;
++	if (minPos <= maxPos)
++		increment = 1, startPos = minPos, endPos = maxPos;
++	else
++		increment = -1, startPos = maxPos, endPos = minPos;
++	// Range endpoints should not be inside DBCS characters, but just in case,
++	// move them.
++	startPos = doc->MovePositionOutsideChar(startPos, 1, false);
++	endPos = doc->MovePositionOutsideChar(endPos, 1, false);
++	int lineRangeStart = doc->LineFromPosition(startPos);
++	int lineRangeEnd = doc->LineFromPosition(endPos);
++	if (increment == 1 && startPos >= doc->LineEnd(lineRangeStart) &&
++	    lineRangeStart < lineRangeEnd) {
++		// The start position is at end of line or between line end characters.
++		lineRangeStart++;
++		startPos = doc->LineStart(lineRangeStart);
++	} else if (increment == -1 && startPos <= doc->LineStart(lineRangeStart) &&
++	    lineRangeStart > lineRangeEnd) {
++		// The start position is at beginning of line.
++		// NOTE(review): startPos <= endPos after the swap above, so this
++		// branch looks unreachable -- confirm before relying on it.
++		lineRangeStart--;
++		startPos = doc->LineEnd(lineRangeStart);
++	}
++
++	// Compile the regex or use the cached one.
++	const std::string pattern(s, *length);
++	if (!compiled || caseSensitive != lastCaseSensitive || pattern != lastPattern) {
++		// Release the previous regex before overwriting preg. The cache stays
++		// marked empty until the new pattern compiles, so a failed compile is
++		// never searched with or freed later.
++		if (compiled) {
++			tre_regfree(&preg);
++			compiled = false;
++		}
++		int cflags = REG_EXTENDED | (!caseSensitive ? REG_ICASE : 0) | REG_NEWLINE;
++		if (tre_regncomp(&preg, s, *length, cflags) != REG_OK) return -1;
++		compiled = true;
++		lastCaseSensitive = caseSensitive;
++		lastPattern = pattern;
++	}
++
++	// Perform the matching.
++	const char *string = doc->BufferPointer();
++	int eflags = ((startPos != doc->LineStart(lineRangeStart)) ? REG_NOTBOL : 0) |
++	    ((endPos != doc->LineEnd(lineRangeEnd)) ? REG_NOTEOL : 0);
++	if (!Exec(string, startPos, endPos, eflags, pmatch)) {
++		ClearMatches(pmatch); // contents are unspecified after a failed search
++		*length = 0;
++		return -1;
++	}
++	if (increment == -1) {
++		// Searching backwards: rescan from just past the best match so far and
++		// keep the latest match that still ends at or before minPos. Candidates
++		// go into a scratch array so that pmatch always describes the match
++		// that is returned.
++		regmatch_t trial[maxMatches];
++		for (int tries = maxBackwardTries; tries > 0; tries--) {
++			const int from = pmatch[0].rm_so + 1;
++			if (from > endPos) break;
++			// The rescan starts mid-range, so "^" may only match there if it
++			// really is the start of a line.
++			int trialFlags = eflags & ~REG_NOTBOL;
++			if (from != doc->LineStart(doc->LineFromPosition(from)))
++				trialFlags |= REG_NOTBOL;
++			if (!Exec(string, from, endPos, trialFlags, trial) ||
++			    trial[0].rm_eo > minPos)
++				break;
++			for (int i = 0; i < maxMatches; i++)
++				pmatch[i] = trial[i];
++		}
++	}
++	*length = pmatch[0].rm_eo - pmatch[0].rm_so;
++	return pmatch[0].rm_so;
++}
++
++/**
++ * Expands a replacement template using the groups of the last match.
++ * "\0" to "\9" insert the matching group's text (nothing if the group did
++ * not match); "\a", "\b", "\f", "\n", "\r", "\t", "\v" and "\\" insert the
++ * corresponding character; any other escape is copied through unchanged.
++ * @param length length of text on entry, length of the result on return.
++ * @return the expansion; it is owned by this object and is overwritten by
++ *   the next call.
++ */
++const char *TreRegex::SubstituteByPosition(Document *doc, const char *text,
++    int *length) {
++	substituted.clear();
++	for (int j = 0; j < *length; j++) {
++		// A backslash in the final position has nothing to escape; copy it
++		// literally instead of reading past the end of text.
++		if (text[j] != '\\' || j + 1 >= *length) {
++			substituted.push_back(text[j]);
++			continue;
++		}
++		const char next = text[++j];
++		if (next >= '0' && next <= '9') {
++			const regmatch_t &group = pmatch[next - '0'];
++			// Both offsets are -1 for a group that did not match.
++			if (group.rm_so >= 0 && group.rm_eo > group.rm_so)
++				substituted.append(doc->BufferPointer() + group.rm_so,
++				    group.rm_eo - group.rm_so);
++			continue;
++		}
++		switch (next) {
++		case 'a':
++			substituted.push_back('\a');
++			break;
++		case 'b':
++			substituted.push_back('\b');
++			break;
++		case 'f':
++			substituted.push_back('\f');
++			break;
++		case 'n':
++			substituted.push_back('\n');
++			break;
++		case 'r':
++			substituted.push_back('\r');
++			break;
++		case 't':
++			substituted.push_back('\t');
++			break;
++		case 'v':
++			substituted.push_back('\v');
++			break;
++		case '\\':
++			substituted.push_back('\\');
++			break;
++		default:
++			// Not a recognised escape: keep the backslash and the character.
++			substituted.push_back('\\');
++			substituted.push_back(next);
++		}
++	}
++	*length = static_cast<int>(substituted.length());
++	return substituted.c_str();
++}
++
++/** Creates the TRE-backed regex searcher; charClassTable is not used. */
++#ifdef SCI_NAMESPACE
++
++RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
++	return new TreRegex();
++}
++
++#else
++
++RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
++	return new TreRegex();
++}
++
++#endif
-- 
cgit v1.2.3