# HG changeset patch # User Neil # Date 1488408061 -39600 # Node ID b82fe8d33961050a7905d6588eef858b85ca804c # Parent 29534cb8eafb48f9eacc574b8063e9d66520ae8a Fix potential problems with IME on Cocoa when document contains invalid UTF-8. diff -r 29534cb8eafb -r b82fe8d33961 cocoa/ScintillaCocoa.h --- a/cocoa/ScintillaCocoa.h Thu Mar 02 09:08:04 2017 +1100 +++ b/cocoa/ScintillaCocoa.h Thu Mar 02 09:41:01 2017 +1100 @@ -48,6 +48,7 @@ #include "CaseFolder.h" #include "Document.h" #include "CaseConvert.h" +#include "UniConversion.h" #include "Selection.h" #include "PositionCache.h" #include "EditModel.h" diff -r 29534cb8eafb -r b82fe8d33961 cocoa/ScintillaView.mm --- a/cocoa/ScintillaView.mm Thu Mar 02 09:08:04 2017 +1100 +++ b/cocoa/ScintillaView.mm Thu Mar 02 09:41:01 2017 +1100 @@ -437,6 +437,7 @@ [mOwner message: SCI_SETTARGETRANGE wParam: posRange.location lParam: NSMaxRange(posRange)]; std::string text([mOwner message: SCI_TARGETASUTF8] + 1, 0); [mOwner message: SCI_TARGETASUTF8 wParam: 0 lParam: reinterpret_cast(&text[0])]; + text = FixInvalidUTF8(text); NSString *result = [NSString stringWithUTF8String: text.c_str()]; NSMutableAttributedString *asResult = [[[NSMutableAttributedString alloc] initWithString:result] autorelease]; diff -r 29534cb8eafb -r b82fe8d33961 doc/ScintillaHistory.html --- a/doc/ScintillaHistory.html Thu Mar 02 09:08:04 2017 +1100 +++ b/doc/ScintillaHistory.html Thu Mar 02 09:41:01 2017 +1100 @@ -537,6 +537,10 @@ Bug #1881.
  • + Fix potential problems with IME on Cocoa when document contains invalid + UTF-8. +
  • +
  • Fix crash on Cocoa with OS X 10.9 due to accessibility API not available. Bug #1915.
  • diff -r 29534cb8eafb -r b82fe8d33961 src/UniConversion.cxx --- a/src/UniConversion.cxx Thu Mar 02 09:08:04 2017 +1100 +++ b/src/UniConversion.cxx Thu Mar 02 09:41:01 2017 +1100 @@ -8,6 +8,7 @@ #include #include +#include #include "UniConversion.h" @@ -304,6 +305,28 @@ return (utf8StatusNext & UTF8MaskInvalid) ? 1 : (utf8StatusNext & UTF8MaskWidth); } +// Replace invalid bytes in UTF-8 with the replacement character +std::string FixInvalidUTF8(const std::string &text) { + std::string result; + const unsigned char *us = reinterpret_cast(text.c_str()); + size_t remaining = text.size(); + while (remaining > 0) { + const int utf8Status = UTF8Classify(us, static_cast(remaining)); + if (utf8Status & UTF8MaskInvalid) { + // Replacement character 0xFFFD = UTF8:"efbfbd". + result.append("\xef\xbf\xbd"); + us++; + remaining--; + } else { + const int len = utf8Status&UTF8MaskWidth; + result.append(reinterpret_cast(us), len); + us += len; + remaining -= len; + } + } + return result; +} + #ifdef SCI_NAMESPACE } #endif diff -r 29534cb8eafb -r b82fe8d33961 src/UniConversion.h --- a/src/UniConversion.h Thu Mar 02 09:08:04 2017 +1100 +++ b/src/UniConversion.h Thu Mar 02 09:41:01 2017 +1100 @@ -23,6 +23,7 @@ size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen); unsigned int UTF32FromUTF8(const char *s, unsigned int len, unsigned int *tbuf, unsigned int tlen); unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf); +std::string FixInvalidUTF8(const std::string &text); extern int UTF8BytesOfLead[256]; void UTF8BytesOfLeadInitialise();