1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
# HG changeset patch
# User Neil <nyamatongwe@gmail.com>
# Date 1488408061 -39600
# Node ID b82fe8d33961050a7905d6588eef858b85ca804c
# Parent 29534cb8eafb48f9eacc574b8063e9d66520ae8a
Fix potential problems with IME on Cocoa when document contains invalid UTF-8.
diff -r 29534cb8eafb -r b82fe8d33961 cocoa/ScintillaCocoa.h
--- a/cocoa/ScintillaCocoa.h Thu Mar 02 09:08:04 2017 +1100
+++ b/cocoa/ScintillaCocoa.h Thu Mar 02 09:41:01 2017 +1100
@@ -48,6 +48,7 @@
#include "CaseFolder.h"
#include "Document.h"
#include "CaseConvert.h"
+#include "UniConversion.h"
#include "Selection.h"
#include "PositionCache.h"
#include "EditModel.h"
diff -r 29534cb8eafb -r b82fe8d33961 cocoa/ScintillaView.mm
--- a/cocoa/ScintillaView.mm Thu Mar 02 09:08:04 2017 +1100
+++ b/cocoa/ScintillaView.mm Thu Mar 02 09:41:01 2017 +1100
@@ -437,6 +437,7 @@
[mOwner message: SCI_SETTARGETRANGE wParam: posRange.location lParam: NSMaxRange(posRange)];
std::string text([mOwner message: SCI_TARGETASUTF8] + 1, 0);
[mOwner message: SCI_TARGETASUTF8 wParam: 0 lParam: reinterpret_cast<sptr_t>(&text[0])];
+ text = FixInvalidUTF8(text);
NSString *result = [NSString stringWithUTF8String: text.c_str()];
NSMutableAttributedString *asResult = [[[NSMutableAttributedString alloc] initWithString:result] autorelease];
diff -r 29534cb8eafb -r b82fe8d33961 doc/ScintillaHistory.html
--- a/doc/ScintillaHistory.html Thu Mar 02 09:08:04 2017 +1100
+++ b/doc/ScintillaHistory.html Thu Mar 02 09:41:01 2017 +1100
@@ -537,6 +537,10 @@
<a href="http://sourceforge.net/p/scintilla/bugs/1881/">Bug #1881</a>.
</li>
<li>
+ Fix potential problems with IME on Cocoa when document contains invalid
+ UTF-8.
+ </li>
+ <li>
Fix crash on Cocoa with OS X 10.9 due to accessibility API not available.
<a href="http://sourceforge.net/p/scintilla/bugs/1915/">Bug #1915</a>.
</li>
diff -r 29534cb8eafb -r b82fe8d33961 src/UniConversion.cxx
--- a/src/UniConversion.cxx Thu Mar 02 09:08:04 2017 +1100
+++ b/src/UniConversion.cxx Thu Mar 02 09:41:01 2017 +1100
@@ -8,6 +8,7 @@
#include <stdlib.h>
#include <stdexcept>
+#include <string>
#include "UniConversion.h"
@@ -304,6 +305,28 @@
return (utf8StatusNext & UTF8MaskInvalid) ? 1 : (utf8StatusNext & UTF8MaskWidth);
}
+// Replace invalid bytes in UTF-8 with the replacement character
+std::string FixInvalidUTF8(const std::string &text) {
+ std::string result;
+ const unsigned char *us = reinterpret_cast<const unsigned char *>(text.c_str());
+ size_t remaining = text.size();
+ while (remaining > 0) {
+ const int utf8Status = UTF8Classify(us, static_cast<int>(remaining));
+ if (utf8Status & UTF8MaskInvalid) {
+ // Replacement character 0xFFFD = UTF8:"efbfbd".
+ result.append("\xef\xbf\xbd");
+ us++;
+ remaining--;
+ } else {
+ const int len = utf8Status&UTF8MaskWidth;
+ result.append(reinterpret_cast<const char *>(us), len);
+ us += len;
+ remaining -= len;
+ }
+ }
+ return result;
+}
+
#ifdef SCI_NAMESPACE
}
#endif
diff -r 29534cb8eafb -r b82fe8d33961 src/UniConversion.h
--- a/src/UniConversion.h Thu Mar 02 09:08:04 2017 +1100
+++ b/src/UniConversion.h Thu Mar 02 09:41:01 2017 +1100
@@ -23,6 +23,7 @@
size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
unsigned int UTF32FromUTF8(const char *s, unsigned int len, unsigned int *tbuf, unsigned int tlen);
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf);
+std::string FixInvalidUTF8(const std::string &text);
extern int UTF8BytesOfLead[256];
void UTF8BytesOfLeadInitialise();
|