aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormitchell <70453897+667e-11@users.noreply.github.com>2013-04-30 08:50:20 -0400
committermitchell <70453897+667e-11@users.noreply.github.com>2013-04-30 08:50:20 -0400
commit37e5d7bd93c6600259ebab54b95fbe3822df4bff (patch)
tree719c15b02797ef21771cc2c2e9c9cd7f7a3d0fb2
parent78990df4f114c45adc7fd2678ffaedf0c4124d95 (diff)
downloadtextadept-37e5d7bd93c6600259ebab54b95fbe3822df4bff.tar.gz
textadept-37e5d7bd93c6600259ebab54b95fbe3822df4bff.zip
Renamed `io.try_encodings` to `io.encodings`; core/file_io.lua
Also refactored encoding detection.
-rw-r--r--core/file_io.lua64
1 files changed, 20 insertions, 44 deletions
diff --git a/core/file_io.lua b/core/file_io.lua
index 145159a4..1b24586c 100644
--- a/core/file_io.lua
+++ b/core/file_io.lua
@@ -79,33 +79,11 @@ io.recent_files = {}
-- @class table
-- @name boms
io.boms = {
- ['UTF-16BE'] = '\254\255', ['UTF-16LE'] = '\255\254',
+ ['UTF-8'] = '\239\187\191',
+ ['UTF-16BE'] = '\254\255', ['UTF-16LE'] = '\255\254',
['UTF-32BE'] = '\0\0\254\255', ['UTF-32LE'] = '\255\254\0\0'
}
--- Attempt to detect the encoding of the given text.
--- @param text Text to determine encoding from.
--- @return encoding string for `string.iconv()` (unless `'binary'`, indicating a
--- binary file), byte-order mark (BOM) string or `nil`. If encoding string is
--- `nil`, no encoding has been detected.
-local function detect_encoding(text)
- local b1, b2, b3, b4 = string.byte(text, 1, 4)
- if b1 == 239 and b2 == 187 and b3 == 191 then
- return 'UTF-8', '\239\187\191'
- elseif b1 == 254 and b2 == 255 then
- return 'UTF-16BE', io.boms['UTF-16BE']
- elseif b1 == 255 and b2 == 254 then
- return 'UTF-16LE', io.boms['UTF-16LE']
- elseif b1 == 0 and b2 == 0 and b3 == 254 and b4 == 255 then
- return 'UTF-32BE', io.boms['UTF-32BE']
- elseif b1 == 255 and b2 == 254 and b3 == 0 and b4 == 0 then
- return 'UTF-32LE', io.boms['UTF-32LE']
- elseif text:sub(1, 65536):find('\0') then
- return 'binary'
- end
- return nil
-end
-
---
-- List of encodings to try to decode files as.
-- You should add to this list if you get a "Conversion failed" error when
@@ -134,10 +112,10 @@ end
-- UTF-16BE, UTF-16LE, UTF-32, UTF-32BE, UTF-32LE, UTF-7, C99, JAVA.
--
-- [GNU iconv's encodings]: http://www.gnu.org/software/libiconv/
--- @usage io.try_encodings[#io.try_encodings + 1] = 'UTF-16'
+-- @usage io.encodings[#io.encodings + 1] = 'UTF-16'
-- @class table
--- @name try_encodings
-io.try_encodings = {'UTF-8', 'ASCII', 'ISO-8859-1', 'MacRoman'}
+-- @name encodings
+io.encodings = {'UTF-8', 'ASCII', 'ISO-8859-1', 'MacRoman'}
---
-- Opens *utf8_filenames*, a "\n" delimited string of UTF-8-encoded filenames,
@@ -171,26 +149,24 @@ function io.open_file(utf8_filenames)
error(err)
end
local buffer = buffer.new()
- -- Tries to detect character encoding and convert text from it to UTF-8.
- local encoding, encoding_bom = detect_encoding(text)
- if encoding ~= 'binary' then
- if encoding then
- if encoding_bom then text = text:sub(#encoding_bom + 1, -1) end
- text = text:iconv('UTF-8', encoding)
- else
- -- Try list of encodings.
- for _, try_encoding in ipairs(io.try_encodings) do
- local ok, conv = pcall(string.iconv, text, 'UTF-8', try_encoding)
- if ok then encoding, text = try_encoding, conv break end
- end
- if not encoding then error(_L['Encoding conversion failed.']) end
+ buffer.encoding, buffer.encoding_bom = nil, nil
+ -- Try to detect character encoding and convert to UTF-8.
+ for encoding, bom in pairs(io.boms) do
+ if text:sub(1, #bom) == bom then
+ buffer.encoding, buffer.encoding_bom = encoding, bom
+ text = text:sub(#bom + 1, -1):iconv('UTF-8', encoding)
+ break
end
- else
- encoding = nil
end
- buffer.encoding, buffer.encoding_bom = encoding, encoding_bom
+ if not buffer.encoding and not text:sub(1, 65536):find('\0') then
+ for i = 1, #io.encodings do
+ local ok, conv = pcall(string.iconv, text, 'UTF-8', io.encodings[i])
+ if ok then buffer.encoding, text = io.encodings[i], conv break end
+ end
+ if not buffer.encoding then error(_L['Encoding conversion failed.']) end
+ end
buffer.code_page = encoding and _SCINTILLA.constants.SC_CP_UTF8 or 0
- -- Tries to set the buffer's EOL mode appropriately based on the file.
+ -- Detect EOL mode.
local s, e = text:find('\r\n?')
if s and e then
buffer.eol_mode = (s == e and _SCINTILLA.constants.SC_EOL_CR or