From 06f4637430d0afe0e0b910952bc666cf2e9b1a8c Mon Sep 17 00:00:00 2001
From: mitchell <70453897+667e-11@users.noreply.github.com>
Date: Sat, 20 Jun 2009 13:18:42 -0400
Subject: Try list of encodings to load if none was detected rather than just UTF-8.

Any non-detected encoding that was not UTF-8 would cause an error on load since it was treated as UTF-8.
---
 core/file_io.lua | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

(limited to 'core/file_io.lua')

diff --git a/core/file_io.lua b/core/file_io.lua
index 11436cbb..45f38629 100644
--- a/core/file_io.lua
+++ b/core/file_io.lua
@@ -35,8 +35,9 @@ boms = {
 ---
 -- [Local function] Attempt to detect the encoding of the given text.
 -- @param text Text to determine encoding from.
--- @return encoding string for textadept.iconv(), byte-order mark (BOM) string
--- or nil. If encoding string is nil, the text belongs to a binary file.
+-- @return encoding string for textadept.iconv() (unless 'binary', indicating a
+-- binary file), byte-order mark (BOM) string or nil. If encoding string is
+-- nil, no encoding has been detected.
 local function detect_encoding(text)
   local b1, b2, b3, b4 = string.byte(text, 1, 4)
   if b1 == 239 and b2 == 187 and b3 == 191 then
@@ -51,11 +52,22 @@ local function detect_encoding(text)
     return 'UTF-32LE', boms[encoding]
   else
     local chunk = #text > 65536 and text:sub(1, 65536) or text
-    if chunk:find('\0') then return nil end -- binary file
+    if chunk:find('\0') then return 'binary' end -- binary file
   end
-  return 'UTF-8'
+  return nil
 end
 
+---
+-- [Local table] List of encodings to try to decode files as after UTF-8.
+-- @class table
+-- @name try_encodings
+local try_encodings = {
+  'UTF-8',
+  'ASCII',
+  'ISO-8859-1',
+  'MacRoman'
+}
+
 ---
 -- [Local function] Opens a file or goes to its already open buffer.
 -- @param utf8_filename The absolute path to the file to open. Must be UTF-8
@@ -82,8 +94,25 @@ local function open_helper(utf8_filename)
   local c = textadept.constants
   -- Tries to detect character encoding and convert text from it to UTF-8.
   local encoding, encoding_bom = detect_encoding(text)
-  if encoding_bom then text = text:sub(#encoding_bom + 1, -1) end
-  if encoding then text = textadept.iconv(text, 'UTF-8', encoding) end
+  if encoding ~= 'binary' then
+    if encoding then
+      if encoding_bom then text = text:sub(#encoding_bom + 1, -1) end
+      text = textadept.iconv(text, 'UTF-8', encoding)
+    else
+      -- Try list of encodings.
+      for _, try_encoding in ipairs(try_encodings) do
+        local ret, conv = pcall(textadept.iconv, text, 'UTF-8', try_encoding)
+        if ret then
+          encoding = try_encoding
+          text = conv
+          break
+        end
+      end
+      if not encoding then error(locale.IO_ICONV_ERROR) end
+    end
+  else
+    encoding = nil
+  end
   buffer.encoding, buffer.encoding_bom = encoding, encoding_bom
   buffer.code_page = encoding and c.SC_CP_UTF8 or 0
   -- Tries to set the buffer's EOL mode appropriately based on the file.
-- 
cgit v1.2.3