diff options
Diffstat (limited to 'core')
-rw-r--r-- | core/file_io.lua | 41 | ||||
-rw-r--r-- | core/locale.conf | 4 |
2 files changed, 39 insertions, 6 deletions
diff --git a/core/file_io.lua b/core/file_io.lua index 11436cbb..45f38629 100644 --- a/core/file_io.lua +++ b/core/file_io.lua @@ -35,8 +35,9 @@ boms = { --- -- [Local function] Attempt to detect the encoding of the given text. -- @param text Text to determine encoding from. --- @return encoding string for textadept.iconv(), byte-order mark (BOM) string --- or nil. If encoding string is nil, the text belongs to a binary file. +-- @return encoding string for textadept.iconv() (unless 'binary', indicating a +-- binary file), byte-order mark (BOM) string or nil. If encoding string is +-- nil, no encoding has been detected. local function detect_encoding(text) local b1, b2, b3, b4 = string.byte(text, 1, 4) if b1 == 239 and b2 == 187 and b3 == 191 then @@ -51,12 +52,23 @@ local function detect_encoding(text) return 'UTF-32LE', boms[encoding] else local chunk = #text > 65536 and text:sub(1, 65536) or text - if chunk:find('\0') then return nil end -- binary file + if chunk:find('\0') then return 'binary' end -- binary file end - return 'UTF-8' + return nil end --- +-- [Local table] List of encodings to try to decode files as after UTF-8. +-- @class table +-- @name try_encodings +local try_encodings = { + 'UTF-8', + 'ASCII', + 'ISO-8859-1', + 'MacRoman' +} + +--- -- [Local function] Opens a file or goes to its already open buffer. -- @param utf8_filename The absolute path to the file to open. Must be UTF-8 -- encoded. @@ -82,8 +94,25 @@ local function open_helper(utf8_filename) local c = textadept.constants -- Tries to detect character encoding and convert text from it to UTF-8. local encoding, encoding_bom = detect_encoding(text) - if encoding_bom then text = text:sub(#encoding_bom + 1, -1) end - if encoding then text = textadept.iconv(text, 'UTF-8', encoding) end + if encoding ~= 'binary' then + if encoding then + if encoding_bom then text = text:sub(#encoding_bom + 1, -1) end + text = textadept.iconv(text, 'UTF-8', encoding) + else + -- Try list of encodings. + for _, try_encoding in ipairs(try_encodings) do + local ret, conv = pcall(textadept.iconv, text, 'UTF-8', try_encoding) + if ret then + encoding = try_encoding + text = conv + break + end + end + if not encoding then error(locale.IO_ICONV_ERROR) end + end + else + encoding = nil + end buffer.encoding, buffer.encoding_bom = encoding, encoding_bom buffer.code_page = encoding and c.SC_CP_UTF8 or 0 -- Tries to set the buffer's EOL mode appropriately based on the file. diff --git a/core/locale.conf b/core/locale.conf index 5edabdaa..50834bf9 100644 --- a/core/locale.conf +++ b/core/locale.conf @@ -68,6 +68,10 @@ EVENTS_QUIT_MSG "The following buffers are unsaved:\n\n%s\n\nYou will have to sa ERROR_BUFFER "[Error Buffer]" % core/file_io.lua +% "Encoding conversion failed." +IO_ICONV_ERROR "Encoding conversion failed." + +% core/file_io.lua % "Open" IO_OPEN_TITLE "Open" |