Scite Unicode Input |
|
This script is also provided as a GPLv2 project on sourceforge: http://sourceforge.net/projects/emitunicodeinscite/
Enjoy!
-- DESCRIPTION:
--
-- This Lua script adds UTF-8 Unicode input to the SciTE text editor.
--
-- SciTE should be set to use the UTF-8 encoding, because this script
-- inserts UTF-8 bytes into the text buffer of the editor.
-- Select File->Encoding->UTF-8 from the SciTE menu bar.
--
-- For example, typing 2200 followed by CTRL+U replaces the text 2200
-- with ∀ (U+2200) in the editor.
--
-- ______________________________________________________________________________
--
-- INSTALL:
--
-- To have SciTE run this script each time you press Ctrl+U, add the following
-- lines to your ~/SciTEUser.properties file, where ~ is your home directory.

-- FILE ~/SciTEUser.properties:
--[[
ext.lua.startup.script=$(SciteUserHome)/emitUtf8UnicodeIntoTheSciteEditor.lua
command.name.12.*=Emit UTF8 Unicode
command.subsystem.12.*=3
command.12.*=emitUtf8UnicodeIntoTheSciteEditor
command.mode.12.*=savebefore:no
command.shortcut.12.*=Ctrl+U
--]]

-- ______________________________________________________________________________
-- THE LUA CODE:
--
-- Next is the definition of the Lua function that SciTE calls when CTRL+U is
-- pressed, to replace the hexadecimal notation of a Unicode code point with
-- the UTF-8 encoding of that code point.
-- ______________________________________________________________________________
-- Computes the utf8 encoding for a unicode codepoint u
-- , when 0 <= u <= 0x7f
-- The result is the single byte 0xxxxxxx.
--
-- @param unicodeValue the unicode codepoint u
--
-- @return the utf8 encoding of the unicode codepoint u (a 1-byte string)
function case1UnicodeToUtf8(unicodeValue)
  -- The modulo keeps only the low 7 bits, so the byte never reaches 0x80.
  local byte0 = unicodeValue % 0x80
  return string.char(byte0)
end

-- ______________________________________________________________________________
-- Computes the utf8 encoding for a unicode codepoint u
-- , when 0x80 <= u <= 0x7ff
-- The result is the two bytes 110xxxxx 10xxxxxx.
--
-- @param unicodeValue the unicode codepoint u
--
-- @return the utf8 encoding of the unicode codepoint u (a 2-byte string)
function case2UnicodeToUtf8(unicodeValue)
  local u = unicodeValue
  -- Low 6 bits go into the continuation byte (prefix 10).
  local byte1 = 0x80 + (u % 0x40)
  u = math.floor(u / 0x40)
  -- Next 5 bits go into the lead byte (prefix 110).
  local byte0 = 0xc0 + (u % 0x20)
  return string.char(byte0, byte1)
end

-- ______________________________________________________________________________
-- Computes the utf8 encoding for a unicode codepoint u
-- , when 0x800 <= u <= 0xffff.
-- The result is the three bytes 1110xxxx 10xxxxxx 10xxxxxx.
--
-- @param unicodeValue the unicode codepoint u
--
-- @return the utf8 encoding of the unicode codepoint u (a 3-byte string)
function case3UnicodeToUtf8(unicodeValue)
  local u = unicodeValue
  -- Two continuation bytes, 6 payload bits each, filled from the low end.
  local byte2 = 0x80 + (u % 0x40)
  u = math.floor(u / 0x40)
  local byte1 = 0x80 + (u % 0x40)
  u = math.floor(u / 0x40)
  -- Remaining 4 bits go into the lead byte (prefix 1110).
  local byte0 = 0xe0 + (u % 0x10)
  return string.char(byte0, byte1, byte2)
end

-- ______________________________________________________________________________
-- Computes the utf8 encoding for a unicode codepoint u
-- , when 0x10000 <= u <= 0x10ffff.
--
-- @param unicodeValue the unicode codepoint u
--
-- @return the utf8 encoding of the unicode codepoint u (a 4-byte string)
function case4UnicodeToUtf8(unicodeValue)
  local u = unicodeValue
  -- Three continuation bytes (prefix 10), 6 payload bits each, filled from
  -- the low end of the code point.
  local byte3 = 0x80 + (u % 0x40)
  u = math.floor(u / 0x40)
  local byte2 = 0x80 + (u % 0x40)
  u = math.floor(u / 0x40)
  local byte1 = 0x80 + (u % 0x40)
  u = math.floor(u / 0x40)
  -- Remaining 3 bits go into the lead byte (prefix 11110).
  local byte0 = 0xf0 + (u % 0x8)
  return string.char(byte0, byte1, byte2, byte3)
end

-- ______________________________________________________________________________
-- Converts a unicode integer value, into a utf8 string value.
--
-- The unicode integer value is an integer that
-- is greater than or equal to zero.
--
-- The utf8 string value is a string that is a sequence of
-- 8 bits characters that give the utf8 encoding of the
-- unicode codepoint given by the unicode integer value.
--
-- @param unicodeValue the unicode integer value;
--                     a unicode codepoint
--
-- @return the utf8 encoding of the unicode codepoint
--         provided by the unicodeValue input argument, or nil when the
--         argument is not an integer in 0..0x10ffff or is a surrogate
--         (0xd800..0xdfff), which UTF-8 does not allow to be encoded
function unicodeToUtf8(unicodeValue)
  local u = unicodeValue

  -- Anything that is not an integral number cannot be a code point.
  if type(u) ~= "number" or u ~= math.floor(u) then
    return nil
  end
  if u < 0 or u > 0x10ffff then
    return nil
  end
  -- Surrogates are reserved for UTF-16 and are invalid in UTF-8.
  if 0xd800 <= u and u <= 0xdfff then
    return nil
  end

  -- The ranges are tested in ascending order, so each test only needs the
  -- upper bound of its range.
  if u <= 0x7f then
    return case1UnicodeToUtf8(u)
  end
  if u <= 0x7ff then
    return case2UnicodeToUtf8(u)
  end
  if u <= 0xffff then
    return case3UnicodeToUtf8(u)
  end
  return case4UnicodeToUtf8(u)
end

-- ______________________________________________________________________________
-- Peeks (reads) the character at position i, in the Scite Editor.
-- If the character is the ascii name of a hex digit, it returns
-- the corresponding hex digit, otherwise it returns nil.
--
-- @param i position in the Scite Editor
-- @return hex digit at position i, or nil
function peekHexdigit(i)
  local e = editor
  local asciiCode = e.CharAt[i]
  -- string.char only accepts 0..255, so anything outside that range is
  -- rejected up front instead of raising an error.
  if asciiCode < 0 or asciiCode > 0xff then
    return nil
  end
  -- tonumber with base 16 yields the digit value, or nil for a non-hex char.
  return tonumber(string.char(asciiCode), 0x10)
end

-- ______________________________________________________________________________
-- Reads the sequence of maximum length at most 5, at the left of the cursor
-- in the Scite Editor.
-- Encodes the longest suffix of this sequence, that is a hex number, into
-- the utf encoding of this hex number.
-- Replaces this longest suffix, with the utf8 sequence.
--
-- Because at most 5 hex digits are read, the largest codepoint that can be
-- entered is 0xfffff.
--
-- @return true  when a suffix of length greater than zero, at most 5, existed
--               and was replaced with the utf8 encoding of the number it
--               represented
--
--         nil   when there is no text at the left of the cursor, when no
--               such suffix existed, or when unicodeToUtf8 rejected the
--               number
function emitUtf8Unicode()
  local e = editor
  local textLength = e.TextLength
  local cursor = e.CurrentPos
  local maxDigits = 5

  if textLength == 0 or cursor < 1 then
    return nil -- Nothing at the left of the cursor.
  end

  -- Walk leftwards from the cursor; the first character read is the least
  -- significant hex digit, so each further digit is worth 16 times more.
  local codepoint = 0
  local placeValue = 1
  local digitCount = 0
  for offset = 1, math.min(maxDigits, cursor) do
    local hexDigit = peekHexdigit(cursor - offset)
    if hexDigit == nil then
      break
    end
    codepoint = codepoint + placeValue * hexDigit
    placeValue = placeValue * 0x10
    digitCount = offset
  end

  if digitCount == 0 then
    return nil -- Failure. No hex digit at the left of the cursor.
  end

  -- Kept local: a global named utf8 would overwrite Lua 5.3's utf8 library.
  local encoded = unicodeToUtf8(codepoint)
  if encoded == nil then
    return nil -- Failure. Unicode to utf8 conversion failed.
  end

  e:SetSel(cursor - digitCount, cursor)
  e:ReplaceSel(encoded)
  return true -- Success.
end

-- ______________________________________________________________________________
-- Emits utf8 encoding in the place of the unicode codepoint
-- in the editor, at the left of the cursor.
--
-- Writes a message to the Output pane, if no codepoint existed
-- at the left of the cursor.
--
function emitUtf8UnicodeIntoTheSciteEditor()
  -- emitUtf8Unicode returns true only when it replaced text; any other
  -- result is reported through print.
  local ok = emitUtf8Unicode()
  if not ok then
    print("Failed to encode unicode into text editor.")
  end
end

-- ______________________________________________________________________________
--
-- Following web pages were useful in writing the lua scite script.
--
-- http://lua-users.org/wiki/UsingLuaWithScite
-- http://www.scintilla.org/PaneAPI.html
-- http://www.lua.org/manual/5.1/manual.html#pdf-tonumber
-- https://en.wikipedia.org/wiki/UTF-8
--
-- http://lua-users.org/lists/lua-l/2007-08/msg00276.html
-- http://keplerproject.github.io/luadoc/