Jump to content

Module:Unicode convert

Permanently protected module
From Wikipedia, the free encyclopedia

--- Conversions between Unicode code points and their UTF-8 / UTF-16
-- code-unit representations, returned as space-separated number strings.
--
-- NOTE: all these functions use frame solely for its args member.
-- Modules using them may therefore call them with a fake frame table
-- containing only args.
local p = {}

--- Return the UTF-8 bytes of a code point.
-- @param frame table whose `args` member holds:
--   `args[1]`   code point in hexadecimal (missing or unparsable -> 0);
--   `args.base` '10' or 'dec' for decimal output, anything else gives
--               two-digit uppercase hexadecimal.
-- @return string the bytes separated by single spaces
p.getUTF8 = function(frame)
	local ch = mw.ustring.char(tonumber(frame.args[1] or '0', 16) or 0)
	local bytes = { mw.ustring.byte(ch, 1, -1) }
	local format = ({
		['10'] = '%d',
		dec = '%d'
	})[frame.args['base']] or '%02X'
	for i = 1, #bytes do
		bytes[i] = format:format(bytes[i])
	end
	return table.concat(bytes, ' ')
end

--- Return the UTF-16 code unit(s) of a code point.
-- @param frame table whose `args` member holds:
--   `args[1]`   code point in hexadecimal (missing or unparsable -> 0);
--   `args.base` '10' or 'dec' for decimal output, anything else gives
--               four-digit uppercase hexadecimal.
-- @return string one code unit for values up to 0xFFFF, a surrogate pair
--   separated by a space for values up to 0x10FFFF, and the empty string
--   for anything larger
p.getUTF16 = function(frame)
	local codepoint = tonumber(frame.args[1] or '0', 16) or 0
	local format = ({ -- TODO reduce the number of options.
		['10'] = '%d',
		dec = '%d'
	})[frame.args['base']] or '%04X'
	if codepoint <= 0xFFFF then
		-- NB this also returns lone surrogate characters
		return format:format(codepoint)
	elseif codepoint > 0x10FFFF then
		-- There are no codepoints above this
		return ''
	end
	codepoint = codepoint - 0x10000
	-- Loaded lazily (only supplementary-plane input needs it) and kept
	-- local so the library is not leaked into the global environment.
	local bit32 = require('bit32')
	-- High surrogate carries the upper 10 bits, low surrogate the lower 10.
	return (format .. ' ' .. format):format(
		bit32.rshift(codepoint, 10) + 0xD800,
		bit32.band(codepoint, 0x3FF) + 0xDC00)
end

--- Decode a whitespace-separated list of UTF-8 bytes into code points.
-- @param frame table whose `args` member holds:
--   `args[1]`     the byte values, split on each whitespace character
--                 (missing -> treated as empty, giving an empty result);
--   `args.basein` 'dec' to read the bytes as decimal, otherwise hexadecimal;
--   `args.base`   'dec' for decimal output, otherwise uppercase hexadecimal
--                 padded to at least two digits.
-- @return string the decoded code points separated by single spaces
p.fromUTF8 = function(frame)
	local basein = frame.args['basein'] == 'dec' and 10 or 16
	local format = frame.args['base'] == 'dec' and '%d ' or '%02X '
	local bytes = {}
	for byte in mw.text.gsplit(frame.args[1] or '', '%s') do
		local value = tonumber(byte, basein)
		-- Tokens that are not numbers in the input base (including the
		-- empty tokens produced by consecutive whitespace) are skipped.
		if value then
			table.insert(bytes, value)
		end
	end
	local chars = { mw.ustring.codepoint(string.char(unpack(bytes)), 1, -1) }
	-- One format item per code point; sub(1, -2) drops the trailing space.
	return format:rep(#chars):sub(1, -2):format(unpack(chars))
end

return p
close