Jump to content

Module:Text

Permanently protected module
From Wikipedia, the free encyclopedia

local yesNo = require("Module:Yesno")
local Text = { serial = "2024-09-21",
               suite  = "Text" }
--[=[
Text utilities

Library functions live in the table `Text` (reachable from other modules
through p.Text()); the table `p` at the end of the file holds the
wrappers that are callable from wikitext via #invoke.
]=]



local function fiatQuote(apply, alien, advance)
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code
    --     advance  -- number, with level 1 or 2
    -- Returns: string; the text wrapped in the quotation marks configured
    --          in Module:Text/data for that language and level, or the
    --          unchanged text if nothing is configured
    local r = apply and tostring(apply) or ""
    alien = alien or "en"
    advance = tonumber(advance) or 0
    local data = mw.loadData('Module:Text/data')
    local QuoteLang = data.QuoteLang
    local QuoteType = data.QuoteType
    -- Base language of a code like "de-ch"; nil for codes without "-".
    local slang = alien:match("^(%l+)-")
    -- Lookup order: full code, then base language, then English.
    local suite = QuoteLang[alien]
                  or slang and QuoteLang[slang]
                  or QuoteLang["en"]
    if suite then
        local quotes = QuoteType[suite]
        if quotes then
            -- A truthy third entry puts a no-break space inside the marks.
            local space = quotes[3] and "&#160;" or ""
            quotes = quotes[advance]
            if quotes then
                -- Format the coerced string r rather than the raw argument,
                -- so that a nil argument cannot raise an error in format().
                r = mw.ustring.format("%s%s%s%s%s",
                                      mw.ustring.char(quotes[1]),
                                      space,
                                      r,
                                      space,
                                      mw.ustring.char(quotes[2]))
            end
        else
            mw.log("fiatQuote() " .. suite)
        end
    end
    return r
end -- fiatQuote()



Text.char = function(apply, again, accept)
    -- Create string from codepoints
    -- Parameter:
    --     apply   -- table (sequence) with numerical codepoints, or nil
    --     again   -- number of repetitions, or nil
    --     accept  -- true, if no error messages to be appended
    -- Returns: string; an error <span> (or "" if accept is set) as soon
    --          as at least one codepoint is invalid
    local r = ""
    apply = type(apply) == "table" and apply or {}
    again = math.floor(tonumber(again) or 1)
    if again < 1 then
        return ""
    end
    local bad = {}
    local codes = {}
    for _, v in ipairs(apply) do
        local n = tonumber(v)
        -- Control characters are rejected, except TAB (9) and LF (10).
        if not n or (n < 32 and n ~= 9 and n ~= 10) then
            table.insert(bad, tostring(v))
        else
            table.insert(codes, math.floor(n))
        end
    end
    if #bad > 0 then
        if not accept then
            r = tostring(mw.html.create("span")
                                :addClass("error")
                                :wikitext("bad codepoints: "
                                          .. table.concat(bad, " ")))
        end
        return r
    end
    if #codes > 0 then
        r = mw.ustring.char(unpack(codes))
        if again > 1 then
            r = r:rep(again)
        end
    end
    return r
end -- Text.char()



local function trimAndFormat(args, fmt)
    -- Trim all items of a sequence, drop the empty ones, format the rest
    -- Parameter:
    --     args  -- table (sequence), or a single value
    --     fmt   -- string (optional); format including "%s"
    -- Returns: table (sequence) of strings
    local result = {}
    if type(args) ~= 'table' then
        args = { args }
    end
    for _, v in ipairs(args) do
        v = mw.text.trim(tostring(v))
        if v ~= "" then
            table.insert(result, fmt and mw.ustring.format(fmt, v) or v)
        end
    end
    return result
end -- trimAndFormat()



Text.concatParams = function(args, apply, adapt)
    -- Concat list items into one string
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     apply  -- string (optional); separator (default: "|")
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    return table.concat(trimAndFormat(args, adapt), apply or "|")
end -- Text.concatParams()



Text.containsCJK = function(s)
    -- Is any CJK code within?
    -- Parameter:
    --     s  -- string
    -- Returns: true, if CJK detected
    s = s and tostring(s) or ""
    local patternCJK = mw.loadData('Module:Text/data').PatternCJK
    return mw.ustring.find(s, patternCJK) ~= nil
end -- Text.containsCJK()



Text.removeDelimited = function(s, prefix, suffix)
    -- Remove all text in s delimited by prefix and suffix (inclusive)
    -- Arguments:
    --    s = string to process
    --    prefix = initial delimiter
    --    suffix = ending delimiter
    -- Returns: stripped string; a prefix without a following suffix
    --          removes everything up to the end of the string
    s = s and tostring(s) or ""
    prefix = prefix and tostring(prefix) or ""
    suffix = suffix and tostring(suffix) or ""
    -- Byte lengths: the positions below come from the byte-based
    -- string.find / string.sub, so character counts would be wrong
    -- for delimiters containing non-ASCII characters.
    local prefixLen = #prefix
    local suffixLen = #suffix
    if prefixLen == 0 or suffixLen == 0 then
        return s
    end
    local r = s
    local i = r:find(prefix, 1, true)
    while i do
        -- Plain search: delimiters are literal text. Without the plain
        -- flag a suffix like "-->" is read as a Lua pattern and already
        -- matches a single ">".
        local j = r:find(suffix, i + prefixLen, true)
        if j then
            r = r:sub(1, i - 1) .. r:sub(j + suffixLen)
        else
            r = r:sub(1, i - 1)
        end
        i = r:find(prefix, 1, true)
    end
    return r
end -- Text.removeDelimited()



Text.getPlain = function(adjust)
    -- Remove wikisyntax from string, except templates
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; HTML comments, tags starting with a lowercase
    --          letter and bold/italic apostrophes removed, "&nbsp;"
    --          replaced by a space
    local r = Text.removeDelimited(adjust, "<!--", "-->")
    r = r:gsub("(</?%l[^>]*>)", "")
         :gsub("'''", "")
         :gsub("''", "")
         :gsub("&nbsp;", " ")
    return r
end -- Text.getPlain()



Text.isLatinRange = function(s)
    -- Are characters expected to be latin or symbols within latin texts?
    -- Arguments:
    --    s = string to analyze
    -- Returns: true, if valid for latin only
    s = s and tostring(s) or "" --- ensure input is always string
    local PatternLatin = mw.loadData('Module:Text/data').PatternLatin
    return mw.ustring.match(s, PatternLatin) ~= nil
end -- Text.isLatinRange()



Text.isQuote = function(s)
    -- Is this character any quotation mark?
    -- Parameter:
    --     s = single character to analyze
    -- Returns: true, if s is quotation mark
    s = s and tostring(s) or ""
    if s == "" then
        return false
    end
    local SeekQuote = mw.loadData('Module:Text/data').SeekQuote
    -- Plain search of s inside the string of known quotation marks.
    return mw.ustring.find(SeekQuote, s, 1, true) ~= nil
end -- Text.isQuote()



Text.listToText = function(args, adapt)
    -- Format list items similar to mw.text.listToText()
    -- Parameter:
    --     args   -- table (sequence) with numKey=string
    --     adapt  -- string (optional); format including "%s"
    -- Returns: string
    return mw.text.listToText(trimAndFormat(args, adapt))
end -- Text.listToText()



Text.quote = function(apply, alien, advance)
    -- Quote text
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: quoted string
    apply = apply and tostring(apply) or ""
    local mode, slang
    if type(alien) == "string" then
        slang = mw.text.trim(alien):lower()
    else
        slang = mw.title.getCurrentTitle().pageLanguage
        if not slang then
            -- TODO FIXME: Introduction expected 2017-04
            slang = mw.language.getContentLanguage():getCode()
        end
    end
    -- Anything but the number 2 means first level quotation.
    if advance == 2 then
        mode = 2
    else
        mode = 1
    end
    return fiatQuote(mw.text.trim(apply), slang, mode)
end -- Text.quote()



Text.quoteUnquoted = function(apply, alien, advance)
    -- Quote text, if not yet quoted and not empty
    -- Parameter:
    --     apply    -- string, with text
    --     alien    -- string, with language code, or nil
    --     advance  -- number, with level 1 or 2, or nil
    -- Returns: string; possibly quoted
    local r = mw.text.trim(apply and tostring(apply) or "")
    local s = mw.ustring.sub(r, 1, 1)
    if s ~= "" and not Text.isQuote(s) then
        -- Last character. An end index of 1 here would yield "" for any
        -- text longer than one character and disable this check.
        s = mw.ustring.sub(r, -1)
        if not Text.isQuote(s) then
            r = Text.quote(r, alien, advance)
        end
    end
    return r
end -- Text.quoteUnquoted()



Text.removeDiacritics = function(adjust)
    -- Remove all diacritics
    -- Parameter:
    --     adjust  -- string
    -- Returns: string; all latin letters should be ASCII
    --                  or basic greek or cyrillic or symbols etc.
    local cleanup, decomposed
    local PatternCombined = mw.loadData('Module:Text/data').PatternCombined
    -- Decompose, strip whatever PatternCombined matches, recompose.
    decomposed = mw.ustring.toNFD(adjust and tostring(adjust) or "")
    cleanup = mw.ustring.gsub(decomposed, PatternCombined, "")
    return mw.ustring.toNFC(cleanup)
end -- Text.removeDiacritics()



Text.sentenceTerminated = function(analyse)
    -- Is string terminated by dot, question or exclamation mark?
    --     Quotation, link termination and so on granted
    -- Parameter:
    --     analyse  -- string
    -- Returns: true, if sentence terminated
    -- Coerce like the sibling functions do, so nil does not raise an error.
    analyse = analyse and tostring(analyse) or ""
    local PatternTerminated = mw.loadData('Module:Text/data').PatternTerminated
    return mw.ustring.find(analyse, PatternTerminated) ~= nil
end -- Text.sentenceTerminated()



Text.ucfirstAll = function(adjust)
    -- Capitalize all words
    -- Arguments:
    --     adjust = string to adjust
    -- Returns: string with all first letters in upper case
    adjust = adjust and tostring(adjust) or ""
    local r = mw.text.decode(adjust, true)
    local i = 1
    local c, j, m
    -- Remember whether entities were decoded; if so, encode again at the end.
    m = (r ~= adjust)
    -- Leading space lets the first word match "%W%l" like any other word.
    r = " " .. r
    while i do
        i = mw.ustring.find(r, "%W%l", i)
        if i then
            j = i + 1
            c = mw.ustring.upper(mw.ustring.sub(r, j, j))
            r = string.format("%s%s%s",
                              mw.ustring.sub(r, 1, i),
                              c,
                              mw.ustring.sub(r, i + 2))
            i = j
        end
    end -- while i
    r = r:sub(2)
    if m then
        r = mw.text.encode(r)
    end
    return r
end -- Text.ucfirstAll()



Text.uprightNonlatin = function(adjust)
    -- Ensure non-italics for non-latin text parts
    --     One single greek letter might be granted
    -- Precondition:
    --     adjust  -- string
    -- Returns: string with non-latin parts enclosed in <span>
    local r
    local data = mw.loadData('Module:Text/data')
    local PatternLatin = data.PatternLatin
    local RangesLatin = data.RangesLatin
    local NumLatinRanges = data.NumLatinRanges
    -- Coerce like the sibling functions do, so nil does not raise an error.
    adjust = adjust and tostring(adjust) or ""
    if mw.ustring.match(adjust, PatternLatin) then
        -- latin only, horizontal dashes, quotes
        r = adjust
    else
        local c
        local j = false   -- start index of the open non-latin run, or false
        local k = 1       -- start index of the text not yet copied into r
        local m = false   -- index at which a lone greek letter would end
        local n = mw.ustring.len(adjust)
        local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
        local flat = function(a)
                  -- isLatin: codepoint a lies within one of RangesLatin
                  local range
                  -- NumLatinRanges has to be precomputed because
                  -- # does not work from loadData
                  for i = 1, NumLatinRanges do
                      range = RangesLatin[i]
                      if a >= range[1] and a <= range[2] then
                          return true
                      end
                  end -- for i
              end -- flat()
        local focus = function(a)
                  -- char is not ambivalent
                  local r = (a > 64)
                  if r then
                      r = (a < 8192 or a > 8212)
                  else
                      r = (a == 38 or a == 60)    -- '&' '<'
                  end
                  return r
              end -- focus()
        local form = function(a)
                  -- Append the pending text k..j-1 as is and the
                  -- non-latin run j..a wrapped in the span.
                  return string.format(span,
                                       r,
                                       mw.ustring.sub(adjust, k, j - 1),
                                       mw.ustring.sub(adjust, j, a))
              end -- form()
        r = ""
        for i = 1, n do
            c = mw.ustring.codepoint(adjust, i, i)
            if focus(c) then
                if flat(c) then
                    if j then
                        if m then
                            if i == m then
                                -- single greek letter.
                                j = false
                            end
                            m = false
                        end
                        if j then
                            -- Close the run; trailing spaces and opening
                            -- parentheses are kept outside of the span.
                            local nx = i - 1
                            local s = ""
                            for ix = nx, 1, -1 do
                                c = mw.ustring.sub(adjust, ix, ix)
                                if c == " " or c == "(" then
                                    nx = nx - 1
                                    s = c .. s
                                else
                                    break -- for ix
                                end
                            end -- for ix
                            r = form(nx) .. s
                            j = false
                            k = i
                        end
                    end
                elseif not j then
                    j = i
                    if c >= 880 and c <= 1023 then
                        -- single greek letter?
                        m = i + 1
                    else
                        m = false
                    end
                end
            elseif m then
                m = m + 1
            end
        end -- for i
        if j and (not m or m < n) then
            r = form(n)
        else
            r = r .. mw.ustring.sub(adjust, k)
        end
    end
    return r
end -- Text.uprightNonlatin()



Text.test = function(about)
    -- Expose configuration for test purposes
    -- Parameter:
    --     about  -- string; "quote" is the only keyword handled
    -- Returns: table with QuoteLang and QuoteType, or nil
    local r
    if about == "quote" then
        -- local: do not leak the data table into the global environment
        local data = mw.loadData('Module:Text/data')
        r = {}
        r.QuoteLang = data.QuoteLang
        r.QuoteType = data.QuoteType
    end
    return r
end -- Text.test()



-- Non Unicode-aware version of mw.text.split and mw.text.gsplit
-- based on [[phab:diffusion/ELUA/browse/master/includes/Engines/LuaCommon/lualib/mw.text.lua]]
-- These run up to 60 times faster than the Unicode-aware versions

Text.split = function(text, pattern, plain)
    -- Split text at every match of pattern
    -- Parameter:
    --     text     -- string to split
    --     pattern  -- string; Lua pattern, or literal text if plain
    --     plain    -- true, if pattern is to be taken literally
    -- Returns: table (sequence) with the pieces
    local ret = {}
    for m in Text.gsplit(text, pattern, plain) do
        ret[#ret + 1] = m
    end
    return ret
end -- Text.split()

Text.gsplit = function(text, pattern, plain)
    -- Iterator version of Text.split(); same parameters
    -- Returns: iterator function yielding one piece per call
    local s, l = 1, string.len(text)
    return function()
        if s then
            local e, n = string.find(text, pattern, s, plain)
            local ret
            if not e then
                -- No further separator: the rest is the last piece.
                ret = string.sub(text, s)
                s = nil
            elseif n < e then
                -- Empty separator!
                ret = string.sub(text, s, e)
                if e < l then
                    s = e + 1
                else
                    s = nil
                end
            else
                ret = e > s and string.sub(text, s, e - 1) or ''
                s = n + 1
            end
            return ret
        end
    end, nil, nil
end -- Text.gsplit()



-- Export
local p = {}

-- Boolean tests: return "1" for true and "" for false.
for _, func in ipairs({ 'containsCJK',
                        'isLatinRange',
                        'isQuote',
                        'sentenceTerminated' }) do
    p[func] = function(frame)
        return Text[func](frame.args[1] or "") and "1" or ""
    end
end

-- String transformations: pass the first parameter through.
for _, func in ipairs({ 'getPlain',
                        'removeDiacritics',
                        'ucfirstAll',
                        'uprightNonlatin' }) do
    p[func] = function(frame)
        return Text[func](frame.args[1] or "")
    end
end

function p.char(frame)
    -- Create string from whitespace separated codepoints.
    -- Parameters are read from the calling template, or from the
    -- #invoke itself if the template has no first parameter:
    --     1       -- codepoints; decimal, or hexadecimal as x41 / 0x41
    --     *       -- number of repetitions
    --     errors  -- a "no" value suppresses the error message
    local params = frame:getParent().args
    local story = params[1]
    local codes, lenient, multiple
    if not story then
        params = frame.args
        story = params[1]
    end
    if story then
        local items = mw.text.split(mw.text.trim(story), "%s+")
        if #items > 0 then
            local j
            lenient = (yesNo(params.errors) == false)
            codes = {}
            multiple = tonumber(params["*"])
            for _, v in ipairs(items) do
                -- "x41" becomes "0x41", which tonumber() reads as hex.
                j = tonumber((v:sub(1, 1) == "x" and "0" or "") .. v)
                -- Unparseable items are kept, to be reported by Text.char().
                table.insert(codes, j or v)
            end
        end
    end
    return Text.char(codes, multiple, lenient)
end

function p.concatParams(frame)
    -- Concat the unnamed parameters.
    --     template=1  -- take the parameters of the calling template
    --     separator   -- separator (default: "|")
    --     format      -- format including "%s"
    local args
    local template = frame.args.template
    if type(template) == "string" then
        template = mw.text.trim(template)
        template = (template == "1")
    end
    if template then
        args = frame:getParent().args
    else
        args = frame.args
    end
    return Text.concatParams(args,
                             frame.args.separator,
                             frame.args.format)
end

function p.listToFormat(frame)
    -- Apply a format to the items of several parallel lists.
    --     1, 2, ...  -- lists, each a string of items separated by sep
    --     format     -- string; the n-th "%s" is replaced by the item
    --                   of the n-th list
    --     sep        -- separator pattern for all lists (default: ";")
    -- Returns: string; the format repeated for every item position
    local lists = {}
    local pformat = frame.args["format"] or ""
    local sep = frame.args["sep"] or ";"
    -- Parse parameters: lists
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then
            lists[knum] = v
        end
    end
    -- Split lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], sep)
        if #lists[i] > maxListLen then
            maxListLen = #lists[i]
        end
    end
    -- Build result string
    local lines = {}
    for i = 1, maxListLen do
        local line = pformat
        for j = 1, #lists do
            -- Shorter lists contribute an empty item, as in p.zip().
            local item = lists[j][i] or ""
            -- A function replacement inserts the item literally; given
            -- as a string, any "%" in it would be read as a capture
            -- reference by gsub.
            line = mw.ustring.gsub(line, "%%s", function()
                                                    return item
                                                end, 1)
        end
        lines[i] = line
    end
    return table.concat(lines)
end

function p.listToText(frame)
    -- Format the unnamed parameters like mw.text.listToText().
    --     template=1  -- take the parameters of the calling template
    --     format      -- format including "%s"
    local args
    local template = frame.args.template
    if type(template) == "string" then
        template = mw.text.trim(template)
        template = (template == "1")
    end
    if template then
        args = frame:getParent().args
    else
        args = frame.args
    end
    return Text.listToText(args, frame.args.format)
end

function p.quote(frame)
    -- Quote text.
    --     1  -- text
    --     2  -- language code (optional)
    --     3  -- level 1 or 2 (optional)
    local slang = frame.args[2]
    if type(slang) == "string" then
        slang = mw.text.trim(slang)
        if slang == "" then
            -- Not a string any more: Text.quote() picks the language.
            slang = false
        end
    end
    return Text.quote(frame.args[1] or "",
                      slang,
                      tonumber(frame.args[3]))
end

function p.quoteUnquoted(frame)
    -- Quote text, if not yet quoted and not empty.
    --     1  -- text
    --     2  -- language code (optional)
    --     3  -- level 1 or 2 (optional)
    local slang = frame.args[2]
    if type(slang) == "string" then
        slang = mw.text.trim(slang)
        if slang == "" then
            -- Not a string any more: Text.quote() picks the language.
            slang = false
        end
    end
    return Text.quoteUnquoted(frame.args[1] or "",
                              slang,
                              tonumber(frame.args[3]))
end

function p.zip(frame)
    -- Interleave the items of several lists.
    --     1, 2, ...        -- lists, each a string of separated items
    --     sep              -- default separator pattern (default: "")
    --     sep1, sep2, ...  -- separator pattern of one particular list
    --     isep             -- output separator within one item position
    --     osep             -- output separator between item positions
    local lists = {}
    local seps = {}
    local defaultsep = frame.args["sep"] or ""
    local innersep = frame.args["isep"] or ""
    local outersep = frame.args["osep"] or ""
    -- Parse parameters
    for k, v in pairs(frame.args) do
        local knum = tonumber(k)
        if knum then
            lists[knum] = v
        else
            if string.sub(k, 1, 3) == "sep" then
                local sepnum = tonumber(string.sub(k, 4))
                if sepnum then
                    seps[sepnum] = v
                end
            end
        end
    end
    -- Use the default separator wherever no explicit one was given
    for i = 1, math.max(#seps, #lists) do
        if not seps[i] then
            seps[i] = defaultsep
        end
    end
    -- Split lists
    local maxListLen = 0
    for i = 1, #lists do
        lists[i] = mw.text.split(lists[i], seps[i])
        if #lists[i] > maxListLen then
            maxListLen = #lists[i]
        end
    end
    -- Collect the pieces and join once instead of growing a string.
    local buf = {}
    for i = 1, maxListLen do
        if i ~= 1 then
            buf[#buf + 1] = outersep
        end
        for j = 1, #lists do
            if j ~= 1 then
                buf[#buf + 1] = innersep
            end
            buf[#buf + 1] = lists[j][i] or ""
        end
    end
    return table.concat(buf)
end

function p.split(frame)
    -- Return one piece of a split text (non Unicode-aware).
    --     text    or 1  -- text to split
    --     pattern or 2  -- separator pattern
    --     plain   or 3  -- yes, if pattern is to be taken literally
    --     index   or 4  -- number of the piece (default: 1);
    --                      negative values count from the end
    local text = frame.args.text or frame.args[1] or ''
    local pattern = frame.args.pattern or frame.args[2] or ''
    local plain = yesNo(frame.args.plain or frame.args[3])
    local index = tonumber(frame.args.index) or tonumber(frame.args[4]) or 1
    local a = Text.split(text, pattern, plain)
    if index < 0 then
        index = #a + index + 1
    end
    return a[index]
end

function p.failsafe()
    -- Returns: string; version identification of this module
    return Text.serial
end

p.Text = function()
    -- Returns: table; the library, for use by other modules
    return Text
end -- p.Text

return p
close