Jump to content

Module:String2

Permanently protected module
From Wikipedia, the free encyclopedia

require('strict');

-- Module:String2 -- assorted string helpers callable through {{#invoke:String2|...}}.
-- Functions whose name starts with an underscore are helpers intended for use
-- from other modules rather than directly from wikitext.
local p = {}

-- trim returns the first unnamed parameter with leading/trailing whitespace removed.
p.trim = function(frame)
	return mw.text.trim(frame.args[1] or "")
end

-- sentence lower-cases the first unnamed parameter and then upper-cases its
-- first letter (see p.ucfirst).
p.sentence = function(frame)
	-- {{lc:}} is strip-marker safe, string.lower is not.
	frame.args[1] = frame:callParserFunction('lc', frame.args[1])
	return p.ucfirst(frame)
end

-- ucfirst upper-cases the first alphanumeric character of the first unnamed
-- parameter.  Leading strip markers, list markup, bold/italic markup, html-like
-- tags, html entities, whitespace and miscellaneous punctuation are skipped over
-- and reattached unchanged.  When the remaining text starts with a wikilink or a
-- labelled external link, the first letter of the displayed text is upcased.
-- A nil, empty or whitespace-only parameter is returned unchanged.
p.ucfirst = function(frame)
	local s = frame.args[1];
	if not s or '' == s or s:match('^%s+$') then		-- when <s> is nil, empty, or only whitespace
		return s;										-- abandon because nothing to do
	end

	s = mw.text.trim(s)

	local prefix_patterns_t = {							-- sequence of byte-safe (ASCII-only) prefix patterns
		'^\127[^\127]*UNIQ%-%-%a+%-%x+%-QINU[^\127]*\127',	-- stripmarker
		'^([%*;:#]+)',									-- various list markup
		'^(\'\'\'*)',									-- bold / italic markup
		'^(%b<>)',										-- html-like tags because some templates render these
		'^(&%a+;)',										-- html character entities because some templates render these
		'^(&#%d+;)',									-- html numeric (decimal) entities because some templates render these
		'^(&#x%x+;)',									-- html numeric (hexadecimal) entities because some templates render these
		'^(%s+)',										-- any whitespace characters
	}
	-- miscellaneous punctuation; tried last.  This class contains multibyte
	-- characters so it must be matched with mw.ustring: a bytewise string.match
	-- would treat each UTF-8 byte as a separate class member and could strip part
	-- of an unrelated multibyte character, leaving invalid UTF-8 behind.
	local punctuation_pattern = '^([%(%)%-%+%?%.%%!~!@%$%^&_={}/`,‘’„“”ʻ|\"\'\\]+)'

	-- return the first prefix found at the start of <text>, or nil when there is none
	local function match_prefix(text)
		for _, pattern in ipairs(prefix_patterns_t) do	-- spin through <prefix_patterns_t>
			local prefix = text:match(pattern)
			if prefix then
				return prefix
			end
		end
		return mw.ustring.match(text, punctuation_pattern)
	end

	-- upcase the first alphanumeric character located by <pattern>; <pattern> must
	-- have two captures: the lead-in text and the character to upcase.  A function
	-- replacement is used so that '%' in the lead-in is never read as an escape,
	-- and <text> is returned unchanged when there is nothing to upcase.
	local function upcase_first(text, pattern)
		return (mw.ustring.gsub(text, pattern, function(lead, letter)
			return lead .. mw.ustring.upper(letter)
		end, 1))
	end

	local prefixes_t = {};								-- list, bold/italic, and html-like markup, & whitespace saved here
	-- one by one remove prefixes from the start of <s>; the search restarts at the
	-- top of the sequence after every removal because misc punct removal can
	-- expose (or break) a stripmarker
	local prefix = match_prefix(s)
	while prefix do
		prefixes_t[#prefixes_t + 1] = prefix;			-- save it
		s = s:sub(#prefix + 1);							-- remove the prefix from <s>
		prefix = match_prefix(s)
	end

	local s1 = table.concat(prefixes_t);				-- recreate the prefix string for later reattachment

	local first_text = mw.ustring.match(s, '^%[%[[^%]]+%]%]');	-- extract wikilink at start of string if present
	if first_text then
		if first_text:match('^%[%[[^|]+|[^%]]+%]%]') then		-- if <first_text> is a piped link
			s = upcase_first(s, '^(%[%[[^|]+|%W*)(%w)');
		else													-- a wikilink but not a piped link
			s = upcase_first(s, '^(%[%[%W*)(%w)');
		end
	elseif s:match('^%[%S+%s+[^%]]+%]') then					-- if <s> is a ext link of some sort; must have label text
		s = upcase_first(s, '^(%[%S+%s+%W*)(%w)');
	elseif s:match('^%[%S+%s*%]') then							-- if <s> is a ext link without label text; nothing to do
		return s1 .. s;											-- reattach prefix string (if present) and done
	else														-- <s> is not a wikilink or ext link; assume plain text
		s = upcase_first(s, '^(%W*)(%w)');
	end

	return s1 .. s;												-- reattach prefix string (if present) and done
end

-- title converts the first unnamed parameter to title case: every word is
-- lower-cased and then has its first letter upcased, except for the short words
-- listed in <alwayslower> (the first word is always upcased).
p.title = function(frame)
	-- http://grammar.yourdictionary.com/capitalization/rules-for-capitalization-in-titles.html
	-- recommended by The U.S. Government Printing Office Style Manual:
	-- "Capitalize all words in titles of publications and documents,
	-- except a, an, the, at, by, for, in, of, on, to, up, and, as, but, or, and nor."
	-- NOTE(review): the table below contains 'from' and omits 'as', unlike the quote above.
	local alwayslower = {
		['a'] = 1, ['an'] = 1, ['the'] = 1,
		['and'] = 1, ['but'] = 1, ['or'] = 1, ['for'] = 1,
		['nor'] = 1, ['on'] = 1, ['in'] = 1, ['at'] = 1, ['to'] = 1,
		['from'] = 1, ['by'] = 1, ['of'] = 1, ['up'] = 1,
	}
	local s = mw.text.trim(frame.args[1] or "")
	local words = mw.text.split(s, " ")
	local lang = mw.getContentLanguage()
	for i, word in ipairs(words) do
		-- {{lc:}} is strip-marker safe, string.lower is not.
		word = frame:callParserFunction('lc', word)
		if i == 1 or alwayslower[word] ~= 1 then
			word = lang:ucfirst(word)
		end
		words[i] = word
	end
	return table.concat(words, " ")
end

-- findlast finds the last item in a list
-- the first unnamed parameter is the list
-- the second, optional unnamed parameter is the list separator (default = comma space)
-- the separator is inserted into a Lua pattern as-is, so pattern magic characters in it are live
-- returns the whole list if separator not found
p.findlast = function(frame)
	local s = mw.text.trim(frame.args[1] or "")
	local sep = frame.args[2] or ""
	if sep == "" then
		sep = ", "
	end
	-- greedy '.*' pushes the separator match as far right as possible
	return s:match(".*" .. sep .. "(.*)") or s
end

-- stripZeros finds the first number and strips leading zeros (apart from units)
-- e.g "0940" -> "940"; "Year: 0023" -> "Year: 23"; "00.12" -> "0.12"
p.stripZeros = function(frame)
	local s = mw.text.trim(frame.args[1] or "")
	local n = tonumber(string.match(s, "%d+")) or ""
	s = string.gsub(s, "%d+", n, 1)		-- assignment keeps only the string, not the count
	return s
end

-- nowiki ensures that a string of text is treated by the MediaWiki software as just a string
-- it takes an unnamed parameter and trims whitespace, then removes any wikicode
p.nowiki = function(frame)
	local str = mw.text.trim(frame.args[1] or "")
	return mw.text.nowiki(str)
end

-- split splits text at boundaries specified by separator
-- and returns the chunk for the index idx (starting at 1)
-- a negative idx counts from the end of the list of chunks
-- #invoke:String2 |split |text |separator |index |true/false
-- #invoke:String2 |split |txt=text |sep=separator |idx=index |plain=true/false
-- if plain is false/no/0 then separator is treated as a Lua pattern - defaults to plain=true
p.split = function(frame)
	local args = frame.args
	if not (args[1] or args.txt) then
		args = frame:getParent().args
	end
	local txt = args[1] or args.txt or ""
	if txt == "" then
		return nil
	end
	local sep = (args[2] or args.sep or ""):gsub('"', '')	-- double quotes are removed from the separator
	local idx = tonumber(args[3] or args.idx) or 1
	local plain = (args[4] or args.plain or "true"):sub(1, 1)
	plain = (plain ~= "f" and plain ~= "n" and plain ~= "0")
	local splittbl = mw.text.split(txt, sep, plain)
	if idx < 0 then
		idx = #splittbl + idx + 1
	end
	return splittbl[idx]
end

-- val2percent scans through a string, passed as either the first unnamed parameter or |txt=
-- it converts each number it finds into a percentage and returns the resultant string.
p.val2percent = function(frame)
	local args = frame.args
	if not (args[1] or args.txt) then
		args = frame:getParent().args
	end
	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then
		return nil
	end
	local function v2p(x)
		x = (tonumber(x) or 0) * 100		-- text that is not a valid number becomes 0
		if x == math.floor(x) then
			x = math.floor(x)
		end
		return x .. "%"
	end
	txt = txt:gsub("%d[%d%.]*", v2p)		-- store just the string
	return txt
end

-- one2a scans through a string, passed as either the first unnamed parameter or |txt=
-- it converts each occurrence of 'one ' into either 'a ' or 'an ' and returns the resultant string.
p.one2a = function(frame)
	local args = frame.args
	if not (args[1] or args.txt) then
		args = frame:getParent().args
	end
	local txt = mw.text.trim(args[1] or args.txt or "")
	if txt == "" then
		return nil
	end
	txt = txt:gsub(" one ", " a ")
		:gsub("^one", "a")
		:gsub("One ", "A ")
		:gsub("a ([aeiou])", "an %1")
		:gsub("A ([aeiou])", "An %1")
	return txt
end

-- findpagetext returns the position of a piece of text in a page
-- First positional parameter or |text is the search text
-- Optional parameter |title is the page title, defaults to current page
-- Optional parameter |plain is either true for plain search (default) or false for Lua pattern search
-- Optional parameter |nomatch is the return value when no match is found; default is nil
p._findpagetext = function(args)
	-- process parameters
	local nomatch = args.nomatch or ""
	if nomatch == "" then
		nomatch = nil
	end
	--
	local text = mw.text.trim(args[1] or args.text or "")
	if text == "" then
		return nil
	end
	--
	local title = args.title or ""
	local titleobj
	if title == "" then
		titleobj = mw.title.getCurrentTitle()
	else
		titleobj = mw.title.new(title)
	end
	--
	local plain = args.plain or ""
	if plain:sub(1, 1) == "f" then
		plain = false
	else
		plain = true
	end
	-- get the page content and look for 'text' - return position or nomatch
	local content = titleobj and titleobj:getContent()
	return content and mw.ustring.find(content, text, 1, plain) or nomatch
end

p.findpagetext = function(frame)
	local args = frame.args
	local pargs = frame:getParent().args
	for k, v in pairs(pargs) do		-- parent-frame arguments override the #invoke arguments
		args[k] = v
	end
	if not (args[1] or args.text) then
		return nil
	end
	-- just the first value
	return (p._findpagetext(args))
end

-- returns the decoded url. Inverse of parser function {{urlencode:val|TYPE}}
-- Type is:
-- QUERY decodes + to space (default)
-- PATH does no extra decoding
-- WIKI decodes _ to space
-- The type is matched case-insensitively; a missing, empty or unrecognised
-- type falls back to QUERY, and a missing url is treated as the empty string.
p._urldecode = function(url, enctype)
	url = url or ""
	enctype = enctype and tostring(enctype):upper()
	if enctype ~= "PATH" and enctype ~= "WIKI" then
		enctype = "QUERY"
	end
	return mw.uri.decode(url, enctype)
end

-- {{#invoke:String2|urldecode|url=url|type=type}}
p.urldecode = function(frame)
	-- go through the helper so that an empty |type= or a missing |url= yields
	-- the documented default instead of a script error
	return p._urldecode(frame.args.url, frame.args.type)
end

-- what follows was merged from Module:StringFunc

-- helper functions
p._GetParameters = require('Module:GetParameters')

-- Argument list helper function, as per Module:String
p._getParameters = p._GetParameters.getParameters

-- Escape Pattern helper function so that all characters are treated as plain text, as per Module:String
-- (returns both results of gsub: the escaped string and the substitution count)
function p._escapePattern(pattern_str)
	return mw.ustring.gsub(pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1")
end

-- Helper Function to interpret boolean strings, as per Module:String
p._getBoolean = p._GetParameters.getBoolean

--[[
Strip

This function Strips characters from string

Usage:
{{#invoke:String2|strip|source_string|characters_to_strip|plain_flag}}

Parameters
	source: The string to strip
	chars: The pattern or list of characters to strip from string, replaced with ''
	plain: A flag indicating that the chars should be understood as plain text. defaults to true.

Leading and trailing whitespace is also automatically stripped from the string.
]]
function p.strip(frame)
	local new_args = p._getParameters(frame.args, {'source', 'chars', 'plain'})
	local source_str = mw.text.trim(new_args['source'] or '')
	local chars = new_args['chars'] or ''
	if source_str == '' or chars == '' then
		return source_str
	end
	local l_plain = p._getBoolean(new_args['plain'] or true)
	if l_plain then
		chars = p._escapePattern(chars)		-- assignment keeps only the escaped string
	end
	local result = mw.ustring.gsub(source_str, "[" .. chars .. "]", '')
	return result
end

--[[
Match any

Returns the index of the first given pattern to match the input. Patterns must be consecutively numbered.
Returns the empty string if nothing matches for use in {{#if:}}

Usage:
	{{#invoke:String2|matchAny|source=123 abc|456|abc}} returns '2'.

Parameters:
	source: the string to search
	plain: A flag indicating that the patterns should be understood as plain text. defaults to true.
	1, 2, 3, ...: the patterns to search for
]]
function p.matchAny(frame)
	local source_str = frame.args['source'] or error('The source parameter is mandatory.')
	local l_plain = p._getBoolean(frame.args['plain'] or true)
	for i = 1, math.huge do
		local pattern = frame.args[i]
		if not pattern then
			return ''						-- ran out of patterns without a match
		end
		if mw.ustring.find(source_str, pattern, 1, l_plain) then
			return tostring(i)
		end
	end
end

--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------

Converts a hyphen to a dash under certain conditions.  The hyphen must separate
like items; unlike items are returned unmodified.  These forms are modified:
	letter - letter (A - B)
	digit - digit (4-5)
	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
	letterdigit - letterdigit (A1-A5) (an optional separator between letter and
		digit is supported – a.1-a.5 or a-1-a-5)
	digitletter - digitletter (5a - 5d) (an optional separator between letter and
		digit is supported – 5.a-5.d or 5-a-5-d)

any other forms are returned unmodified.

str may be a comma- or semicolon-separated list

spacing is the text placed after each comma when the list is reassembled; it
defaults to a single space when omitted.

]]
function p.hyphen_to_dash(str, spacing)
	if (str == nil or str == '') then
		return str
	end
	spacing = spacing or ' '				-- same default as p.hyphen2dash; avoids concatenating nil

	local accept

	str = mw.text.decode(str, true)			-- replace html entities with their characters; semicolon mucks up the text.split

	local out = {}
	local list = mw.text.split(str, '%s*[,;]%s*')		-- split str at comma or semicolon separators if there are any

	for _, item in ipairs(list) do			-- for each item in the list
		item = mw.text.trim(item)			-- trim whitespace
		item, accept = item:gsub('^%(%((.+)%)%)$', '%1')	-- remove accept-this-as-written markup; <accept> is the count
		if accept == 0 and mw.ustring.match(item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators
			if item:match('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or		-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				item:match('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or		-- digitletter hyphen digitletter (optional separator between digit and letter)
				item:match('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or		-- digit separator digit hyphen digit separator digit
				item:match('^%d+%s*%-%s*%d+$') or							-- digit hyphen digit
				item:match('^%a+%s*%-%s*%a+$') then							-- letter hyphen letter
					item = item:gsub('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2')	-- replace hyphen, remove extraneous space characters
			else
				item = mw.ustring.gsub(item, '%s*[–—]%s*', '–')			-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
			end
		end
		table.insert(out, item)				-- add the (possibly modified) item to the output table
	end

	local temp_str = table.concat(out, ',' .. spacing)			-- concatenate the output table into a comma separated string
	temp_str, accept = temp_str:gsub('^%(%((.+)%)%)$', '%1')	-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept ~= 0 then
		temp_str = str:gsub('^%(%((.+)%)%)$', '%1')			-- when global markup removed, return original str; do it this way to suppress boolean second return value
	end
	return temp_str
end

-- {{#invoke:String2|hyphen2dash|text|spacing}}
function p.hyphen2dash(frame)
	local str = frame.args[1] or ''
	local spacing = frame.args[2] or ' '	-- space is part of the standard separator for normal spacing (but in conjunction with templates r/rp/ran we may need a narrower spacing)
	return p.hyphen_to_dash(str, spacing)
end

-- Similar to [[Module:String#endswith]]
-- Returns 'yes' when the first unnamed parameter begins with the second, else ''.
-- Missing parameters are treated as empty strings.
function p.startswith(frame)
	local text = frame.args[1] or ''
	local prefix = frame.args[2] or ''
	return (text:sub(1, prefix:len()) == prefix) and 'yes' or ''
end

-- Implements [[Template:Isnumeric]]
-- Returns the input when it parses as a formatted number, else ''; with
-- |boolean=true returns 1 or 0 instead.
function p.isnumeric(frame)
	local s = frame.args[1] or frame:getParent().args[1]
	local boolean = (frame.args.boolean or frame:getParent().args.boolean) == 'true'
	if type(s) == 'string' and mw.getContentLanguage():parseFormattedNumber(s) then
		return boolean and 1 or s
	end
	return boolean and 0 or ''
end

-- Checks if a value in a group of numbers is not an integer.
-- Allows usage of an |empty= parameter to allow empty values to be skipped.
function p.isInteger(frame)
	-- NOTE(review): frame.args is always a table, so the parent-frame fallback
	-- below is never taken; kept as written to preserve behaviour.
	local values = frame.args or frame:getParent().args
	local allow_empty = frame.args.empty or frame:getParent().args.empty
	for _, value in ipairs(values) do
		-- Trim spaces
		value = value and value:gsub("^%s*(.-)%s*$", "%1")
		if value == "" or value == nil then
			if not allow_empty then
				return false				-- Empty values are not allowed
			end
		else
			value = tonumber(value)
			if not (type(value) == "number" and value == math.floor(value)) then
				return false
			end
		end
	end
	return true
end

-- Returns an error found in a string.
-- The first <strong class="error">...</strong> span is returned, or nil when
-- there is none (or when no text was supplied).
function p.getError(frame)
	local text = frame.args[1] or frame:getParent().args[1]
	if not text then
		return nil
	end
	return text:match('(<strong class="error">.-</strong>)')
end

return p
close