Jump to content

Module:Delink

From Wikimedia Commons, the free media repository
Lua

Code · Discussion · Edit · History · Links · Link count · Subpages: Documentation · Tests · Results · Sandbox · Live code · All modules

This module removes links from wikitext. It de-links wikilinks, URL links, removes HTML comments, and normalises whitespace. It handles most wikitext correctly, including category links, the colon trick, the pipe trick (normal and reverse), nested wikilinks, and invalid link characters.

Code

-- This module de-links most wikitext.
--
-- Public entry points:
--   p.delink(frame) -- for use from #invoke (or with a plain args table).
--   p._delink(args) -- for use from other Lua modules.
--
-- Recognised arguments (all compared against literal strings):
--   args[1]          the wikitext to process (defaults to "").
--   args.refs        "yes" removes strip markers that represent ref tags.
--   args.comments    "no" keeps HTML comments (removed by default).
--   args.wikilinks   "no" keeps [[wikilinks]] (de-linked by default).
--   args.urls        "no" keeps [external links] (de-linked by default).
--   args.whitespace  "no" skips whitespace normalisation (done by default).

local p = {}

-- Handles a link that starts with "[[|" (the reverse pipe trick).
-- Returns the input unchanged when it contains a newline or a second pipe;
-- otherwise returns the text between "[[|" and the closing "]]".
local function delinkReversePipeTrick(s)
    if mw.ustring.match(s, "^%[%[|.*[|\n]") then -- Check for newlines or multiple pipes.
        return s
    end
    return mw.ustring.match(s, "%[%[|(.*)%]%]")
end

-- Handles a link of the form "[[target|]]" (the pipe trick) and returns the
-- display text derived from the target.
local function delinkPipeTrick(s)
    -- We need to deal with colons, brackets, and commas, per [[Help:Pipe trick]].

    if mw.ustring.match(s, ":") then
        -- First, remove the text before the first colon, if any.
        s = mw.ustring.match(s, "%[%[.-:(.*)|%]%]")
    else
        -- If there are no colons, grab all of the text apart from the
        -- square brackets and the pipe.
        s = mw.ustring.match(s, "%[%[(.*)|%]%]")
    end

    -- Next up, brackets and commas.
    if mw.ustring.match(s, "%(.-%)$") then
        -- Brackets trump commas.
        s = mw.ustring.match(s, "(.-) ?%(.-%)$")
    elseif mw.ustring.match(s, ",") then
        -- If there are no brackets, display only the text before the first comma.
        s = mw.ustring.match(s, "(.-),.*$")
    end
    return s
end

-- Converts a single "[[...]]" match into its display text.
-- Returns the original text for links whose title area contains invalid
-- characters, and an empty string for category, file and language-prefixed
-- (interwiki) links.
local function delinkWikilink(s)
    local result = s

    -- Deal with the reverse pipe trick.
    if mw.ustring.match(result, "%[%[|") then
        return delinkReversePipeTrick(result)
    end

    result = mw.uri.decode(result, "PATH") -- decode percent-encoded entities. Leave underscores and plus signs.
    result = mw.text.decode(result, true) -- decode HTML entities.

    -- Check for bad titles. To do this we need to find the
    -- title area of the link, i.e. the part before any pipes.
    local titlearea
    if mw.ustring.match(result, "|") then -- Find if we're dealing with a piped link.
        titlearea = mw.ustring.match(result, "^%[%[(.-)|.*%]%]")
    else
        titlearea = mw.ustring.match(result, "^%[%[(.-)%]%]")
    end
    -- Check for bad characters. The nil test is purely defensive: if the
    -- title area cannot be extracted, leave the text untouched instead of
    -- passing nil to mw.ustring.match.
    if not titlearea or mw.ustring.match(titlearea, "[%[%]<>{}%%%c\n]") then
        return s
    end

    -- Check for categories, interwikis, and files.
    local colonprefix = mw.ustring.match(result, "%[%[(.-):.*%]%]") or "" -- Get the text before the first colon.
    local ns = mw.site.namespaces[colonprefix] -- see if this is a known namespace
    if mw.language.isKnownLanguageTag(colonprefix)
        or (ns and (ns.canonicalName == "File" or ns.canonicalName == "Category"))
    then
        return ""
    end

    -- Remove the colon if the link is using the [[Help:Colon trick]].
    if mw.ustring.match(result, "%[%[:") then
        result = "[[" .. mw.ustring.match(result, "%[%[:(.*%]%])")
    end

    -- Deal with links using the [[Help:Pipe trick]].
    if mw.ustring.match(result, "^%[%[[^|]*|%]%]") then
        return delinkPipeTrick(result)
    end

    -- Find the display area of the wikilink
    if mw.ustring.match(result, "|") then -- Find if we're dealing with a piped link.
        result = mw.ustring.match(result, "^%[%[.-|(.+)%]%]")
        -- Remove new lines from the display of multiline piped links,
        -- where the pipe is before the first new line.
        -- (Assignment, not a direct return, so gsub's count is discarded.)
        result = mw.ustring.gsub(result, "\n", "")
    else
        result = mw.ustring.match(result, "^%[%[(.-)%]%]")
    end

    return result
end

-- Converts a single "[...]" match into its display text if it is an
-- external link; otherwise returns the text unchanged.
local function delinkURL(s)
    -- Assume we have already delinked internal wikilinks, and that
    -- we have been passed some text between two square brackets [foo].

    -- If the text contains a line break it is not formatted as a URL, regardless of other content.
    if mw.ustring.match(s, "\n") then
        return s
    end

    -- Check if the text has a valid URL prefix and at least one valid URL character.
    local valid_url_prefixes = {"//", "http://", "https://", "ftp://", "gopher://", "mailto:", "news:", "irc://"}
    local url_prefix
    for _, prefix in ipairs(valid_url_prefixes) do
        if mw.ustring.match(s, '^%[' .. prefix .. '[^"%s].*%]') then
            url_prefix = prefix
            break
        end
    end

    -- Get display text
    if not url_prefix then
        return s
    end
    s = mw.ustring.match(s, "^%[" .. url_prefix .. "(.*)%]") -- Grab all of the text after the URL prefix and before the final square bracket.
    s = mw.ustring.match(s, '^.-(["<> ].*)') or "" -- Grab all of the text after the first URL separator character ("<> ).
    s = mw.ustring.match(s, "^%s*(%S.*)$") or "" -- If the separating character was a space, trim it off.

    -- Declared local: the original assigned to an undeclared (global) variable.
    local s_decoded = mw.text.decode(s, true)
    if mw.ustring.match(s_decoded, "%c") then
        -- Decoding produced control characters; return the undecoded text.
        return s
    end
    return s_decoded
end

-- Applies delinkFunction to every occurrence of pattern in s.
--   s              string to process.
--   pattern        pattern string; must begin with "^".
--   delinkFunction replacement callback handed to mw.ustring.gsub.
-- Raises an error (level 2) if s is not a string or the pattern is not
-- anchored with "^".
local function delinkLinkClass(s, pattern, delinkFunction)
    -- Note: "not type(s) == 'string'" parses as "(not type(s)) == 'string'",
    -- which is always false, so the comparison must be written with ~=.
    if type(s) ~= "string" then
        error("Attempt to de-link non-string input.", 2)
    end
    if not (type(pattern) == "string" and mw.ustring.sub(pattern, 1, 1) == "^") then
        error('Invalid pattern detected. Patterns must begin with "^".', 2)
    end
    -- Iterate over the text string, and replace any matched text using the
    -- delink function. We need to iterate character by character rather
    -- than just use gsub, otherwise nested links aren't detected properly.
    local pieces = {}
    while mw.ustring.len(s) > 0 do
        -- Replace text using one iteration of gsub.
        s = mw.ustring.gsub(s, pattern, delinkFunction, 1)
        -- Move the left-most character to the output buffer.
        pieces[#pieces + 1] = mw.ustring.sub(s, 1, 1)
        s = mw.ustring.sub(s, 2, -1)
    end
    return table.concat(pieces)
end

-- De-links the wikitext in args[1] according to the option arguments
-- described at the top of this module, and returns the resulting text.
function p._delink(args)
    local text = args[1] or ""
    if type(text) == "number" then
        -- Keep numeric input usable now that delinkLinkClass actually
        -- enforces its string-only check.
        text = tostring(text)
    end
    if args.refs == "yes" then
        -- Remove any [[Help:Strip markers]] representing ref tags. In most situations
        -- this is not a good idea - only use it if you know what you are doing!
        text = mw.ustring.gsub(text, "UNIQ%w*%-ref%-%d*%-QINU", "")
    end
    if not (args.comments == "no") then
        text = mw.ustring.gsub(text, "<!%-%-.-%-%->", "") -- Remove html comments.
    end
    if not (args.wikilinks == "no") then
        text = delinkLinkClass(text, "^%[%[.-%]%]", delinkWikilink) -- De-link wikilinks.
    end
    if not (args.urls == "no") then
        text = delinkLinkClass(text, "^%[.-%]", delinkURL) -- De-link URLs.
    end
    if not (args.whitespace == "no") then
        -- Replace single new lines with a single space, but leave double new lines
        -- and new lines only containing spaces or tabs before a second new line.
        text = mw.ustring.gsub(text, "([^\n\t][ \t]*)\n([ \t]*[^\n\t])", "%1 %2")
        text = mw.ustring.gsub(text, "[ \t]+", " ") -- Remove extra tabs and spaces.
    end
    return text
end

-- Entry point for #invoke. Also accepts a plain table of arguments when
-- called from another module or from the debug console.
function p.delink(frame)
    local args
    if frame == mw.getCurrentFrame() then
        -- We're being called via #invoke. If the invoking template passed any args, use
        -- them. Otherwise, use the args that were passed into the template.
        args = frame:getParent().args
        -- A pairs() loop is used as the "is it non-empty?" test on purpose:
        -- the Scribunto manual warns that next() and # may not work
        -- correctly on frame.args.
        for _ in pairs(frame.args) do
            args = frame.args
            break
        end
    else
        -- We're being called from another module or from the debug console, so assume
        -- the args are passed in directly.
        args = frame
    end
    return p._delink(args)
end

return p
close