-- Module:Links
-- Documentation for this module may be created at Module:Links/doc
-- Table that collects this module's public functions.
local export = {}
--[=[
	[[Unsupported titles]], pages with high memory usage,
	extraction modules and part-of-speech names are listed
	at [[Module:links/data]].

	Other modules used:
		[[Module:script utilities]]
		[[Module:scripts]]
		[[Module:languages]] and its submodules
		[[Module:gender and number]]
		[[Module:debug/track]]
]=]
-- Local aliases for Unicode-aware helpers. `uchar` and `usub` are prefixed
-- with u to avoid confusion with the default string methods of the same name.
local toNFC = mw.ustring.toNFC
local uchar = mw.ustring.char
local usub = require("Module:string utilities").sub
-- Trim `text` with mw.text.trim, but only when it contains at least one
-- non-whitespace character; otherwise hand it back exactly as received.
local function cond_trim(text)
	local has_content = mw.ustring.find(text, "%S")
	if not has_content then
		return text
	end
	local trimmed = mw.text.trim(text)
	return trimmed
end
-- Record a tracking hit under "links/<page>" via [[Module:debug/track]].
-- When `code` is given, "links/<page>/<code>" is recorded as well.
-- Always returns true.
local function track(page, code)
	local tracking_page = "links/" .. page
	local debug_track = require("Module:debug/track")
	if not code then
		debug_track(tracking_page)
	else
		debug_track{tracking_page, tracking_page .. "/" .. code}
	end
	return true
end
-- Helper from [[Module:utilities]]; used in this file to resolve HTML
-- character references in link text before it is inspected or split.
local get_entities = require("Module:utilities").get_entities
-- Local aliases for the standard table functions.
local table_insert = table.insert
local table_concat = table.concat
-- Caches for tables from [[Module:links/data]]. They start out nil and are
-- filled in by the functions that need them, the first time they need them.
local ignore_cap
local pos_tags
-- Work out which page a link to `target` should point at.
--
-- Parameters:
--   target  the term or page name as written by the editor
--   lang    language object; supplies makeEntryName, getCode, getType and
--           getCanonicalName
--   sc      script, passed through to lang:makeEntryName
--   plain   if true, a leading "*" is taken literally instead of marking a
--           reconstructed term
--
-- Returns exactly one value: the page name to link to, or nil if no link
-- should be made (unexpanded template parameter, or a "*"-prefixed term in
-- the language "und").
-- Raises an error if `lang` is of type "reconstructed" and the target neither
-- starts with "*" nor carries an interwiki or namespace prefix.
function export.getLinkPage(target, lang, sc, plain)
	-- Remove diacritics from the page name
	target = (lang:makeEntryName(target, sc))

	-- If the link contains unexpanded template parameters, then don't create a link.
	if target:find("{{{") then
		return nil
	end

	if target:sub(1, 1) == "/" then
		-- A leading colon stops the title being read as a subpage link.
		return ":" .. target
	elseif get_entities(target):find("^%*?Reconstruction:") then
		-- Already points into the Reconstruction namespace: just drop the "*".
		-- The parentheses discard gsub's substitution count, so callers get a
		-- single return value like on every other path.
		return (target:gsub("^%*", ""))
	-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however.
	elseif target:sub(1, 1) == "*" and #target > 1 and not plain then
		if lang:getCode() == "und" then
			return nil
		else
			target = "Reconstruction:" .. lang:getCanonicalName() .. "/" .. usub(target, 2)
		end
	elseif lang:getType() == "reconstructed" then
		-- An unattested language may only be linked without "*" when the
		-- target goes to another wiki or to an explicit namespace.
		local lower = require("Module:string utilities").lower
		local check, m_utildata = target:match("^:*([^:]*):"), mw.loadData("Module:utilities/data")
		check = check and lower(check)
		if m_utildata.interwikis[check] or m_utildata.namespaces[check] then
			return target
		else
			error("The specified language " .. lang:getCanonicalName()
				.. " is unattested, while the given word is not marked with '*' to indicate that it is reconstructed.")
		end
	elseif lang:getType() == "appendix-constructed" then
		target = "Appendix:" .. lang:getCanonicalName() .. "/" .. target
	end

	return target
end
-- Report whether `link.target` refers to the page currently being rendered.
--
-- The target is URI-decoded, stripped of one leading colon and has its HTML
-- entities resolved before it is compared with the current page's prefixed
-- title. If the target contains any of the characters # % & + / : < = > ? @
-- [ \ ] _ { | }, both sides are first normalised with lang:makeEntryName.
-- Returns a boolean.
local function check_self_link(link, lang)
	-- The inner parentheses keep gsub's substitution count from being passed
	-- to get_entities as a stray second argument.
	local target = get_entities((mw.uri.decode(link.target):gsub("^:", "")))
	local current_title = mw.title.getCurrentTitle().prefixedText
	if target:find("[#%%&+/:<=>?@[\\%]_{|}]") then
		target = (lang:makeEntryName(target))
		current_title = (lang:makeEntryName(current_title))
	end
	return target == current_title
end
-- Build the wikitext for a link to a term in a specific language.
--
-- `link` is a table with `target` and optionally `display` and `fragment`;
-- it is modified in place. Returns a "[[target#fragment|display]]" string,
-- the bare display text when no page can be linked, or a
-- <strong class="selflink"> element when the link would point at the current
-- page and neither `allow_self_link` nor `id` is set.
local function makeLangLink(link, lang, sc, id, allow_self_link, isolated)
	-- A nil fragment means the link did not come from parseLink (which sets
	-- false when there is none), so look for one here. This prevents
	-- {{l|en|word#Etymology 2|word}} from linking to [[word#Etymology 2#English]].
	if link.fragment == nil then
		-- Numeric character references (e.g. &#39; for ') contain a "#" that
		-- would be mistaken for the start of a fragment, so resolve them first.
		if link.target:find("&#[^;]+;") then
			link.target = get_entities(link.target) or link.target
		end
		local page, anchor = link.target:match("^([^#]+)#(.+)$")
		if page then
			link.target = page
			link.fragment = anchor
		end
	end

	-- Default the display form to the target, then let the language process it.
	link.display = link.display or link.target
	link.display = (lang:makeDisplayText(link.display, sc))

	-- Resolve the target page; without one there is nothing to link to.
	link.target = export.getLinkPage(link.target, lang, sc)
	if not link.target then
		return link.display
	end

	-- Mimic the software's own self-link rendering, unless self-links are
	-- allowed or a sense id makes the link point somewhere specific.
	local self_links_suppressed = not allow_self_link and not id
	if self_links_suppressed and check_self_link(link, lang) then
		return "<strong class=\"selflink\">" .. link.display .. "</strong>"
	end

	-- Interwiki targets never get a section added. For localized prefixes
	-- that make syntax error, please use the format: ["xyz"] = true.
	local prefix = link.target:match("^:*([^:]+):")
	local is_interwiki = false
	if prefix then
		local lower_prefix = require("Module:string utilities").lower(prefix)
		is_interwiki = mw.loadData("Module:utilities/data").interwikis[lower_prefix]
	end

	if not is_interwiki then
		if link.fragment then
			track("fragment", lang:getCode())
		else
			if link.target:find("#$") then
				track("fragment", lang:getCode())
			end
			-- Do not add a section link to "Undetermined", as such sections do
			-- not exist and are invalid. TabbedLanguages handles links without
			-- a section by linking to the "last visited" section, but adding
			-- "Undetermined" would break that feature.
			if lang:getCode() ~= "und" then
				if id then
					link.fragment = require("Module:senseid").anchor(lang, id)
				elseif not link.target:find("^Appendix:") and not link.target:find("^Reconstruction:") then
					link.fragment = lang:getCanonicalName()
				end
			end
		end
	end

	-- A link standing on its own may have its display text swapped for the
	-- replacement listed in [[Module:links/data]].
	if isolated then
		local replacement = mw.loadData("Module:links/data").display_change[mw.text.decode(link.display)]
		link.display = replacement or link.display
	end

	local anchor_part = link.fragment and "#" .. link.fragment or ""
	return "[[" .. link.target .. anchor_part .. "|" .. link.display .. "]]"
end
-- Build the wikitext for a plain link: like makeLangLink, but no language
-- section is ever added and a leading "*" in the target is taken literally.
--
-- `link` is a table with `target` and optionally `display` and `fragment`;
-- it is modified in place. Returns a "[[target#fragment|display]]" string,
-- the bare display text when no page can be linked, or a
-- <strong class="selflink"> element for a link to the current page when
-- `allow_self_link` is not set.
local function makePlainLink(link, lang, sc, allow_self_link, isolated)
	-- Default the display form to the target, then let the language process it.
	link.display = link.display or link.target
	link.display = (lang:makeDisplayText(link.display, sc))

	-- Resolve the target page in plain mode.
	link.target = export.getLinkPage(link.target, lang, sc, true)
	if not link.target then
		return link.display
	end

	-- Mimic the software's own self-link rendering unless self-links are allowed.
	if not allow_self_link and check_self_link(link, lang) then
		return "<strong class=\"selflink\">" .. link.display .. "</strong>"
	end

	-- Fragments on non-interwiki targets are only tracked here. For localized
	-- prefixes that make syntax error, please use the format: ["xyz"] = true,
	local prefix = link.target:match("^:*([^:]+):")
	local is_interwiki = false
	if prefix then
		local lower_prefix = require("Module:string utilities").lower(prefix)
		is_interwiki = mw.loadData("Module:utilities/data").interwikis[lower_prefix]
	end
	if not is_interwiki and (link.fragment or link.target:find("#$")) then
		track("fragment", lang:getCode())
	end

	-- A link standing on its own may have its display text swapped for the
	-- replacement listed in [[Module:links/data]].
	if isolated then
		local replacement = mw.loadData("Module:links/data").display_change[mw.text.decode(link.display)]
		link.display = replacement or link.display
	end

	local anchor_part = link.fragment and "#" .. link.fragment or ""
	return "[[" .. link.target .. anchor_part .. "|" .. link.display .. "]]"
end
-- Split the inside of a wikilink ("target|display", optionally with a
-- "#fragment" on the target) into its parts.
--
-- Returns a table with `target`, `display` and `fragment` (false when there is
-- no fragment), or nil when the text is a category, file or image link, which
-- are not real links and must be left alone.
local function parseLink(linktext)
	local piped = "^([^|]+)|(.+)$"
	local link = { target = linktext }
	local page, shown = link.target:match(piped)

	-- Links being re-parsed may contain an escaped character such as "&amp;",
	-- which must not be mistaken for an unsupported title of the same
	-- spelling. When the text contains something entity-like and its display
	-- part is listed in unsupported_titles under a different target, resolve
	-- the entities and split again.
	if link.target:match("&[^;]+;") then
		local unsupported_titles = mw.loadData("Module:links/data").unsupported_titles
		if unsupported_titles[shown] and unsupported_titles[shown] ~= page then
			link.target = get_entities(link.target)
			page, shown = link.target:match(piped)
		end
	end

	if page then
		link.target = page
		link.display = shown
	else
		link.display = link.target
	end

	-- There's no point in processing these, as they aren't real links.
	local target_lower = require("Module:string utilities").lower(link.target)
	local skipped_prefixes = {"category", "cat", "file", "image"}
	for index = 1, #skipped_prefixes do
		if target_lower:find("^" .. skipped_prefixes[index] .. ":") then
			return nil
		end
	end

	local base, anchor = link.target:match("^(.+)#(.+)$")
	if base then
		link.target = base
		link.fragment = anchor
	else
		-- false rather than nil, so that makeLangLink does not look for a
		-- fragment again.
		link.fragment = false
	end

	return link
end
-- Find embedded links and ensure they link to the correct section.
--
-- Parameters:
--   text              text containing one or more [[...]] links
--   data              link data; `lang` and `sc` are used, `alt` and `id`
--                     are tracked and logged as ignored
--   allow_self_link   passed through to the link builders
--   allReconstructed  if true, every link target gets a leading "*"
--   plain             if true, links are built with makePlainLink instead of
--                     makeLangLink
--
-- Returns the text with each link rebuilt and the text between links passed
-- through data.lang:makeDisplayText.
local function process_embedded_links(text, data, allow_self_link, allReconstructed, plain)
	if data.alt then
		track("alt-ignored")
		mw.log("(from Module:links)", "text with embedded wikilinks:", text,
			"ignored alt:", data.alt, "lang:", data.lang:getCode())
	end

	if data.id then
		track("id-ignored")
		mw.log("(from Module:links)", "text with embedded wikilinks:", text,
			"ignored id:", data.id, "lang:", data.lang:getCode())
	end

	-- Rebuild one link. The whitespace captured on either side is kept
	-- outside the link.
	local function makeLink(space1, linktext, space2)
		local capture = "[[" .. linktext .. "]]"

		local link = parseLink(linktext)

		--Return unprocessed false positives untouched (e.g. categories).
		if not link then return capture end

		if allReconstructed and not link.target:find("^%*") then
			link.target = "*" .. link.target
		end

		if not plain then
			linktext = makeLangLink(link, data.lang, data.sc, data.id, allow_self_link, false)
		else
			linktext = makePlainLink(link, data.lang, data.sc, allow_self_link, false)
		end
		-- Mark the brackets of processed links with chars 3 and 4.
		linktext = linktext
			:gsub("^%[%[", "\3")
			:gsub("%]%]$", "\4")

		return space1 .. linktext .. space2
	end

	-- Use chars 1 and 2 as temporary substitutions, so that we can use charsets. These are converted to chars 3 and 4 by makeLink, which means we can convert any remaining chars 1 and 2 back to square brackets (i.e. those not part of a link).
	text = text
		:gsub("%[%[", "\1")
		:gsub("%]%]", "\2")
		:gsub("\1(%s*)([^\1\2]-)(%s*)\2", makeLink)
		:gsub("\1", "[[")
		:gsub("\2", "]]")

	-- Remove the extra * at the beginning of a language link if it's immediately followed by a link whose display begins with * too.
	if allReconstructed then
		text = text:gsub("^%*\3([^|\3\4]+)|%*", "\3%1|*")
	end

	-- Non-linked text is processed with the same script as the links.
	-- `data.sc` is a single script object here, not a list, so the former
	-- `data.sc[1]` always evaluated to nil and the script was never passed on.
	local function display(plain_text)
		return (data.lang:makeDisplayText(plain_text, data.sc))
	end

	-- Process the non-linked text by iterating over the text between links, reinserting square brackets as we go.
	text = mw.ustring.gsub(text, "^(.-)(%s*)\3", function(m1, m2) return display(m1) .. m2 .. "[[" end)
	text = mw.ustring.gsub(text, "\4(.-)(%s*)\3", function(m1, m2) return "]]" .. display(m1) .. m2 .. "[[" end)
	text = text:gsub("\4(.-)$", function(m1) return "]]" .. display(m1) end)

	return text
end
-- Creates a basic wikilink to the given term. If the text already contains links, these are replaced with links to the correct section.
--
-- `data` is a table; the fields read here are `term`, `lang`, `sc`, `alt`
-- and `id`. `data.sc` is filled in with the language's best script for the
-- term when absent, and `data.alt` is overwritten when the term is a single
-- redundant wikilink. `allow_self_link` is passed on to the link builders.
-- Returns the wikitext of the link(s). Raises an error if `data` is not a table.
function export.language_link(data, allow_self_link)
	if type(data) ~= "table" then
		error("The first argument to the function language_link must be a table. See Module:links/documentation for more information.")
	end

	local text = data.term

	data.sc = data.sc or data.lang:findBestScript(text)

	-- Languages listed in ignore_cap have "^" removed from the term.
	ignore_cap = ignore_cap or mw.loadData("Module:links/data").ignore_cap
	if ignore_cap[data.lang:getCode()] and text then
		text = text:gsub("%^", "")
	end

	-- If the text begins with * and another character,
	-- then act as if each link begins with *
	local allReconstructed = false
	if require("Module:utilities").get_plaintext(text):match("^%*.") then
		allReconstructed = true
	end

	-- Do we have a redundant wikilink? If so, remove it.
	local temp = text:match("^%[%[(.-)%]%]$")
	-- Note: it's possible for "[[" or "]]" to be uninvolved in links, so we need to check for both individually (e.g. "[[aaa]] bb]]" would not have a redundant wikilink).
	if temp and not (temp:find("%[%[") or temp:find("%]%]")) then
		local target, alt = temp:match("^([^|]+)|?(.-)$")
		-- The match fails for "[[]]" and "[[|x]]". Leave the text untouched
		-- in that case instead of setting it to nil, which would make the
		-- text:find below raise an error.
		if target then
			text, data.alt = target, alt
			track("redundant wikilink")
			if data.alt == "" then data.alt = nil end
		end
	end

	-- Do we have embedded wikilinks?
	if text:find("%[%[.-%]%]") then
		text = process_embedded_links(text, data, allow_self_link, allReconstructed)
	-- If not, make a link using the parameters.
	else
		text = cond_trim(text)
		data.alt = data.alt and cond_trim(data.alt)
		text = makeLangLink({ target = text, display = data.alt }, data.lang, data.sc, data.id, allow_self_link, true)
	end

	return text
end
-- Creates a wikilink to the given term without adding a language section.
--
-- `data` is a table; the fields read here are `term`, `lang`, `sc` and `alt`.
-- `data.lang` is replaced with the language "und" unless it already is that
-- language, `data.sc` is detected from the term when absent, and `data.alt`
-- is overwritten when the term is a single redundant wikilink.
-- `allow_self_link` is passed on to the link builders.
-- Returns the wikitext of the link(s). Raises an error if `data` is not a table.
function export.plain_link(data, allow_self_link)
	if type(data) ~= "table" then
		error("The first argument to the function plain_link must be a table. See Module:links/documentation for more information.")
	end

	local text = data.term

	if (not data.lang) or data.lang:getCode() ~= "und" then
		data.lang = require("Module:languages").getByCode("und")
	end

	data.sc = data.sc or require("Module:scripts").findBestScriptWithoutLang(text)

	-- Do we have a redundant wikilink? If so, remove it.
	local temp = text:match("^%[%[(.-)%]%]$")
	-- Note: it's possible for "[[" or "]]" to be uninvolved in links, so we need to check for both individually (e.g. "[[aaa]] bb]]" would not have a redundant wikilink).
	if temp and not (temp:find("%[%[") or temp:find("%]%]")) then
		local target, alt = temp:match("^([^|]+)|?(.-)$")
		-- The match fails for "[[]]" and "[[|x]]". Leave the text untouched
		-- in that case instead of setting it to nil, which would make the
		-- text:find below raise an error.
		if target then
			text, data.alt = target, alt
			track("redundant wikilink")
			if data.alt == "" then data.alt = nil end
		end
	end

	-- Do we have embedded wikilinks?
	if text:find("%[%[.-%]%]") then
		text = process_embedded_links(text, data, allow_self_link, nil, true)
	-- If not, make a link using the parameters.
	else
		text = cond_trim(text)
		data.alt = data.alt and cond_trim(data.alt)
		text = makePlainLink({ target = text, display = data.alt }, data.lang, data.sc, allow_self_link, true)
	end

	return text
end
-- Replace any links with links to the correct section, but don't link the whole text if no embedded links are found. Returns the display text form.
--
-- `data` is a table; the fields read here are `term`, `lang` and `sc`
-- (`data.sc` is filled in with the language's best script for the term when
-- absent). `allow_self_link` is passed on when links are rebuilt.
-- Raises an error if `data` is not a table.
function export.embedded_language_links(data, allow_self_link)
	if type(data) ~= "table" then
		error("The first argument to the function embedded_language_links must be a table. See Module:links/documentation for more information.")
	end

	local text = data.term

	data.sc = data.sc or data.lang:findBestScript(text)

	-- Do we have embedded wikilinks?
	if text:find("%[%[.-%]%]") then
		text = process_embedded_links(text, data, allow_self_link)
	else
		-- If there are no embedded wikilinks, return the display text.
		text = cond_trim(text)
		text = (data.lang:makeDisplayText(text, data.sc))
	end

	return text
end
-- Wrap `text` in the HTML markup that belongs to an annotation of kind
-- `itemType`: "gloss", "tr", "ts", "pos" or "annotations". Any other kind
-- adds no markup.
--
-- `face` and `lang` are only consulted for "tr": `lang` supplies the code for
-- the lang attribute, and a `face` of "term" adds the mention-tr class.
-- Returns the wrapped text, or "" when `text` is not a string.
function export.mark(text, itemType, face, lang)
	local opening, closing = "", ""

	if itemType == "gloss" then
		opening = '<span class="mention-gloss-double-quote">“</span><span class="mention-gloss">'
		closing = '</span><span class="mention-gloss-double-quote">”</span>'
	elseif itemType == "tr" then
		if face == "term" then
			opening = '<span lang="' .. lang:getCode() .. '" class="tr mention-tr Latn">'
		else
			opening = '<span lang="' .. lang:getCode() .. '" class="tr Latn">'
		end
		closing = '</span>'
	elseif itemType == "ts" then
		opening = '<span class="ts mention-ts Latn">/'
		closing = '/</span>'
	elseif itemType == "pos" then
		opening = '<span class="ann-pos">'
		closing = '</span>'
	elseif itemType == "annotations" then
		opening = '<span class="mention-gloss-paren annotation-paren">(</span>'
		closing = '<span class="mention-gloss-paren annotation-paren">)</span>'
	end

	-- The type check comes after the markup has been chosen, so a "tr"
	-- request still consults `lang` even when there is nothing to wrap.
	if type(text) ~= "string" then
		return ""
	end
	return opening .. text .. closing
end
-- Format the annotations (things following the linked term)
--
-- `data` is a table; the fields read here are `interwiki`, `genders`, `lang`,
-- `tr`, `ts`, `gloss`, `pos` and `lit`. `tr` and `ts` are lists of which only
-- the first element is used; either may be absent. `face` of "term" selects
-- the mention style for the transliteration.
--
-- Side effects on `data`: `genders` is wrapped in a table when it is not one
-- already, and `pos` gets a debug category appended when it looks like it
-- contains a /transcription/.
-- Returns the concatenated annotation string (possibly empty).
function export.format_link_annotations(data, face)
	local output = {}

	-- Interwiki link
	if data.interwiki then
		table_insert(output, data.interwiki)
	end

	-- Genders
	if type(data.genders) ~= "table" then
		data.genders = { data.genders }
	end

	if #data.genders > 0 then
		local m_gen = require("Module:gender and number")
		table_insert(output, " " .. m_gen.format_list(data.genders, data.lang))
	end

	local annotations = {}

	-- Transliteration and transcription. Each list is checked on its own, so
	-- supplying only `tr` or only `ts` does not index a nil table.
	local tr = data.tr and data.tr[1]
	local ts = data.ts and data.ts[1]
	if tr or ts then
		local kind
		if face == "term" then
			kind = face
		else
			kind = "default"
		end

		if tr and ts then
			table_insert(annotations,
				require("Module:script utilities").tag_translit(tr, data.lang, kind)
				.. " " .. export.mark(ts, "ts"))
		elseif ts then
			table_insert(annotations, export.mark(ts, "ts"))
		else
			table_insert(annotations,
				require("Module:script utilities").tag_translit(tr, data.lang, kind))
		end
	end

	-- Gloss/translation
	if data.gloss then
		table_insert(annotations, export.mark(data.gloss, "gloss"))
	end

	-- Part of speech
	if data.pos then
		-- debug category for pos= containing transcriptions
		if data.pos:find("/[^><]*/") then
			data.pos = data.pos .. "[[Category:links likely containing transcriptions in pos]]"
		end

		-- Abbreviations listed in pos_tags are expanded; anything else is shown as given.
		pos_tags = pos_tags or mw.loadData("Module:links/data").pos_tags
		table_insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos"))
	end

	-- Literal/sum-of-parts meaning
	if data.lit then
		table_insert(annotations, "literally " .. export.mark(data.lit, "gloss"))
	end

	if #annotations > 0 then
		table_insert(output, " " .. export.mark(table_concat(annotations, ", "), "annotations"))
	end

	return table_concat(output)
end
-- Encode certain characters to avoid various delimiter-related issues at
-- various stages. < and > must be encoded because they end up forming part of
-- CSS class names inside of <span ...> and would interfere with finding the
-- end of the HTML tag. They are mapped to the entities &lt; and &gt;.
-- NOTE(review): the set "[% <>]" used below contains a space, "<" and ">" but
-- not "%" ("% " is an escaped space), so the "%" entry of this map is never
-- applied. Left as is, because changing it would alter what the decoder on
-- the [[Module:accel]] side receives -- confirm the intended behaviour.
local encode_accel_char_map = {
	["%"] = ".",
	[" "] = "_",
	["<"] = "&lt;",
	[">"] = "&gt;",
}

-- Apply encode_accel_char_map to a single string.
local function encode_accel_param_chars(param)
	local retval = param:gsub("[% <>]", encode_accel_char_map) -- discard second return value
	return retval
end

-- Turn one acceleration parameter into "<prefix><encoded value>", or "" when
-- the parameter is absent. A table is joined with "*~!" first.
local function encode_accel_param(prefix, param)
	if not param then
		return ""
	end
	if type(param) == "table" then
		local filled_params = {}
		-- There may be gaps in the sequence, especially for translit params.
		local maxindex = 0
		for k in pairs(param) do
			if type(k) == "number" and k > maxindex then
				maxindex = k
			end
		end
		for index = 1, maxindex do
			filled_params[index] = param[index] or ""
		end
		-- [[Module:accel]] splits these up again.
		param = table_concat(filled_params, "*~!")
	end
	-- This is decoded again by [[WT:ACCEL]].
	return prefix .. encode_accel_param_chars(param)
end

-- A version of {{l}} or {{m}} that can be called from other modules too
--
-- Parameters:
--   data             table describing the link. Fields read here: `term`,
--                    `alt`, `sc`, `tr`, `ts`, `lang`, `id`, `genders`,
--                    `gloss`, `pos`, `lit`, `accel` and `interwiki`.
--                    `term`, `alt`, `sc`, `tr` and `ts` are converted to
--                    lists in place, and `tr_fail` is written.
--   face             passed to the script tagging and annotation functions
--   allow_self_link  passed on to language_link
--   no_check_redundant_translit
--                    if set and a manual transliteration is given, no
--                    automatic transliteration is generated for comparison
--
-- Returns the tagged link(s), followed by the annotations and any
-- maintenance categories. Raises an error if `data` is not a table.
function export.full_link(data, face, allow_self_link, no_check_redundant_translit)
	if type(data) ~= "table" then
		error("The first argument to the function full_link must be a table. "
			.. "See Module:links/documentation for more information.")
	end

	local multiparams = {"term", "alt", "sc", "tr", "ts"}
	local terms = {true}

	-- Generate multiple forms if applicable. Only the first two multiparams
	-- ("term" and "alt") are treated this way.
	for i = 1, 2 do
		local name = multiparams[i]
		if (type(data[name]) == "string" and data[name]:find("//")) then
			local u = uchar
			-- "//" separates forms. A backslash escapes the separator and a
			-- double backslash is a literal backslash in front of a real
			-- separator; both are parked in private-use characters while
			-- splitting.
			data[name] = data[name]
				:gsub("\\\\//", u(0xE000) .. "//")
				:gsub("\\//", u(0xE001))
			data[name] = mw.text.split(data[name], "//") or {}
			for j, subparam in ipairs(data[name]) do
				data[name][j] = subparam
					:gsub(u(0xE000), "\\")
					:gsub(u(0xE001), "//")
				if subparam == "" then data[name][j] = nil end
			end
		elseif type(data[name]) == "string" and not (type(data.term) == "string" and data.term:find("//")) then
			data[name] = data.lang:generateForms(data[name])
		elseif type(data[name]) == "table" and #data[name] == 1 then
			data[name] = data.lang:generateForms(data[name][1])
		end
	end

	-- Make every multiparam a list, and note each index that is in use.
	for _, multiparam in ipairs(multiparams) do
		data[multiparam] = data[multiparam] or {}
		if type(data[multiparam]) == "string" then
			data[multiparam] = {data[multiparam]}
		elseif data[multiparam] and data[multiparam]._type == "script object" then
			data[multiparam] = {data[multiparam]}
		end
		for index in pairs(data[multiparam]) do
			terms[index] = true
		end
	end

	-- Create the link
	local output = {}
	local categories = {}
	local link = ""
	local phonetic_extraction = mw.loadData("Module:links/data").phonetic_extraction

	for i in ipairs(terms) do
		-- Is there any text to show?
		if (data.term[i] or data.alt[i]) then
			-- Try to detect the script if it was not provided
			local best = data.lang:findBestScript(data.alt[i] or data.term[i])
			if not data.sc[i] then
				data.sc[i] = best
			else
				-- Track uses of sc parameter
				track("sc")

				if data.sc[i]:getCode() == best:getCode() then
					track("sc/redundant", data.sc[i]:getCode())
				else
					track("sc/needed", data.sc[i]:getCode())
				end
			end

			-- If using a discouraged character sequence, add to maintenance category
			if data.sc[i]:hasNormalizationFixes() == true then
				if (data.term[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.term[i])) ~= toNFC(data.term[i])) or (data.alt[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.alt[i])) ~= toNFC(data.alt[i])) then
					table_insert(categories, "[[Category:Pages using discouraged character sequences]]")
				end
			end

			local class = ""

			-- Build the CSS classes that carry the acceleration data.
			if data.accel then
				local form = data.accel.form and encode_accel_param_chars(data.accel.form) .. "-form-of" or ""
				local gender = encode_accel_param("gender-", data.accel.gender)
				local pos = encode_accel_param("pos-", data.accel.pos)
				local translit = encode_accel_param("transliteration-",
					data.accel.translit or (data.tr[i] ~= "-" and data.tr[i] or nil))
				local target = encode_accel_param("target-", data.accel.target)
				local lemma = encode_accel_param("origin-", data.accel.lemma)
				local lemma_translit = encode_accel_param("origin_transliteration-", data.accel.lemma_translit)
				local no_store = data.accel.no_store and "form-of-nostore" or ""

				local accel =
					form .. " " ..
					gender .. " " ..
					pos .. " " ..
					translit .. " " ..
					target .. " " ..
					lemma .. " " ..
					lemma_translit .. " " ..
					no_store .. " "

				class = "form-of lang-" .. data.lang:getCode() .. " " .. accel
			end

			-- Only make a link if the term has been given, otherwise just show the alt text without a link
			local term_data = {term = data.term[i], alt = data.alt[i], lang = data.lang, sc = data.sc[i], id = data.id, genders = data.genders, tr = data.tr[i], ts = data.ts[i], gloss = data.gloss, pos = data.pos, lit = data.lit, accel = data.accel, interwiki = data.interwiki}

			link = require("Module:script utilities").tag_text(
				data.term[i] and export.language_link(term_data, allow_self_link)
				or data.alt[i], data.lang, data.sc[i], face, class)
		else
			--[[	No term to show.
					Is there at least a transliteration we can work from?	]]
			link = require("Module:script utilities").request_script(data.lang, data.sc[i])

			if link == "" or not data.tr[i] or data.tr[i] == "-" then
				-- No link to show, and no transliteration either. Show a term request.
				if mw.title.getCurrentTitle().nsText ~= "Template" then
					table_insert(categories, "[[Category:" .. data.lang:getCanonicalName() .. " term requests]]")
				end

				link = "<small>[Term?]</small>"
			end
		end
		table_insert(output, link)
		-- Separate multiple forms with a slash.
		if i < #terms then table_insert(output, "<span class=\"Zsym mention\" style=\"font-size:100%;\">/</span>") end
	end

	-- TODO: Currently only handles the first transliteration, pending consensus on how to handle multiple translits for multiple forms, as this is not always desirable (e.g. traditional/simplified Chinese).
	if data.tr[1] == "" or data.tr[1] == "-" then
		data.tr[1] = nil

	elseif phonetic_extraction[data.lang:getCode()] then
		-- The transliteration comes from a dedicated extraction module.
		local m_phonetic = require(phonetic_extraction[data.lang:getCode()])
		data.tr[1] = data.tr[1] or m_phonetic.getTranslit(export.remove_links(data.alt[1] or data.term[1]))

	elseif (data.term[1] or data.alt[1]) and data.sc[1]:isTransliterated() then
		-- Track whenever there is manual translit. The categories below like 'terms with redundant transliterations'
		-- aren't sufficient because they only work with reference to automatic translit and won't operate at all in
		-- languages without any automatic translit, like Persian and Hebrew.
		if data.tr[1] then
			track("manual-tr", data.lang:getCode())
		end

		-- Try to generate a transliteration, unless transliteration has been supplied and no_check_redundant_translit is
		-- given. (Checking for redundant transliteration can use up significant amounts of memory so we don't want to do
		-- it if memory is tight. `no_check_redundant_translit` is currently set when called ultimately from
		-- {{multitrans|...|no-check-redundant-translit=1}}.)
		if not (data.tr[1] and no_check_redundant_translit) then
			local text = data.alt[1] or data.term[1]
			if not data.lang:link_tr() then
				text = export.remove_links(text, true)
			end

			local automated_tr, tr_categories
			automated_tr, data.tr_fail, tr_categories = data.lang:transliterate(text, data.sc[1])

			if automated_tr or data.tr_fail then
				local manual_tr = data.tr[1]

				if manual_tr then
					if (export.remove_links(manual_tr) == export.remove_links(automated_tr)) and (not data.tr_fail) then
						table_insert(categories,
							"[[Category:Terms with redundant transliterations]]"
							.. "[[Category:Terms with redundant transliterations/" .. data.lang:getCode() .. "]]")
					elseif not data.tr_fail then
						-- Prevents Arabic root categories from flooding the tracking categories.
						if mw.title.getCurrentTitle().nsText ~= "Category" then
							table_insert(categories,
								"[[Category:Terms with manual transliterations different from the automated ones]]"
								.. "[[Category:Terms with manual transliterations different from the automated ones/" .. data.lang:getCode() .. "]]")
						end
					end
				end

				if (not manual_tr) or data.lang:overrideManualTranslit() then
					data.tr[1] = automated_tr
					-- Tolerate a transliterate() that returns no category list.
					for _, category in ipairs(tr_categories or {}) do
						table_insert(categories, category)
					end
				end
			end
		end
	end

	-- Link to the transliteration entry for languages that require this
	if data.tr[1] and data.lang:link_tr() and not (data.tr[1]:match("%[%[(.-)%]%]") or data.tr_fail) then
		data.tr[1] = export.language_link{lang = data.lang, term = data.tr[1]}
	elseif data.tr[1] and not (data.lang:link_tr() or data.tr_fail) then
		-- Remove the pseudo-HTML tags added by remove_links.
		data.tr[1] = data.tr[1]:gsub("</?link>", "")
	end
	-- A transliteration made up of nothing but whitespace and punctuation is dropped.
	if data.tr[1] and mw.ustring.gsub(data.tr[1], "[%s%p]", ""):len() == 0 then data.tr[1] = nil end

	table_insert(output, export.format_link_annotations(data, face))

	return table_concat(output) .. table_concat(categories)
end
-- Strips links from `text`: category links are deleted, piped links are
-- reduced to their display text, and the double square brackets of the
-- remaining links are removed. File and image links are left untouched.
-- If `tag` is set, the text of each stripped link is wrapped in
-- <link>...</link> pseudo-HTML tags, which allow the substitution functions in
-- [[Module:languages]] to properly subdivide the text in order to reduce the
-- chance of substitution failures in modules which scrape pages like
-- [[Module:zh-translit]].
-- `text` may also be a table with an `args` field, in which case args[1] is
-- used. Returns "" when there is no text.
-- FIXME: This is quite hacky. We probably want this to be integrated into [[Module:languages]], but we can't do that until we know that nothing is pushing pipe linked transliterations through it for languages which don't have link_tr set.
function export.remove_links(text, tag)
	if type(text) == "table" then
		text = text.args[1]
	end

	if not text or text == "" then
		return ""
	end

	-- Decide what a single link becomes. `opening` and `closing` are the
	-- bracket placeholders, `inner` is everything between them.
	local function strip(opening, inner, closing)
		local lowered = inner:lower()

		-- Don't remove files.
		if lowered:find("^file:") or lowered:find("^image:") then
			return opening .. inner .. closing
		end

		-- Remove categories completely.
		if lowered:find("^category:") or lowered:find("^cat:") then
			return ""
		end

		-- In piped links, remove all text before the pipe, unless it's the
		-- final character (i.e. the pipe trick), in which case just remove
		-- the pipe.
		local shown = inner:match("^[^|]*|(.+)") or inner:match("([^|]+)|$") or inner
		if tag then
			return "<link>" .. shown .. "</link>"
		end
		return shown
	end

	-- Swap the brackets for chars 1 and 2 so that they can be used in a charset.
	local marked = text
		:gsub("%[%[", "\1")
		:gsub("%]%]", "\2")

	-- Parse internal links for the display text.
	marked = marked:gsub("(\1)([^\1\2]-)(\2)", strip)

	-- Whatever placeholders are left were not stripped; restore them.
	local restored = marked
		:gsub("\1", "[[")
		:gsub("\2", "]]")

	return restored
end
--[=[
This decodes old section encodings.
For example, Norwegian_Bokm.C3.A5l → Norwegian_Bokmål.
It isn't picky about whether the section encodings represent the UTF-8 encoding
of a real Unicode character, so it will mangle section names that contain
a period followed by two uppercase hex characters. At least such section names
are probably pretty rare.
Wiktionary adds an additional id="" attribute for sections
using a legacy encoding, if it is different from the modern minimally modified attribute.
It is like percent encoding (URI or URL encoding) except with "." instead of "%".
See [[mw:Manual:$wgFragmentMode]] and the code that does the encoding at
https://gerrit.wikimedia.org/r/plugins/gitiles/mediawiki/core/+/7bf779524ab1fd8e1d74f79ea4840564d48eea4d/includes/parser/Sanitizer.php#893
]=]
-- Only the digits and A-F count as hex digits here. The character class %x
-- should not be used, as it also includes a-f, which do not occur in these
-- anchor encodings.
local capitalHex = "[0-9A-F]"

-- Replace every "." followed by two capital hex digits in `anchor` with the
-- byte those digits stand for. Returns the decoded string only.
local function decodeAnchor(anchor)
	local encoded_byte = "%.(" .. capitalHex .. capitalHex .. ")"
	local function to_byte(hexByte)
		return string.char(tonumber(hexByte, 16))
	end
	local decoded = anchor:gsub(encoded_byte, to_byte)
	return decoded
end
-- Turn "Page#Section" (or "#Section") into a wikilink whose label reads
-- "Page § Section" (or "§ Section").
--
-- Underscores are replaced with spaces and percent-encoding is decoded before
-- the link is split; the section name additionally has old-style ".XX"
-- anchor encodings decoded.
-- Raises an error if `link` is not a string, contains more than one "#", or
-- has no section name after the "#".
function export.section_link(link)
	if type(link) ~= "string" then
		error("The first argument to section_link was a " .. type(link) .. ", but it should be a string.")
	end

	link = link:gsub("_", " ")

	local _, numberSigns = link:gsub("#", "")
	if numberSigns > 1 then
		error("The section link should only contain one number sign (#).")
	end

	link = mw.uri.decode(link, "WIKI")

	local page, section = link:match("^([^#]*)#(.+)$")
	if not section then
		error("The function “section_link” could not find a number sign marking a section name.")
	end
	if page == "" then
		page = nil
	end

	section = decodeAnchor(section)

	-- The section is percent-encoded in the link target, because square
	-- brackets and other dodgy characters would otherwise prevent the parser
	-- from creating a link. The label shows the decoded text.
	local encoded = mw.uri.encode(section, "WIKI")
	if not page then
		return "[[#" .. encoded
			.. "|§ " .. section .. "]]"
	end
	return "[[" .. page .. "#" .. encoded
		.. "|" .. page .. " § " .. section .. "]]"
end
return export