-- Module:Multilingual description
-- Jump to navigation
-- Jump to search
-- Documentation for this module may be created at Module:Multilingual description/doc
-- Table of functions exported by this module (it is returned at the end of the file).
local p = {}
--[==[
Remap some "valid" language codes that are still unknown, but are known by another code,
in order to get a visible language name (and, if possible, BCP 47 conformance).

Each key is a code that may appear as a parameter name; its value is the code to use instead.
The remapping is applied by _mld() only when the key is not both "supported" and "known" to
mw.language while the replacement value is both.
--]==]
local remappedLanguages = {
['als'] = 'gsw', -- known code (unfortunately broken in Wikimedia) remapped to the new code (also known, but conforming to BCP 47); 'als' on Wikimedia for Alemannic conflicts with the standard 'als' which actually refers to the unrelated standard variant of the Albanian language.
['nrm'] = 'nrf', -- known code (unfortunately broken in Wikimedia) remapped to the new code (also known, but conforming to BCP 47); 'nrm' on Wikimedia for Norman conflicts with the standard 'nrm' which actually refers to the unrelated Narom language
['bat-smg'] = 'sgs', -- legacy interwiki still supported, newer conforming code now supported as well in interwiki
['be-x-old'] = 'be-tarask', -- legacy interwiki still supported, newer conforming code now supported as well in interwiki
['fiu-vro'] = 'vro', -- legacy interwiki still supported, newer conforming code now supported as well in interwiki
['roa-rup'] = 'rup', -- code not conforming to BCP 47 (legacy interwiki still supported, newer code supported as well), the new standard code should be used (and is now recognized as interwiki)
['roa-tara'] = 'nap-taran', -- code not conforming to BCP 47 (continental variant of Neapolitan), should be replaced by a conforming variant code
['zh-classical'] = 'lzh', -- code not conforming to BCP 47, replaced by standard code also supported in interwiki
['bh'] = 'bho', -- legacy interwiki still supported but ambiguous as a family, newer code now supported as well in interwiki; 'bh' was used in Wikimedia to refer to Bhojpuri only and not the whole Bihari family
['bu'] = 'my', -- legacy code from ISO 639 deprecated, newer code is preferred and used as interwiki
['iw'] = 'he', -- legacy code from ISO 639 deprecated, newer code is preferred and used as interwiki
['jw'] = 'jv', -- legacy code from ISO 639 deprecated, newer code is preferred and used as interwiki
['zh-min-nan'] = 'nan', -- legacy BCP 47 code, valid but deprecated in favor of new code also supported in interwiki
['zh-wuu'] = 'wuu', -- legacy BCP 47 code, valid but deprecated in favor of new code also supported in interwiki
['zh-yue'] = 'yue', -- legacy BCP 47 code, valid but deprecated in favor of new code also supported in interwiki
-- Standard BCP 47 codes that still cannot be used in MediaWiki, and must be replaced for now by valid BCP 47 codes (ignoring the specific variant).
['en-us'] = 'en', -- both codes are conforming and supported, only the second one is known (there's no support for the US variant which is implicit, unlike variants in CA, GB, IN, ZA)
['fa-af'] = 'fa', -- both codes are conforming and supported, only the second one is known, actually means "Eastern Dari"
['fr-x-galo'] = 'fr', -- both codes are conforming and supported, only the second one is known
['ha-latn'] = 'ha', -- both codes are conforming and supported, only the second one is known, the Latin script is the default since the 1950s
['ha-arab'] = 'ha', -- both codes are conforming and supported, only the second one is known, the Arabic script is historic, without clear orthography
['ko-kr'] = 'ko', -- both codes are conforming and supported, only the second one is known, regional variant used in South Korea (ko-kp for the variant in North Korea is supported)
['ku-cyrl'] = 'ku', -- both codes are conforming and supported, only the second one is known, the Cyrillic script is still used
['ku-latn'] = 'ku', -- both codes are conforming and supported, only the second one is known, the Latin script is the default since the 1950s
['no'] = 'nb', -- both codes are conforming and supported, but the first one is now used only for meaning the second one in MediaWiki
['prd'] = 'fa', -- both codes are conforming and supported, only the second one is known; 'prd' is "Parsi-Dari", and means the same as 'fa-af'
['tgl'] = 'tl', -- both codes are conforming and supported, only the second one is known (but may have alternate forms written in the Tagal script and not Latin for modern Filipino)
-- Standard BCP 47 codes for variants that still cannot be used in MediaWiki, and must be replaced for now by legacy codes, valid only in Wikimedia wikis but not conforming to BCP 47.
['sr-cyrl'] = 'sr-ec', -- this alternate known code is non-standard and in fact not supported, but has a correct native name
['sr-latn'] = 'sr-el', -- same remark
}
local sortedKnownLanguageTags = require('Module:Multilingual description/sort')
local dir = require('Module:Dir').select
--[==[
Append one rendered description to the list.

descriptions: sequence of wikitext fragments, extended in place.
lang:         language code, passed as first positional argument of Template:Ls.
description:  text, passed as second positional argument; anything that is not a
              string, or that is blank once trimmed, is silently skipped.
update:       optional value forwarded unchanged as the "update" template argument.
--]==]
local function addDescription(descriptions, lang, description, update)
	-- Guard: only non-blank strings are rendered.
	if type(description) ~= 'string' then
		return
	end
	if #mw.text.trim(description) == 0 then
		return
	end
	local templateArgs = {
		lang,
		description,
		-- Text direction is selected by Module:Dir for this language code.
		dir = dir(lang, 'rtl', 'ltr'),
		classes = 'description',
		update = update
	}
	local rendered = mw.getCurrentFrame():expandTemplate{
		title = 'Ls',
		args = templateArgs
	}
	descriptions[#descriptions + 1] = rendered
end
--[==[
Append a tracking category link to the list of output fragments.
The callers in this module pass 'conflicting', 'deprecated' or 'unsupported' as kind.
--]==]
local function addTracking(descriptions, kind)
	local categoryLink = '[[Category:Multilingual descriptions using ' .. kind .. ' language codes]]'
	descriptions[#descriptions + 1] = categoryLink
end
--[==[
Remove HTML comments and "nowiki" tags from a parameter name or value.
MediaWiki trims names and values of named arguments but leaves this markup in
place (some Mld contain parameters like "| sk <!--Slovensko--> = ...").
--]==]
local function stripMarkup(text)
	local stripped = text
		:gsub('<!%-%-.-%-%->', '')
		:gsub('</?nowiki%s*/?>', '')
	return stripped
end

--[==[
Build the wikitext for a multilingual description.

args: table mapping parameter names to descriptions. Only string keys with
      string values are used. A key may list several language codes separated
      by any character other than ASCII letters, digits, hyphens or underscores.

Returns the concatenation of:
 1. one rendered description per language listed in sortedKnownLanguageTags,
    in that order;
 2. the remaining languages, in ascending code order, with "update" set to the
    code when mw.language does not support it;
 3. tracking categories: 'conflicting', 'deprecated', 'unsupported'.
--]==]
local function _mld(args)
	--[==[
	Pass 1: copy the arguments into a fresh table keyed by canonical language
	code. The incoming table may be a read-only frame argument proxy whose keys
	cannot be unset, so it is never modified (and mw.clone does NOT work on it).
	--]==]
	local byCode, conflicting = {}, false
	for names, text in pairs(args) do
		if type(names) == 'string' and type(text) == 'string' then
			-- Trim after removing markup so equal texts compare equal below.
			text = (stripMarkup(text):gsub('^%s*(.-)%s*$', '%1'))
			--[==[
			Canonicalize codes to lowercase with hyphens replacing underscores;
			every other character acts as a separator between codes.
			--]==]
			for code in stripMarkup(names):gsub('_', '-'):lower():gmatch('([%-0-9a-z]+)') do
				--[==[
				Detect conflicting descriptions, like "|en,default=OK|en=Bad"
				(whose result is unpredictable, as all keys are in random order).
				--]==]
				if byCode[code] ~= nil and byCode[code] ~= text then
					conflicting = true
				end
				byCode[code] = text
			end
		end
	end

	--[==[
	Pass 2: move descriptions from legacy codes to their replacement when the
	legacy code is not both supported and known, and the replacement is.
	--]==]
	local remapped = false
	for legacy, modern in pairs(remappedLanguages) do
		if byCode[legacy]
			and not (mw.language.isSupportedLanguage(legacy) and mw.language.isKnownLanguageTag(legacy))
			and (mw.language.isSupportedLanguage(modern) and mw.language.isKnownLanguageTag(modern)) then
			if byCode[modern] == nil then
				-- No description yet for the replacement code: reuse this one.
				byCode[modern] = byCode[legacy]
			else
				-- Both codes were supplied: the legacy one is dropped and signaled.
				remapped = true
				-- As in pass 1, identical texts are not a conflict.
				if byCode[modern] ~= byCode[legacy] then
					conflicting = true
				end
			end
			byCode[legacy] = nil
		end
	end

	--[==[ Pass 3: first all known languages, in their predefined order. ]==]
	local output = {}
	for _, code in ipairs(sortedKnownLanguageTags) do
		if byCode[code] ~= nil then
			addDescription(output, code, byCode[code], nil)
			byCode[code] = nil
		end
	end

	--[==[
	Pass 4: append what is left. pairs() has no defined order, so the codes are
	sorted first to make the generated wikitext deterministic.
	--]==]
	local leftovers = {}
	for code in pairs(byCode) do
		leftovers[#leftovers + 1] = code
	end
	table.sort(leftovers)
	local unsupported = false
	for _, code in ipairs(leftovers) do
		if mw.language.isSupportedLanguage(code) then
			addDescription(output, code, byCode[code], nil)
		else
			addDescription(output, code, byCode[code], code)
			unsupported = true
		end
	end

	if conflicting then
		addTracking(output, 'conflicting')
	end
	if remapped then
		addTracking(output, 'deprecated')
	end
	if unsupported then
		addTracking(output, 'unsupported')
	end
	return table.concat(output)
end
--[==[
Entry point for #invoke: renders the descriptions given as arguments of the
parent frame (an empty argument table is used when there is no parent frame
or when it exposes no arguments).
--]==]
function p.mld(frame)
	local parent = frame:getParent()
	local parentArgs = parent and parent.args or {}
	return _mld(parentArgs)
end
--[==[
Self-test stored in the metatable of the module table. It feeds _mld() a fixed
set of arguments and compares the result with output built by calling
addDescription()/addTracking() directly. Returns true on success; on failure
it logs both strings and returns false.
--]==]
setmetatable(p, {quickTests = function()
	local input = {
		[1] = 'One?', -- ignored (numeric keys are not language codes)
		unsupported = 'What?', -- not a supported language code
		en = ' ', -- blank once trimmed, so nothing is rendered for it
		als = 'GSW', -- will be remapped
		['en-gb '] = 'EN-GB', -- trailing space in the name
		['en-ca <nowiki/>'] = 'EN-CA', -- markup in the name
		[' de'] = 'DE', -- leading space in the name
		fr = 'FR',
		[' fr '] = 'FR', -- same code and same text as above (not conflicting)
		rue = 'RUE',
		ru = 'RU',
		ko = 'KO',
		ja = 'JA',
		zh = 'ZH',
		['he,iw'] = 'HE', -- two codes sharing one description; 'iw' is remapped to 'he'
		ur = 'UR',
		ar = 'AR',
		ro = 'RO',
		['be-tarask'] = 'BE-TARASK',
		['be-x-old'] = 'BE-X-OLD (deprecated)', -- dropped: conflicts with 'be-tarask'
		dv = 'DV',
	}
	--[==[
	Expected languages in the exact order of their native names. The blank 'en'
	and the conflicting 'be-x-old' descriptions are absent.
	--]==]
	local ordered = {
		{ 'gsw', 'GSW' }, --[[Alemannisch]] -- remapped
		{ 'en-gb', 'EN-GB' }, --[[British English]]
		{ 'en-ca', 'EN-CA' }, --[[Canadian English]]
		{ 'de', 'DE' }, --[[Deutsch]]
		{ 'fr', 'FR' }, --[[français]]
		{ 'ro', 'RO' }, --[[română]]
		{ 'be-tarask', 'BE-TARASK' }, --[[беларуская (тарашкевіца)]]
		{ 'rue', 'RUE' }, --[[русиньскый]]
		{ 'ru', 'RU' }, --[[русский]]
		{ 'ko', 'KO' }, --[[한국어]]
		{ 'ja', 'JA' }, --[[日本語]]
		{ 'zh', 'ZH' }, --[[中文]]
		{ 'he', 'HE' }, --[[עברית]]
		{ 'ur', 'UR' }, --[[اردو]]
		{ 'ar', 'AR' }, --[[العربية]]
		{ 'dv', 'DV' }, --[[ދިވެހިބަސް]]
	}
	local fragments = {}
	for _, entry in ipairs(ordered) do
		addDescription(fragments, entry[1], entry[2])
	end
	--[==[
	Languages outside the sorted list are appended by _mld() after the known
	ones (with "update" set when the code is not supported), so only a single
	such item is used here.
	--]==]
	addDescription(fragments, 'unsupported', 'What?', 'unsupported')
	for _, kind in ipairs({ 'conflicting', 'deprecated', 'unsupported' }) do
		addTracking(fragments, kind)
	end
	local expect = table.concat(fragments)
	local actual = _mld(input)
	if actual == expect then
		return true
	end
	mw.log('expect:\n' .. expect)
	mw.log('actual:\n' .. actual)
	return false
end})
--[==[ Type this to run tests in the Lua console:
=getmetatable(p).quickTests() -- should return true
--]==]
return p