모듈:languages: 두 판 사이의 차이

내용 삭제됨 내용 추가됨
Dolab0526 (토론 | 기여)
잔글편집 요약 없음
Dolab0526 (토론 | 기여)
편집 요약 없음
태그: 되돌려진 기여
227번째 줄:
return text
end
 
-- Check if the raw text is an unsupported title, and if so return that. Otherwise, remove HTML entities. We do the pre-conversion to avoid loading the unsupported title list unnecessarily.
local function checkNoEntities(self, text)
줄 338 ⟶ 339:
end
 
--[==[Returns the canonical name of the language. This is the name used to represent that language on Wiktionary, and is guaranteed to be unique to that language alone. Example: {{code|lua|"French"}} for French.]==]
function Language:getCanonicalName()
	-- The name is read from the first positional field of the raw data on first use and cached on the object.
	local name = self._name
	if name == nil then
		name = self._rawData[1]
		self._name = name
	end
	return name
end
 
--[==[
Return the display form of the language, i.e. the text used for the <code><var>source</var></code> part of category
names such as <code>English terms derived from <var>source</var></code>, and as the link text produced by
{makeCategoryLink()}. The value starts out as the canonical name. If the family code is {"qfa-sub"} (substrates), an
article {"a "} is prepended unless the name already starts with "the"/"a", and {" substrate"} is appended unless the
name already contains "substrate". The result is cached on the object.
]==]
function Language:getDisplayForm()
	local cached = self._displayForm
	if cached ~= nil then
		return cached
	end
	local display = self:getCanonicalName()
	-- Substrates need an article and the word "substrate" if they lack them.
	if self:getFamilyCode() == "qfa-sub" then
		local has_article = match(display, "^[Tt]he ") or match(display, "^[Aa] ")
		if not has_article then
			display = "a " .. display
		end
		if not match(display, "[Ss]ubstrate") then
			display = display .. " substrate"
		end
	end
	self._displayForm = display
	return display
end
 
--[==[Returns a table of the "other names" that the language is known by, excluding the canonical name. The names are not guaranteed to be unique, in that sometimes more than one language is known by the same name. Example: {{code|lua|{"Manx Gaelic", "Northern Manx", "Southern Manx"} }} for [[:Category:Manx language|Manx]]. If <code>onlyOtherNames</code> is given and is non-{{code|lua|nil}}, only names explicitly listed in the <code>otherNames</code> field are returned; otherwise, names listed under <code>otherNames</code>, <code>aliases</code> and <code>varieties</code> are combined together and returned. For example, for Manx, Manx Gaelic is listed as an alias, while Northern Manx and Southern Manx are listed as varieties. It should be noted that the <code>otherNames</code> field itself is deprecated, and entries listed there should eventually be moved to either <code>aliases</code> or <code>varieties</code>.]==]
function Language:getOtherNames(onlyOtherNames)
	-- A stack of length 1 is the case in which the extra data is loaded before delegating.
	if #self._stack == 1 then
		self:loadInExtraData()
	end
	return require("Module:language-like").getOtherNames(self, onlyOtherNames)
end
 
--[==[Returns a table of the aliases of the language (synonymous names other than the canonical name). Aliases are not guaranteed to be unique to one language. Example: {{code|lua|{"High German", "New High German", "Deutsch"} }} for [[:Category:German language|German]]. The raw data is consulted first, then the extra data; an empty table is returned if neither lists any aliases.]==]
function Language:getAliases()
	if #self._stack == 1 then
		self:loadInExtraData()
	end
	local aliases = self._rawData.aliases
	if not aliases then
		-- Fall back to the extra data, which may be absent.
		local extra = self._extraData
		aliases = extra and extra.aliases
	end
	return aliases or {}
end
 
--[==[
Return a table of the known subvarieties of the language, excluding subvarieties that have their own etymology-only
language codes. Names are not guaranteed to be unique across languages. Example:
{{code|lua|{"Southern Aymara", "Central Aymara"} }} for [[:Category:Aymara language|Aymara]]. A subvariety that goes by
several names is represented as a nested table whose first element is the preferred name, e.g.
{{code|lua|{"North Azerbaijani", "South Azerbaijani", {"Afshar", "Afshari", "Afshar Azerbaijani", "Afchar"},
{"Qashqa'i", "Qashqai", "Kashkay"}, "Sonqor"} }} for [[:Category:Azerbaijani language|Azerbaijani]]. Pass a
non-{{code|lua|nil}} <code>flatten</code> to get a single flat list instead. The work is delegated to
[[Module:language-like]].
]==]
function Language:getVarieties(flatten)
	local stack_depth = #self._stack
	if stack_depth == 1 then
		self:loadInExtraData()
	end
	local m_language_like = require("Module:language-like")
	return m_language_like.getVarieties(self, flatten)
end
 
--[==[Returns a table of types as a lookup table (with the types as keys).
줄 383 ⟶ 446:
types["etymology-only"] = true
end
--for t in gmatch(self._rawData.type이 nil일 경우 빈type, 문자열로"[^,]+") 처리do
for t in gmatch(self._rawData.type or "", "[^,]+") do
types[t] = true
end
줄 391 ⟶ 453:
return types
end
 
 
--[==[Given a list of types as strings, returns true if the language has all of them.]==]
줄 404 ⟶ 465:
end
 
--[==[Returns a table containing <code>WikimediaLanguage</code> objects (see [[Module:wikimedia languages]]), which represent languages and their codes as they are used in Wikimedia projects for interwiki linking and such. More than one object may be returned, as a single Wiktionary language may correspond to multiple Wikimedia languages. For example, Wiktionary's single code <code>sh</code> (Serbo-Croatian) maps to four Wikimedia codes: <code>sh</code> (Serbo-Croatian), <code>bs</code> (Bosnian), <code>hr</code> (Croatian) and <code>sr</code> (Serbian).

The codes are obtained from {getWikimediaLanguageCodes()}, and each one is converted with <code>getByCode</code> from [[Module:wikimedia languages]]. The resulting table is cached on the object.]==]
function Language:getWikimediaLanguages()
	local wm_langs = self._wikimediaLanguageObjects
	if wm_langs == nil then
		local get_wm_lang = require("Module:wikimedia languages").getByCode
		local codes = self:getWikimediaLanguageCodes()
		wm_langs = {}
		for i = 1, #codes do
			wm_langs[i] = get_wm_lang(codes[i])
		end
		self._wikimediaLanguageObjects = wm_langs
	end
	return wm_langs
end
 
--[==[Returns a table of the Wikimedia language codes for the language. The comma-separated <code>wikimedia_codes</code> field of the raw data is split into a list; if the field is absent, a single-element list holding the language's own code is used. The result is cached on the object.]==]
function Language:getWikimediaLanguageCodes()
	local wm_langs = self._wikimediaLanguageCodes
	if wm_langs == nil then
		wm_langs = self._rawData.wikimedia_codes
		wm_langs = wm_langs and split(wm_langs, ",", true, true) or {self._code}
		self._wikimediaLanguageCodes = wm_langs
	end
	return wm_langs
end
 
--[==[
Returns the name of the Wikipedia article for the language. `project` specifies the language and project to retrieve
the article from, defaulting to {"enwiki"} for the English Wikipedia. Normally if specified it should be the project
code for a specific-language Wikipedia e.g. "zhwiki" for the Chinese Wikipedia, but it can be any project, including
non-Wikipedia ones. If the project is the English Wikipedia and the property {wikipedia_article} is present in the data
module it will be used first. In all other cases, a sitelink will be generated from {:getWikidataItem} (if set). The
resulting value (or lack of value) is cached so that subsequent calls are fast. If no value could be determined, and
`noCategoryFallback` is {false}, {:getCategoryName} is used as fallback; otherwise, {nil} is returned. Note that if
`noCategoryFallback` is {nil} or omitted, it defaults to {false} if the project is the English Wikipedia, otherwise
to {true}. In other words, under normal circumstances, if the English Wikipedia article couldn't be retrieved, the
return value will fall back to a link to the language's category, but this won't normally happen for any other project.
Exactly one value is returned in every case.
]==]
function Language:getWikipediaArticle(noCategoryFallback, project)
	project = project or "enwiki"
	local cached_value
	if project == "enwiki" then
		cached_value = self._wikipedia_article
		if cached_value == nil then
			cached_value = self._rawData.wikipedia_article
		end
	else
		-- If the project isn't enwiki, default to no category fallback, but this can be overridden by specifying the
		-- value `false`.
		if noCategoryFallback == nil then
			noCategoryFallback = true
		end
		if self._non_en_wikipedia_articles == nil then
			self._non_en_wikipedia_articles = {}
		end
		cached_value = self._non_en_wikipedia_articles[project]
	end
	if cached_value == nil then -- not false
		-- Look the Wikidata item up once and reuse it for the sitelink query.
		local item = self:getWikidataItem()
		if item and mw.wikibase then
			cached_value = mw.wikibase.sitelink(item, project)
		end
		-- A failed lookup is cached as false so that it is not retried.
		if not cached_value then
			cached_value = false
		end
	end
	-- Now cache the determined value.
	if project == "enwiki" then
		self._wikipedia_article = cached_value
	else
		self._non_en_wikipedia_articles[project] = cached_value
	end
	if not cached_value and not noCategoryFallback then
		-- The parentheses discard gsub's second result (the substitution count), so that callers which forward the
		-- return value into another argument list don't receive a stray number.
		return (self:getCategoryName():gsub("Creole language", "Creole"))
	end
	return cached_value or nil
end
 
function Language:makeWikipediaLink()
줄 430 ⟶ 546:
end
 
--[==[Returns the Wikidata item id for the language or <code>nil</code>. The id comes from the second field of the language's raw data; a numeric value is turned into a string by prefixing it with "Q". The result is cached on the object, with a missing value cached as <code>false</code>.]==]
function Language:getWikidataItem()
	local cached = self._WikidataItem
	if cached == nil then
		local raw = self._rawData[2]
		if raw == nil then
			-- Nothing in the data: remember that with false so the lookup isn't repeated.
			cached = false
		elseif type(raw) == "number" then
			cached = "Q" .. raw
		else
			cached = raw
		end
		self._WikidataItem = cached
	end
	return cached or nil
end
 
--[==[Returns a table of <code>Script</code> objects for all scripts that the language is written in. See [[Module:scripts]]. The script codes come from {getScriptCodes()}. If the first code is {"All"}, the table loaded from [[Module:scripts/data]] is returned instead of a list built code by code. The result is cached on the object.]==]
function Language:getScripts()
	local scripts = self._scriptObjects
	if scripts == nil then
		local codes = self:getScriptCodes()
		if codes[1] == "All" then
			-- NOTE(review): this branch stores the loaded data table itself rather than Script objects; confirm that callers expect that shape.
			scripts = self:loadData("Module:scripts/data")
		else
			local get_script = require("Module:scripts").getByCode
			scripts = {}
			for i = 1, #codes do
				scripts[i] = get_script(codes[i], nil, nil, self._useRequire)
			end
		end
		self._scriptObjects = scripts
	end
	return scripts
end
 
--[==[Returns the table of script codes in the language's data file. The codes are read from the comma-separated fourth field of the raw data; if that field is absent, {{code|lua|{"None"} }} is returned. The result is cached on the object.]==]
function Language:getScriptCodes()
	local scripts = self._scriptCodes
	if scripts == nil then
		scripts = self._rawData[4]
		if scripts then
			local codes, n = {}, 0
			for code in gmatch(scripts, "[^,]+") do
				n = n + 1
				-- Special handling of "Hants", which represents "Hani", "Hant" and "Hans" collectively.
				if code == "Hants" then
					codes[n] = "Hani"
					codes[n + 1] = "Hant"
					codes[n + 2] = "Hans"
					n = n + 2
				else
					codes[n] = code
				end
			end
			scripts = codes
		else
			scripts = {"None"}
		end
		self._scriptCodes = scripts
	end
	return scripts
end
 
줄 579 ⟶ 736:
end
return family or nil
end
 
--[==[Check whether the language belongs to any of the given families, each of which may be a family code or a family object. Returns true on the first match: either the language's own family code equals the given one, or its family is in turn inside the given family. If the language's family has the family code {"qfa-not"} (i.e. it isn't a real family, e.g. creoles), the language's ancestors are checked as well, so a language with ancestors from different families matches any of them. Returns false if the language has no family code or nothing matches.]==]
function Language:inFamily(...)
	--check_object("family", nil, ...)
	for _, target in ipairs{...} do
		-- Normalize objects to their codes.
		if type(target) == "table" then
			target = target:getCode()
		end
		local own_code = self:getFamilyCode()
		if not own_code then
			return false
		end
		if own_code == target then
			return true
		end
		local own_family = self:getFamily()
		if own_family:inFamily(target) then
			return true
		end
		-- If the family isn't a real family (e.g. creoles) check any ancestors.
		if own_family:getFamilyCode() == "qfa-not" then
			for _, ancestor in ipairs(self:getAncestors()) do
				if ancestor:inFamily(target) then
					return true
				end
			end
		end
	end
	return false
end
 
--[==[Returns the object for the language's parent (looked up from {getParentCode()}), or {nil} if there is none. The result is cached on the object, with "no parent" cached as {false}.]==]
function Language:getParent()
	local cached = self._parentObject
	if cached == nil then
		local code = self:getParentCode()
		if code then
			cached = export.getByCode(code, nil, true, true, self._useRequire) or false
		else
			cached = false
		end
		self._parentObject = cached
	end
	return cached or nil
end
 
--[==[Returns the code of the language's parent, taken from the fifth field of the raw data, or {nil} if that field is unset. The result is cached on the object, with a missing value cached as {false}.]==]
function Language:getParentCode()
	local cached = self._parentCode
	if cached ~= nil then
		return cached or nil
	end
	local code = self._rawData[5] or false
	self._parentCode = code
	return code or nil
end
 
--[==[Returns the canonical name of the language's parent, or {nil} if there is no parent. The result is cached on the object, with a missing value cached as {false}.]==]
function Language:getParentName()
	local cached = self._parentName
	if cached == nil then
		local parent_obj = self:getParent()
		if parent_obj then
			cached = parent_obj:getCanonicalName() or false
		else
			cached = false
		end
		self._parentName = cached
	end
	return cached or nil
end
 
--[==[Returns a list of the language's parents, nearest first: the parent, then the parent's parent, and so on until an object with no parent is reached. The list is cached on the object.]==]
function Language:getParentChain()
	local chain = self._parentChain
	if chain ~= nil then
		return chain
	end
	chain = {}
	local link = self:getParent()
	while link do
		chain[#chain + 1] = link
		link = link:getParent()
	end
	self._parentChain = chain
	return chain
end
 
--[==[Given a list of language objects or codes, returns true if at least one of them appears in the language's parent chain (see {getParentChain()}), and false otherwise.]==]
function Language:hasParent(...)
	--check_object("language", nil, ...)
	for _, candidate in ipairs{...} do
		local given_as_code = type(candidate) == "string"
		for _, parent in ipairs(self:getParentChain()) do
			-- Codes are compared directly; objects are compared via their codes.
			local wanted = given_as_code and candidate or candidate:getCode()
			if wanted == parent:getCode() then
				return true
			end
		end
	end
	return false
end
 
--[==[
Returns the object whose code is given by {getFullCode()}. If that code is the language's own code, the language
object itself is returned; otherwise the object is looked up by code. For an etymology-only language this is the
full language or family it belongs to. The result is cached on the object.
]==]
function Language:getFull()
	local cached = self._fullObject
	if cached ~= nil then
		return cached
	end
	local full_code = self:getFullCode()
	local full
	if full_code == self._code then
		full = self
	else
		full = export.getByCode(full_code, nil, nil, nil, self._useRequire)
	end
	self._fullObject = full
	return full
end
 
줄 590 ⟶ 852:
end
 
--[==[
Returns the canonical name of the object returned by {getFull()}. For an etymology-only language that is the name
of the full language or family it belongs to; for a full language it is the language's own canonical name. The
result is cached on the object.
]==]
function Language:getFullName()
	local cached = self._fullName
	if cached ~= nil then
		return cached
	end
	local full_name = self:getFull():getCanonicalName()
	self._fullName = full_name
	return full_name
end
 
--[==[Returns a table of <code class="nf">Language</code> objects for all languages that this language is directly descended from. Generally this is only a single language, but creoles, pidgins and mixed languages can have multiple ancestors. If no ancestor codes are set, the proto-language of the language's family (or of the nearest enclosing family that has one) is used instead, if any. The result is cached on the object.]==]
function Language:getAncestors()
	if not self._ancestorObjects then
		self._ancestorObjects = {}
		local ancestors = shallowcopy(self:getAncestorCodes())
		if #ancestors > 0 then
			for _, ancestor in ipairs(ancestors) do
				insert(self._ancestorObjects, export.getByCode(ancestor, nil, true, nil, self._useRequire))
			end
		else
			local fam = self:getFamily()
			local protoLang = fam and fam:getProtoLanguage() or nil
			-- For the cases where the current language is the proto-language
			-- of its family, or an etymology-only language that is ancestral to that
			-- proto-language, we need to step up a level higher right from the
			-- start.
			if protoLang and (
				protoLang:getCode() == self._code or
				(self:hasType("etymology-only") and protoLang:hasAncestor(self))
			) then
				fam = fam:getFamily()
				protoLang = fam and fam:getProtoLanguage() or nil
			end
			-- Climb through enclosing families until one has a proto-language, stopping when there is no family
			-- left or the family code is "qfa-not".
			while not protoLang and not (not fam or fam:getCode() == "qfa-not") do
				fam = fam:getFamily()
				protoLang = fam and fam:getProtoLanguage() or nil
			end
			-- Nothing is added when no proto-language was found.
			if protoLang then
				insert(self._ancestorObjects, protoLang)
			end
		end
	end
	return self._ancestorObjects
end
 
do
	-- Avoid a language being its own ancestor via class inheritance. We only need to check for this if the language has inherited an ancestor table from its parent, because we never want to drop ancestors that have been explicitly set in the data.
	-- Recursively iterate over ancestors until we either find self or run out. If self is found, return true.
	local function check_ancestor(self, lang)
		local codes = lang:getAncestorCodes()
		if not codes then
			return nil
		end
		for i = 1, #codes do
			local code = codes[i]
			if code == self._code then
				return true
			end
			local anc = export.getByCode(code, nil, true, nil, self._useRequire)
			if check_ancestor(self, anc) then
				return true
			end
		end
	end

	--[==[Returns a table of <code class="nf">Language</code> codes for all languages that this language is directly descended from. Generally this is only a single language, but creoles, pidgins and mixed languages can have multiple ancestors. The codes come from the comma-separated <code>ancestors</code> field of the raw data; an empty table is returned if the field is absent. The result is cached on the object.]==]
	function Language:getAncestorCodes()
		if self._ancestorCodes then
			return self._ancestorCodes
		end
		local codes = self._rawData.ancestors
		if not codes then
			codes = {}
			self._ancestorCodes = codes
			return codes
		end
		codes = split(codes, ",", true, true)
		-- Cache before the self-ancestry check below, so that the recursive getAncestorCodes() call on self made by check_ancestor returns this table instead of recursing forever.
		self._ancestorCodes = codes
		-- No filtering is needed if there is nothing to filter, if the stack has a single entry, or if the top entry of the stack sets `ancestors` itself.
		if (
			#codes == 0 or
			#self._stack == 1 or
			self._stack[#self._stack].ancestors
		) then
			return codes
		end
		-- NOTE(review): the check is made on self as a whole rather than on codes[i], so while self is reachable through its own ancestors, codes are dropped from the front of the list. Confirm this is the intended filter.
		local i = 1
		while i <= #codes do
			if check_ancestor(self, self) then
				remove(codes, i)
			else
				i = i + 1
			end
		end
		return codes
	end
end
 
--[==[Given a list of language objects or codes, returns true if at least one of them is an ancestor. This includes any etymology-only children of that ancestor. If the language's ancestor(s) are etymology-only languages, it will also return true for those language parent(s) (e.g. if Vulgar Latin is the ancestor, it will also return true for its parent, Latin). However, a parent is excluded from this if the ancestor is also ancestral to that parent (e.g. if Classical Persian is the ancestor, Persian would return false, because Classical Persian is also ancestral to Persian).]==]
function Language:hasAncestor(...)
--check_object("language", nil, ...)

-- Depth-first walk over the ancestors of `node`. Returns the first truthy value that `func` gives for an ancestor; failing that, and only if `parent_check` is truthy, the first truthy value that `func` gives for one of the collected ancestor parents. Returns nil otherwise.
local function iterateOverAncestorTree(node, func, parent_check)
local ancestors = node:getAncestors()
local ancestorsParents = {}
for _, ancestor in ipairs(ancestors) do
local ret = func(ancestor) or iterateOverAncestorTree(ancestor, func, parent_check)
if ret then return ret end
end
-- Check the parents of any ancestors. We don't do this if checking the parents of the other language, so that we exclude any etymology-only children of those parents that are not directly related (e.g. if the ancestor is Vulgar Latin and we are checking New Latin, we want it to return false because they are on different ancestral branches. As such, if we're already checking the parent of New Latin (Latin) we don't want to compare it to the parent of the ancestor (Latin), as this would be a false positive; it should be one or the other).
if not parent_check then
return nil
end
-- Collect each ancestor's parents in chain order, stopping at the first parent that is the language hasAncestor was called on (`self` is the enclosing method's receiver) or that itself has the ancestor as an ancestor.
for _, ancestor in ipairs(ancestors) do
local ancestorParents = ancestor:getParentChain()
for _, ancestorParent in ipairs(ancestorParents) do
if ancestorParent:getCode() == self._code or ancestorParent:hasAncestor(ancestor) then
break
else
insert(ancestorsParents, ancestorParent)
end
end
end
-- Test the collected parents themselves; they are not recursed into.
for _, ancestorParent in ipairs(ancestorsParents) do
local ret = func(ancestorParent)
if ret then return ret end
end
end

-- Tests `otherlang` and then each of its parents in turn against the ancestor tree of self. Returns true on a match, false if `otherlang` is self, and nothing if the parents run out.
local function do_iteration(otherlang, parent_check)
-- otherlang can't be self
if (type(otherlang) == "string" and otherlang or otherlang:getCode()) == self._code then
return false
end
repeat
if iterateOverAncestorTree(
self,
function(ancestor)
return ancestor:getCode() == (type(otherlang) == "string" and otherlang or otherlang:getCode())
end,
parent_check
) then
return true
elseif type(otherlang) == "string" then
-- A code has to be turned into an object before its parent can be fetched.
otherlang = export.getByCode(otherlang, nil, true, nil, self._useRequire)
end
otherlang = otherlang:getParent()
-- Parents of the ancestors are only considered on the first pass, i.e. for `otherlang` itself.
parent_check = false
until not otherlang
end

local parent_check = true
for _, otherlang in ipairs{...} do
local ret = do_iteration(otherlang, parent_check)
if ret then
return true
end
end
return false
end
 
줄 635 ⟶ 1,018:
if not self._ancestorChain then
self._ancestorChain = {}
local step = #self:getAncestors() == 1 and self:getAncestors()[1] or nil
while true do
local ancestors = step:getAncestors()
while step do
step = #ancestors == 1 and ancestors[1] or nil
table.insert(self._ancestorChain, 1, step)
if not step then break end
step = #step:getAncestors() == 1 and step:getAncestors()[1] or nil
insert(self._ancestorChain, 1, step)
end
end
return self._ancestorChain
end
 
-- Collects the descendants of `self` from the language, etymology-language and family code lists. `format` selects
-- what is stored for each descendant: "object" (the object), "code" (its code) or "name" (its canonical name); any
-- other value yields an empty list.
local function fetch_descendants(self, format)
	local datasets = {
		require("Module:languages/code to canonical name"),
		require("Module:etymology languages/code to canonical name"),
		require("Module:families/code to canonical name"),
	}
	local family = self:getFamily()
	local found = {}
	-- Iterate over all three datasets.
	for _, dataset in ipairs(datasets) do
		for code in pairs(dataset) do
			local lang = export.getByCode(code, nil, true, true, self._useRequire)
			-- The cheap tests come first so that the expensive ancestry checks run only for plausible candidates.
			local is_descendant = code ~= self._code -- Not self.
				and lang:inFamily(family) -- In the same family.
				and (
					family:getProtoLanguageCode() == self._code -- Self is the protolanguage.
					or self:hasDescendant(lang) -- Full hasDescendant check.
					or (lang:getFullCode() == self._code and not self:hasAncestor(lang)) -- Etymology-only child which isn't an ancestor.
				)
			if is_descendant then
				if format == "object" then
					found[#found + 1] = lang
				elseif format == "code" then
					found[#found + 1] = code
				elseif format == "name" then
					found[#found + 1] = lang:getCanonicalName()
				end
			end
		end
	end
	return found
end
 
--[==[Returns a table of objects for all descendants of the language, as determined by <code>fetch_descendants</code>. The result is cached on the object.]==]
function Language:getDescendants()
	if not self._descendantObjects then
		self._descendantObjects = fetch_descendants(self, "object")
	end
	return self._descendantObjects
end
 
--[==[Returns a table of the codes of all descendants of the language. The result is cached on the object.]==]
function Language:getDescendantCodes()
	local codes = self._descendantCodes
	if not codes then
		codes = fetch_descendants(self, "code")
		self._descendantCodes = codes
	end
	return codes
end
 
--[==[Returns a table of the canonical names of all descendants of the language. The result is cached on the object.]==]
function Language:getDescendantNames()
	if not self._descendantNames then
		self._descendantNames = fetch_descendants(self, "name")
	end
	return self._descendantNames
end
 
--[==[Given a list of language objects or codes, returns true if this language is an ancestor of at least one of them (as determined by that language's {hasAncestor()}), and false otherwise.]==]
function Language:hasDescendant(...)
	for _, candidate in ipairs{...} do
		local lang = candidate
		-- Codes are resolved to objects first.
		if type(lang) == "string" then
			lang = export.getByCode(lang, nil, true, nil, self._useRequire)
		end
		if lang:hasAncestor(self) then
			return true
		end
	end
	return false
end
 
-- Collects the etymology-only languages that have `self` somewhere in their parent chain, by scanning
-- [[Module:etymology languages/data]]. `format` selects what is stored for each child: "object" (the object),
-- "code" (its code) or "name" (the first field of its data entry); any other value yields an empty list.
local function fetch_children(self, format)
	local m_etym_data = require("Module:etymology languages/data")
	local self_code = self._code
	local children = {}
	for code, data in pairs(m_etym_data) do
		-- Follow the parent codes (fifth data field) upwards until self is found or the chain leaves the
		-- etymology-language data.
		local _data = data
		repeat
			local parent = _data[5]
			if parent == self_code then
				if format == "object" then
					insert(children, export.getByCode(code, nil, true, nil, self._useRequire))
				elseif format == "code" then
					insert(children, code)
				elseif format == "name" then
					insert(children, data[1])
				end
				break
			end
			_data = m_etym_data[parent]
		until not _data
	end
	return children
end
 
--[==[Returns a table of objects for the etymology-only languages that have this language in their parent chain. The result is cached on the object.]==]
function Language:getChildren()
	local children = self._childObjects
	if not children then
		children = fetch_children(self, "object")
		self._childObjects = children
	end
	return children
end
 
--[==[Returns a table of the codes of the etymology-only languages that have this language in their parent chain. The result is cached on the object.]==]
function Language:getChildrenCodes()
	if not self._childCodes then
		self._childCodes = fetch_children(self, "code")
	end
	return self._childCodes
end
 
--[==[Returns a table of the names of the etymology-only languages that have this language in their parent chain. The result is cached on the object.]==]
function Language:getChildrenNames()
	if not self._childNames then
		self._childNames = fetch_children(self, "name")
	end
	return self._childNames
end
 
--[==[Given a list of language objects or codes, returns true if at least one of them has this language as a parent (as determined by that language's {hasParent()}). The arguments are examined in order, and the search stops with false at the first argument that is {nil} or {false}, or when the arguments run out.]==]
function Language:hasChild(...)
	for i = 1, select("#", ...) do
		local lang = select(i, ...)
		if not lang then
			return false
		end
		-- Codes are resolved to objects first.
		if type(lang) == "string" then
			lang = export.getByCode(lang, nil, true, nil, self._useRequire)
		end
		if lang:hasParent(self) then
			return true
		end
	end
	return false
end
 
--[==[Returns the name of the main category of that language. Example: {{code|lua|"French language"}} for French, whose category is at [[:Category:French language]]. Unless optional argument <code>nocap</code> is given, the language name at the beginning of the returned value will be capitalized. This capitalization is correct for category names, but not if the language name is lowercase and the returned value of this function is used in the middle of a sentence.]==]
function Language:getCategoryName(nocap)
	if not self._categoryName then
		local name = self:getCanonicalName()
		-- Only add " language" if a full language.
		if #self._stack == 1 then
			-- If the name already has "language" in it, don't add it.
			if not name:match("[Ll]anguage$") then
				name = name .. " language"
			end
		end
		-- The uncapitalized form is what gets cached; capitalization is applied on return.
		self._categoryName = name
	end
	if nocap then
		return self._categoryName
	else
		return mw.getContentLanguage():ucfirst(self._categoryName)
	end
end
 
--[==[Creates a wikilink to the language's category (see {getCategoryName()}); the link text is the display form (see {getDisplayForm()}).]==]
function Language:makeCategoryLink()
	local target = self:getCategoryName()
	local label = self:getDisplayForm()
	return "[[:Category:" .. target .. "|" .. label .. "]]"
end
 
--[==[Returns the standard characters of the language, read from the <code>standardChars</code> field of the raw data. If that field is not a table, it is returned as-is. If it is a table, `sc` (a script code or script object) selects the entry: with no `sc` or {"None"}, all entries are concatenated (in unspecified order); otherwise the entry for that script is returned followed by the entry at index 1, if any. Returns {nil} if the table has no entry for `sc`.]==]
function Language:getStandardCharacters(sc)
	if type(self._rawData.standardChars) ~= "table" then
		return self._rawData.standardChars
	else
		-- Accept a script object as well as a code.
		if sc and type(sc) ~= "string" then
			check_object("script", nil, sc)
			sc = sc:getCode()
		end
		if (not sc) or sc == "None" then
			local scripts = {}
			for _, script in pairs(self._rawData.standardChars) do
				insert(scripts, script)
			end
			return concat(scripts)
		end
		if self._rawData.standardChars[sc] then
			return self._rawData.standardChars[sc] .. (self._rawData.standardChars[1] or "")
		end
	end
end
 
--[==[Make the entry name (i.e. the correct page name). Returns three values: the entry name, and the second and third results of the <code>entry_name</code> substitutions (named `fail` and `cats` here). For {nil} or empty input, and for listed unsupported titles, the second value is {nil} and the third is an empty table.]==]
function Language:makeEntryName(text, sc)
	if (not text) or text == "" then
		return text, nil, {}
	end

	-- Set `unsupported` as true if certain conditions are met.
	local unsupported
	-- Check if there's an unsupported character. \239\191\189 is the replacement character U+FFFD, which can't be typed directly here due to an abuse filter. Unix-style dot-slash notation is also unsupported, as it is used for relative paths in links, as are 3 or more consecutive tildes.
	if (
		text:find("[#<>%[%]_{|}]") or
		text:find("\239\191\189") or
		text:find("%f[^%z/]%.%.?%f[%z/]") or
		text:find("~~~")
	) then
		unsupported = true
	-- If it looks like an interwiki link.
	elseif text:find(":") then
		local prefix = text:gsub("^:*(.-):.*", string.ulower)
		if (
			self:loadData("Module:data/namespaces")[prefix] or
			self:loadData("Module:data/interwikis")[prefix]
		) then
			unsupported = true
		end
	end

	-- Check if the text is a listed unsupported title.
	local unsupportedTitles = self:loadData("Module:links/data").unsupported_titles
	if unsupportedTitles[text] then
		return "Unsupported titles/" .. unsupportedTitles[text], nil, {}
	end

	sc = checkScript(text, self, sc)

	local fail, cats
	text = normalize(text, sc)
	text, fail, cats = iterateSectionSubstitutions(text, nil, nil, self, sc, self._rawData.entry_name, "makeEntryName")

	-- Strip one leading inverted question/exclamation mark and one trailing punctuation mark, but only if something other than whitespace and punctuation remains.
	text = umatch(text, "^[¿¡]?(.-[^%s%p].-)%s*[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]?$") or text

	-- Escape unsupported characters so they can be used in titles. ` is used as a delimiter for this, so a raw use of it in an unsupported title is also escaped here to prevent interference; this is only done with unsupported titles, though, so inclusion won't in itself mean a title is treated as unsupported (which is why it's excluded from the earlier test).
	if unsupported then
		local unsupported_characters = self:loadData("Module:links/data").unsupported_characters
		text = text
			:gsub("[#<>%[%]_`{|}\239]\191?\189?", unsupported_characters)
			:gsub("%f[^%z/]%.%.?%f[%z/]", function(m)
				return m:gsub("%.", "`period`")
			end)
			:gsub("~~~+", function(m)
				return m:gsub("~", "`tilde`")
			end)
		text = "Unsupported titles/" .. text
	end

	return text, fail, cats
end
 
--[==[Generates alternative forms of `text` and returns them as a table. If the language's data names a module in its <code>generate_forms</code> field, that module's <code>generateForms</code> function is called with the text, the language code and the script code; otherwise a table containing only the input term is returned.]==]
function Language:generateForms(text, sc)
	local generator = self._rawData.generate_forms
	if not generator then
		return {text}
	end
	sc = checkScript(text, self, sc)
	return require("Module:" .. generator).generateForms(text, self._code, sc:getCode())
end
 
--[==[Creates a sort key for the given entry name, following the rules appropriate for the language. This removes diacritical marks from the entry name if they are not considered significant for sorting, and may perform some other changes. Any initial hyphens and asterisks are removed, and parentheses that are adjacent to at least one other character are removed as well (the text between them is kept).
The <code>sort_key</code> setting for each language in the data modules defines the replacements made by this function, or it gives the name of the module that takes the entry name and returns a sortkey.
Returns three values: the sort key, plus the second and third values returned by the substitution step (named <code>fail</code> and <code>cats</code> here). If <code>text</code> is {{code|lua|nil}} or the empty string, it is returned unchanged together with {{code|lua|nil}} and an empty table.]==]
function Language:makeSortKey(text, sc)
-- Nothing to do for missing or empty input.
if (not text) or text == "" then
return text, nil, {}
end
-- Record (via the tracking helper) any input that still contains something shaped like an HTML tag.
if text:find("<[^<>]+>") then
track("track HTML tag")
end
-- Remove directional characters, soft hyphens, strip markers and HTML tags.
-- The byte escapes below spell U+00AD, U+202A-U+202E and U+2066-U+2069 in UTF-8.
text = ugsub(text, "[\194\173\226\128\170-\226\128\174\226\129\166-\226\129\169]", "")
text = mw.text.unstrip(text)
:gsub("<[^<>]+>", "")

text = decode_uri(text, "PATH")
text = checkNoEntities(self, text)

-- Remove initial hyphens and * unless the term only consists of spacing + punctuation characters.
-- Runs of characters in the range U+100000-U+10FFFD on either side of the removed prefix are kept (captures 1 and 2).
-- NOTE(review): presumably these private-use characters are temporary placeholders inserted elsewhere in the module - confirm.
text = ugsub(text, "^([􀀀-􏿽]*)[-־ـ᠊*]+([􀀀-􏿽]*)(.*[^%s%p].*)", "%1%2%3")

sc = checkScript(text, self, sc)

text = normalize(text, sc)
text = removeCarets(text, sc)

-- For languages with dotted dotless i, ensure that "İ" is sorted as "i", and "I" is sorted as "ı".
if self:hasDottedDotlessI() then
text = text:gsub("I\204\135", "i") -- decomposed "İ"
:gsub("I", "ı")
text = sc:toFixedNFD(text)
end
-- Convert to lowercase, make the sortkey, then convert to uppercase. Where the language has dotted dotless i, it is usually not necessary to convert "i" to "İ" and "ı" to "I" first, because "I" will always be interpreted as conventional "I" (not dotless "İ") by any sorting algorithms, which will have been taken into account by the sortkey substitutions themselves. However, if no sortkey substitutions have been specified, then conversion is necessary so as to prevent "i" and "ı" both being sorted as "I".
-- An exception is made for scripts that (sometimes) sort by scraping page content, as that means they are sensitive to changes in capitalization (as it changes the target page).
local fail, cats
if not sc:sortByScraping() then
text = text:ulower()
end

-- Apply the language's sort_key substitutions; this step also supplies the fail flag and categories that are returned.
text, fail, cats = iterateSectionSubstitutions(text, nil, nil, self, sc, self._rawData.sort_key, "makeSortKey")

if not sc:sortByScraping() then
-- Only when no sort_key data exists: map "ı" to "I" and "i" to "İ" before uppercasing (see the explanation above).
if self:hasDottedDotlessI() and not self._rawData.sort_key then
text = text:gsub("ı", "I")
:gsub("i", "İ")
text = sc:toFixedNFC(text)
end
text = text:uupper()
end

-- Remove parentheses, as long as they are either preceded or followed by something.
-- The first pass drops runs of parentheses that follow a character; the second drops runs that precede one.
text = text
:gsub("(.)[()]+", "%1")
:gsub("[()]+(.)", "%1")

text = escape_risky_characters(text)
return text, fail, cats
end
 
줄 814 ⟶ 1,389:
end
 
--[==[Transliterates the text from the given script into the Latin script (see [[Wiktionary:Transliteration and romanization]]). The language must have the <code>translit</code> property for this to work; if it is not present, {{code|lua|nil}} is returned.

Returns three values:
# The transliteration.
# A boolean which indicates whether the transliteration failed for an unexpected reason. If {{code|lua|false}}, then the transliteration either succeeded, or the module is returning nothing in a controlled way (e.g. the input was {{code|lua|"-"}}). Generally, this means that no maintenance action is required. If {{code|lua|true}}, then the transliteration is {{code|lua|nil}} because either the input or output was defective in some way (e.g. [[Module:ar-translit]] will not transliterate non-vocalised inputs, and this module will fail partially-completed transliterations in all languages). Note that this value can be manually set by the transliteration module, so make sure to cross-check to ensure it is accurate.
# A table of categories selected by the transliteration module, which should be in the format expected by {{code|lua|format_categories}} in [[Module:utilities]].

The <code>sc</code> parameter is handled by the transliteration module, and how it is handled is specific to that module. Some transliteration modules may tolerate {{code|lua|nil}} as the script, others require it to be one of the possible scripts that the module can transliterate, and will show an error if it's not one of them. For this reason, the <code>sc</code> parameter should always be provided when writing non-language-specific code.

The <code>module_override</code> parameter is used to override the default module that is used to provide the transliteration. This is useful in cases where you need to demonstrate a particular module in use, but there is no default module yet, or you want to demonstrate an alternative version of a transliteration module before making it official. It should not be used in real modules or templates, only for testing. All uses of this parameter are tracked by [[Wiktionary:Tracking/module_override]].

'''Known bugs''':
* This function assumes {tr(s1) .. tr(s2) == tr(s1 .. s2)}. When this assertion fails, wikitext markups like <nowiki>'''</nowiki> can cause wrong transliterations.
* HTML entities like <code>&amp;apos;</code>, often used to escape wikitext markups, do not work.]==]
function Language:transliterate(text, sc, module_override)
	-- If there is no text, or the language doesn't have transliteration data and there's no override, return nil.
	if not (self._rawData.translit or module_override) then
		return nil, false, {}
	elseif (not text) or text == "" or text == "-" then
		return text, false, {}
	end
	-- If the script is not transliteratable (and no override is given), return nil.
	sc = checkScript(text, self, sc)
	if not (sc:isTransliterated() or module_override) then
		return nil, true, {}
	end

	-- Remove any strip markers.
	text = mw.text.unstrip(text)

	-- Get the display text with the keepCarets flag set.
	local fail, cats, subbedChars
	text, fail, cats, subbedChars = processDisplayText(text, self, sc, true)

	-- Transliterate (using the module override if applicable).
	text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, true, self, sc, module_override or self._rawData.translit, "tr")

	-- The transliteration step produced nothing: report it as a failure.
	if not text then
		return nil, true, cats
	end

	-- Incomplete transliterations return nil.
	local charset = sc.characters
	if charset and umatch(text, "[" .. charset .. "]") then
		-- Remove any characters in Latin, which includes Latin characters also included in other scripts (as these are false positives). Anything remaining should only be script code "None" (e.g. numerals).
		local check_text = ugsub(text, "[" .. require("Module:scripts").getByCode("Latn").characters .. "]", "")
		if require("Module:scripts").findBestScriptWithoutLang(check_text):getCode() ~= "None" then
			return nil, true, cats
		end
	end

	text = escape_risky_characters(text)
	text = undoTempSubstitutions(text, subbedChars)

	-- If the script does not use capitalization, then capitalize any letters of the transliteration which are immediately preceded by a caret (and remove the caret).
	if text and not sc:hasCapitalization() and text:match("%^") then
		text = processCarets(text, "%^([\128-\191\244]*%*?)([^\128-\191\244][\128-\191]*)", function(m1, m2)
			return m1 .. m2:uupper()
		end)
	end

	-- Track module overrides.
	if module_override ~= nil then
		track("module_override")
	end

	-- The fail flag is only meaningful when no transliteration is returned; coerce it to a real boolean.
	fail = text == nil and (not not fail) or false

	return text, fail, cats
end
 
--[==[Returns {{code|lua|true}} if the language's data sets the <code>override_translit</code> field to a truthy value, or {{code|lua|false}} otherwise.]==]
function Language:overrideManualTranslit()
	-- Double negation coerces the raw field to a real boolean.
	return not not self._rawData.override_translit
end
 
--[==[Reports whether the language has transliteration data: {{code|lua|true}} when the <code>translit</code> field of its data is set, {{code|lua|false}} when it is not.]==]
function Language:hasTranslit()
	if self._rawData.translit then
		return true
	end
	return false
end
 
--[==[Returns {{code|lua|true}} if the <code>link_tr</code> field of the language's data is set to a truthy value, or {{code|lua|false}} otherwise.]==]
-- NOTE(review): presumably this flag controls whether transliterations are linked - confirm against callers.
function Language:link_tr()
	local flag = self._rawData.link_tr
	return flag and true or false
end
 
--[==[Reports whether the language distinguishes the letter pairs I/ı and İ/i: {{code|lua|true}} when the <code>dotted_dotless_i</code> field of its data is set, {{code|lua|false}} when it is not.]==]
function Language:hasDottedDotlessI()
	local flag = self._rawData.dotted_dotless_i
	return flag and true or false
end
 
--[==[Builds a summary of the language (names, codes, family, scripts, types, entry-name rules and so on) from the object's getter methods. If <code>returnTable</code> is truthy, the summary is returned as a Lua table; otherwise it is serialised with [[Module:JSON]] and the resulting string is returned.]==]
function Language:toJSON(returnTable)
	local entryNamePatterns = nil
	local entryNameRemoveDiacritics = nil

	local entry_name = self._rawData.entry_name
	if entry_name then
		entryNameRemoveDiacritics = entry_name.remove_diacritics
		if entry_name.from then
			-- Pair each "from" pattern with its replacement; a missing replacement means deletion.
			entryNamePatterns = {}
			for i, from in ipairs(entry_name.from) do
				entryNamePatterns[i] = {from = from, to = entry_name.to[i] or ""}
			end
		end
	end
	-- mainCode should only end up non-nil if dontCanonicalizeAliases is passed to make_object().
	local ret = m_table.deepcopy{
		ancestors = self:getAncestorCodes(),
		canonicalName = self:getCanonicalName(),
		categoryName = self:getCategoryName("nocap"),
		code = self._code,
		mainCode = self._main_code,
		entryNamePatterns = entryNamePatterns,
		entryNameRemoveDiacritics = entryNameRemoveDiacritics,
		family = self:getFamilyCode(),
		otherNames = self:getOtherNames(true),
		aliases = self:getAliases(),
		varieties = self:getVarieties(),
		scripts = self:getScriptCodes(),
		parent = self._parentCode or nil,
		full = self._fullCode or nil,
		type = m_table.keysToList(self:getTypes()),
		wikimediaLanguages = self:getWikimediaLanguageCodes(),
		wikidataItem = self:getWikidataItem(),
	}

	if returnTable then
		return ret
	else
		return require("Module:JSON").toJSON(ret)
	end
end
 
--[==[
<span style="color: #BA0000">This function is not for use in entries or other content pages.</span>

Returns a blob of data about the language. The format of this blob is undocumented, and perhaps unstable; it's intended for things like the module's own unit-tests, which are "close friends" with the module and will be kept up-to-date as the format changes.

Do NOT use this method without prior approval: all uses should be pre-approved on the talk page!
]==]
function Language:getRawData()
	-- Flatten the data stack into a fresh table. Layers are applied in stack order, so a key set
	-- by a later layer overwrites the same key from an earlier one.
	local rawData = {}
	for _, element in ipairs(self._stack) do
		for k, v in pairs(element) do
			rawData[k] = v
		end
	end
	return rawData
end
 
--[==[<span style="color: #BA0000">This function is not for use in entries or other content pages.</span>

Returns a blob of data about the language that contains the "extra data". Much like with getRawData, the format of this blob is undocumented, and perhaps unstable; it's intended for things like the module's own unit-tests, which are "close friends" with the module and will be kept up-to-date as the format changes.]==]
function Language:getRawExtraData()
	-- The extra data is only loaded on demand when the data stack has exactly one layer.
	local depth = #self._stack
	if depth == 1 then
		self:loadInExtraData()
	end
	return self._extraData
end
 
-- Looks up the extra-data entry for `code`. Returns nil when no extra-data module name can be
-- derived from the code, or when that module has no (truthy) entry for it.
local function getRawExtraLanguageData(self, code)
	local modulename = export.getExtraDataModuleName(code)
	if not modulename then
		return nil
	end
	local module_data = self:loadData("Module:" .. modulename)
	return module_data[code] or nil
end
 
-- Populates self._extraData on first use; later calls are no-ops.
function Language:loadInExtraData()
	if self._extraData then
		return
	end
	-- Fetch the extra data for this code, falling back to an empty table when there is none.
	self._extraData = getRawExtraLanguageData(self, self._code) or {}
end
 
--[==[Returns the name (without the "Module:" prefix) of the data module expected to hold the language with the given code, or {{code|lua|nil}} if the code has no recognised shape. Two-letter codes map to <code>languages/data/2</code>, three-letter codes to <code>languages/data/3/</code> followed by the code's first letter, and any other code consisting only of lowercase letters and hyphens to <code>languages/data/exceptional</code>.]==]
function export.getDataModuleName(code)
	if code:match("^%l%l$") then
		return "languages/data/2"
	elseif code:match("^%l%l%l$") then
		-- Three-letter codes are split across one module per initial letter.
		local prefix = code:sub(1, 1)
		return "languages/data/3/" .. prefix
	elseif code:match("^[%l-]+$") then
		return "languages/data/exceptional"
	else
		return nil
	end
end
 
--[==[Returns the name (without the "Module:" prefix) of the extra-data module for the given code, formed by appending <code>/extra</code> to the result of <code>getDataModuleName</code>, or {{code|lua|nil}} if that function returns nothing for the code.]==]
function export.getExtraDataModuleName(code)
	local dataModule = export.getDataModuleName(code)
	return dataModule and dataModule .. "/extra" or nil
end
 
do
	-- How a key of the raw data is resolved across the layers of a language's data stack:
	-- "unique" keys are read from the top layer only, "append" keys are joined from every layer,
	-- and all other keys are read from the topmost layer that defines them.
	local key_types = {
		[2] = "unique",
		aliases = "unique",
		otherNames = "unique",
		type = "append",
		varieties = "unique"
	}

	-- Builds a language object for `data` whose metatable is `parent` (either the Language class,
	-- or the object of the language that this one is an etymology-only variety of).
	-- `code` is the canonical code and `input_code` is the code stored on the object.
	local function make_stack(code, input_code, data, parent, useRequire)
		parent.__index = parent

		local lang = {
			_code = input_code,
			_useRequire = useRequire or nil
		}
		-- This can only happen if dontCanonicalizeAliases is passed to make_object().
		if code ~= input_code then
			lang._main_code = code
		end

		-- Full language.
		if not parent._stack then
			-- Create stack, accessed with rawData metamethod.
			local stack = parent._rawData and {parent._rawData, data} or {data}
			lang._stack = stack
			lang._rawData = setmetatable({}, {
				__index = function(t, k)
					local key_type = key_types[k]
					-- Data that isn't inherited from the parent.
					if key_type == "unique" then
						return stack[#stack][k]
					-- Data that is appended by each generation.
					elseif key_type == "append" then
						local parts = {}
						for i = 1, #stack do
							insert(parts, stack[i][k])
						end
						if type(parts[1]) == "string" then
							return concat(parts, ","), true
						end
					-- Otherwise, iterate down the stack, looking for a match.
					else
						local i = #stack
						while not stack[i][k] and i > 1 do
							i = i - 1
						end
						return stack[i][k]
					end
				end,
				-- Retain immutability (as writing to rawData will break functionality).
				__newindex = function()
					error("not allowed to edit rawData")
				end
			})
			-- Full code is the parent code.
			lang._fullCode = parent._code or code
		-- Etymology-only.
		else
			-- Copy over rawData and stack to the new object, and add new layer to stack.
			-- The stack table is shared with the parent on purpose: the rawData proxy reads from
			-- that same table, so the new layer has to be pushed onto it to become visible.
			lang._rawData = parent._rawData
			lang._stack = parent._stack
			insert(lang._stack, data)
			-- Copy full code.
			lang._fullCode = parent._fullCode
		end

		return setmetatable(lang, parent)
	end

	--[==[Creates the object for the language, etymology-only language or family described by <code>data</code>, or returns {{code|lua|nil}} if no data is given. The code is normalized first; if <code>dontCanonicalizeAliases</code> is set, the object keeps the code as originally given and records the normalized code separately. Data whose type includes "family" and which has no parent (field 5) is handed to [[Module:families]]; otherwise the object is built on top of its parent, or on the Language class if it has none.]==]
	function export.makeObject(code, data, useRequire, dontCanonicalizeAliases)
		if not data then
			return nil
		end

		-- Convert any aliases.
		local input_code = code
		code = normalize_code(code)
		input_code = dontCanonicalizeAliases and input_code or code

		if data.type:find("family") and not data[5] then
			return require("Module:families").makeObject(code, data, useRequire)
		else
			local parent
			if data[5] then
				parent = export.getByCode(data[5], nil, true, true, useRequire)
			else
				parent = Language
			end
			return make_stack(code, input_code, data, parent, useRequire)
		end
	end
end
 
--[==[Finds the language whose code matches the one provided. If it exists, it returns a <code class="nf">Language</code> object representing the language. Otherwise, it returns {{code|lua|nil}}, unless <code class="n">paramForError</code> is given, in which case an error is generated. If <code class="n">paramForError</code> is {{code|lua|true}}, a generic error message mentioning the bad code is generated; otherwise <code class="n">paramForError</code> should be a string or number specifying the parameter that the code came from, and this parameter will be mentioned in the error message along with the bad code. If <code class="n">allowEtymLang</code> is specified, etymology-only language codes are allowed and looked up along with normal language codes. If <code class="n">allowFamily</code> is specified, language family codes are allowed and looked up along with normal language codes. If <code class="n">useRequire</code> is specified, data modules are loaded with <code>require</code> rather than with the module's data loader.]==]
function export.getByCode(code, paramForError, allowEtymLang, allowFamily, useRequire)
	if type(code) ~= "string" then
		-- Describe what was actually received, to make the error message more helpful.
		local typ
		if not code then
			typ = "nil"
		elseif check_object("language", true, code) then
			typ = "a language object"
		elseif check_object("family", true, code) then
			typ = "a family object"
		else
			typ = "a " .. type(code)
		end
		error("The function getByCode expects a string as its first argument, but received " .. typ .. ".")
	end

	-- Loads a data module with whichever loader the caller asked for.
	local function conditionalRequire(modulename)
		if useRequire then
			return require(modulename)
		else
			return load_data(modulename)
		end
	end

	-- FIXME: Temporary. Lists bad codes to track, so we can consider eliminating them.
	-- We list them directly here rather than in a separate module (cf. [[Module:etymology languages/track-bad-etym-code]])
	-- in the hope that this reduces memory usage as we have to do this for every invocation of getByCode() for every
	-- language code.
	local codes_to_track = {
		-- Codes duplicated between full and etymology-only languages
		["bsg"] = true,
		["rdb"] = true,
		["tgf"] = true,
		-- Aliases actively being deprecated
		["prv"] = true, -- oc-pro
		-- Codes that will be converted to families
		["nan"] = true,
		-- Codes being renamed
		["cmn-wadegile"] = true,
		["wuu-ngb"] = true,
		["wuu-hzh"] = true,
		["wuu-szh"] = true,
	}

	-- Always returns true, so that it can be chained with `and` inside get_data().
	local function track_bad_code(code)
		if codes_to_track[code] then
			track(code)
		end
		return true
	end

	-- NOTE(review): the module name is derived from the code as given, and is reused unchanged
	-- when get_data() is retried with the normalized code below - confirm that this is intended.
	local modulename = export.getDataModuleName(code)

	-- Tries each permitted data source in turn: full languages, etymology-only languages,
	-- families, then etymology-only families.
	local function get_data(code)
		return modulename and
			track_bad_code(code) and conditionalRequire("Module:" .. modulename)[code] or
			(allowEtymLang and require("Module:etymology languages/track-bad-etym-code")(code) and conditionalRequire("Module:etymology languages/data")[code]) or
			(allowFamily and conditionalRequire("Module:families/data")[code]) or
			(allowEtymLang and allowFamily and require("Module:families/track-bad-etym-code")(code) and conditionalRequire("Module:families/data/etymology")[code])
	end

	local data = get_data(code) or get_data(normalize_code(code))

	local retval = code and data and export.makeObject(code, data, useRequire)

	if not retval and paramForError then
		require("Module:languages/errorGetBy").code(code, paramForError, allowEtymLang, allowFamily)
	end

	return retval
end
 
--[==[Finds the language whose canonical name (the name used to represent that language on Wiktionary) or other name matches the one provided. If it exists, it returns a <code class="nf">Language</code> object representing the language. Otherwise, it returns {{code|lua|nil}}, unless <code class="n">errorIfInvalid</code> is given, in which case an error is generated. If <code class="n">allowEtymLang</code> is specified, etymology-only language codes are allowed and looked up along with normal language codes. If <code class="n">allowFamily</code> is specified, language family codes are allowed and looked up along with normal language codes. If <code class="n">useRequire</code> is specified, the name-lookup modules are loaded with <code>require</code> rather than with the module's data loader.

The canonical name of languages should always be unique (it is an error for two languages on Wiktionary to share the same canonical name), so this is guaranteed to give at most one result.

This function is powered by [[Module:languages/canonical names]], which contains a pre-generated mapping of full-language canonical names to codes. It is generated by going through the [[:Category:Language data modules]] for full languages. When <code class="n">allowEtymLang</code> is specified for the above function, [[Module:etymology languages/canonical names]] may also be used, and when <code class="n">allowFamily</code> is specified for the above function, [[Module:families/canonical names]] may also be used.]==]
function export.getByCanonicalName(name, errorIfInvalid, allowEtymLang, allowFamily, useRequire)
	-- Loads a data module with whichever loader the caller asked for.
	local function conditionalRequire(modulename)
		if useRequire then
			return require(modulename)
		else
			return load_data(modulename)
		end
	end

	local byName = conditionalRequire("Module:languages/canonical names")
	local code = byName and byName[name]

	if not code and allowEtymLang then
		byName = conditionalRequire("Module:etymology languages/canonical names")
		-- Besides the name as given, also try it without a leading "a " and/or a trailing
		-- " substrate", in every combination.
		code = byName and byName[name] or
			byName[name:gsub(" [Ss]ubstrate$", "")] or
			byName[name:gsub("^a ", "")] or
			byName[name:gsub("^a ", ""):gsub(" [Ss]ubstrate$", "")] or
			-- For etymology families like "ira-pro".
			-- FIXME: This is not ideal, as it allows " languages" to be appended to any etymology-only language, too.
			byName[name:match("^(.*) languages$")]
	end

	if not code and allowFamily then
		byName = conditionalRequire("Module:families/canonical names")
		-- Also accept the name with a trailing " languages".
		code = byName and byName[name] or
			byName[name:match("^(.*) languages$")]
	end

	local retval = code and export.getByCode(code, errorIfInvalid, allowEtymLang, allowFamily, useRequire)

	if not retval and errorIfInvalid then
		require("Module:languages/errorGetBy").canonicalName(name, allowEtymLang, allowFamily)
	end

	return retval
end
 
--[==[Used by [[Module:languages/data/2]] (et al.) to add default types to the entities returned. If <code>regular</code> is truthy, every entity without a <code>type</code> is given the type <code>"regular"</code>. Any further arguments are type names, which are joined with commas and prepended to each entity's <code>type</code>. The data table is modified in place and returned.]==]
function export.addDefaultTypes(data, regular, ...)
	-- Count and collect the varargs explicitly, rather than relying on the implicit `arg`
	-- table, which is a deprecated Lua 5.0 compatibility feature.
	local n = select("#", ...)
	local types = n > 0 and concat({...}, ",") or ""
	for _, entity in next, data do
		-- "regular" encompasses everything that doesn't have another type already assigned.
		if regular then
			entity.type = entity.type or "regular"
		end
		if n > 0 then
			entity.type = types .. (entity.type and ("," .. entity.type) or "")
		end
	end
	return data
end
 
--[==[Used by [[Module:languages/data/2]] (et al.) and [[Module:etymology languages/data]] to finalize language-related data into the format that is actually returned. All whitespace is stripped from the scripts (field 4), <code>ancestors</code>, <code>type</code> and <code>wikimedia_codes</code> fields of every entity that has them. The data table is modified in place and returned.]==]
function export.finalizeLanguageData(data)
	-- 4 is scripts.
	local fields = {4, "ancestors", "type", "wikimedia_codes"}
	local fields_len = #fields
	for _, entity in next, data do
		for i = 1, fields_len do
			local key = fields[i]
			local field = entity[key]
			if field then
				entity[key] = gsub(field, "%s+", "")
			end
		end
	end
	return data
end
 
--[==[Used by [[Module:etymology languages/data]] and [[Module:families/data/etymology]] to finalize etymology-related data into the format that is actually returned. For every entity, the value in field 3 (the parent) is moved to field 5, and the <code>family</code> field is moved to field 3. The data table is modified in place and returned.]==]
function export.finalizeEtymologyData(data)
	for _, entity in next, data do
		-- Move parent to 5 and family to 3.
		entity[5] = entity[3]
		entity[3] = entity.family
		entity.family = nil
	end
	return data
end