Jump to content

Module:Wikt-lang/data

Permanently protected module
From Wikipedia, the free encyclopedia

-- Data table for Wikt-lang: per-language settings keyed by Wiktionary language
-- code, plus a map of Wikipedia language codes to Wiktionary codes.
-- The module returns a single table with two fields, "languages" and
-- "redirects".

local U = mw.ustring.char

-- Diacritics, from the [[Combining Diacritical Marks]] block.
local grave = U(0x300)
local acute = U(0x301)
local circumflex = U(0x302)
local tilde = U(0x303)
local macron = U(0x304)
local breve = U(0x306)
local dot = U(0x307)
local diaeresis = U(0x308)
local double_acute = U(0x30B)
local caron = U(0x30C)
local double_grave = U(0x30F)
local invbreve = U(0x311)
local dot_below = U(0x323)
local undertie = U(0x35C)

-- Replacements shared by every Arabic-script entry below ("ar", "ara", "arb",
-- "apc", "ajp", "arz"). Defined once so the entries cannot drift apart.
-- Should be in the script data module.
local arabic_replacements = {
	-- ālif with wasla is replaced by ālif;
	[U(0x0671)] = U(0x0627),
	-- taṭwīl, fatḥatan, ḍammatan, kasratan,
	-- fatḥa, ḍamma, kasra,
	-- shadda, sukūn, and superscript (dagger) ālif are removed.
	["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D) .. U(0x064E) .. U(0x064F) .. U(0x0650) .. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
}

--[[
	This is a table of Wiktionary language codes with data belonging to them.
	Name is the "canonical name" used on Wiktionary.
	Article is the Wikipedia article.
	Script is the ISO 15924 code.

	"replacements" appears in two shapes:
	  * a map of pattern -> replacement string, or
	  * { decompose = true, from = { patterns... }, to = { replacements... } },
	    where "to" may be shorter than "from" or absent.
	NOTE(review): presumably the consumer decomposes the text first when
	"decompose" is set and treats a missing "to" item as "remove the match";
	confirm against the module that reads this table.
]]
local data = {
	["languages"] = {
		["aaq"] = {
			["name"] = "Penobscot",
		},
		["ab"] = {
			["name"] = "Abkhaz",
		},
		["abe"] = {
			["name"] = "Abenaki",
		},
		["ang"] = {
			["name"] = "Old English",
			-- A plain string, like every other "article" value in this table
			-- (this was previously a one-element table).
			["article"] = "Old English",
			-- Remove macrons, acutes, and overdots
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. acute .. dot .. "]" },
			},
		},
		["ar"] = {
			["name"] = "Arabic",
			["article"] = "Arabic language",
			["direction"] = "rtl",
			["replacements"] = arabic_replacements,
		},
		["ara"] = {
			["name"] = "Arabic",
			["article"] = "Arabic language",
			["direction"] = "rtl",
			["replacements"] = arabic_replacements,
		},
		["arb"] = {
			["name"] = "Modern Standard Arabic",
			["article"] = "Modern Standard Arabic",
			["direction"] = "rtl",
			["replacements"] = arabic_replacements,
		},
		["apc"] = {
			["name"] = "North Levantine Arabic",
			["article"] = "North Levantine Arabic",
			["direction"] = "rtl",
			["replacements"] = arabic_replacements,
		},
		["ajp"] = {
			["name"] = "South Levantine Arabic",
			["article"] = "South Levantine Arabic",
			["direction"] = "rtl",
			["replacements"] = arabic_replacements,
		},
		["arz"] = {
			["name"] = "Egyptian Arabic",
			["article"] = "Egyptian Arabic",
			["direction"] = "rtl",
			["replacements"] = arabic_replacements,
		},
		["av"] = {
			["name"] = "Avar",
		},
		["be"] = {
			["article"] = "Belarusian language",
			["replacements"] = {
				[acute] = "",
			},
		},
		["bn"] = {
			["name"] = "Bengali",
			["article"] = "Bengali language",
		},
		["bua"] = {
			["name"] = "Buryat",
		},
		["cel-pro"] = { -- Incorrect tag
			["name"] = "Proto-Celtic",
			["Wikipedia_code"] = "cel-x-proto",
		},
		["cel-x-proto"] = {
			["name"] = "Proto-Celtic",
		},
		["cel-bry-pro"] = { -- Incorrect tag
			["name"] = "Proto-Brythonic",
			["article"] = "Common Brittonic",
			["type"] = "reconstructed",
		},
		["com"] = {
			["name"] = "Comanche",
			["article"] = "Comanche language",
		},
		["cu"] = {
			["name"] = "Old Church Slavonic",
			["article"] = "Old Church Slavonic",
		},
		["de"] = {
			["name"] = "German",
			["article"] = "German language",
		},
		["en"] = {
			["name"] = "English",
			["article"] = "English language",
		},
		["es"] = {
			["name"] = "Spanish",
			["article"] = "Spanish language",
		},
		["egy"] = {
			["name"] = "Egyptian",
		},
		["evn"] = {
			["name"] = "Evenki",
			["article"] = "Evenki language",
		},
		["fr"] = {
			["name"] = "French",
			["article"] = "French language",
		},
		["frm"] = {
			["name"] = "Middle French",
			["article"] = "Middle French",
		},
		["frp"] = {
			["name"] = "Franco-Provençal",
		},
		["ff"] = {
			["name"] = "Fula",
		},
		["gem-pro"] = { -- Incorrect tag
			["name"] = "Proto-Germanic",
			["article"] = "Proto-Germanic language",
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "gem-x-proto",
		},
		["gem-x-proto"] = {
			["name"] = "Proto-Germanic",
			["article"] = "Proto-Germanic language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["gml"] = {
			["name"] = "Middle Low German",
		},
		["gmw-ecg"] = {
			["name"] = "East Central German",
		},
		["gmw-x-proto"] = {
			["name"] = "Proto-West Germanic",
			["article"] = "Proto-West Germanic language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["gmq-x-gut"] = {
			["name"] = "Gutnish",
			["article"] = "Gutnish",
		},
		["goh"] = {
			["replacements"] = {
				decompose = true,
				from = {
					"[" .. macron .. circumflex .. diaeresis .. "]",
				},
			},
		},
		["got"] = {
			["name"] = "Gothic",
			["article"] = "Gothic language",
			["replacements"] = {
				-- Latin to Gothic since people will not want to have to copy
				-- and paste Gothic letters in
				["[AÁaáĀā]"] = "𐌰",
				["[Bb]"] = "𐌱",
				["[Gg]"] = "𐌲",
				["[Dd]"] = "𐌳",
				["[EeĒē]"] = "𐌴",
				["[Qq]"] = "𐌵",
				["[Zz]"] = "𐌶",
				["[Hh]"] = "𐌷",
				["[Þþ]"] = "𐌸",
				["[IiÍí]"] = "𐌹",
				["[Kk]"] = "𐌺",
				["[Ll]"] = "𐌻",
				["[Mm]"] = "𐌼",
				["[Nn]"] = "𐌽",
				["[Jj]"] = "𐌾",
				["[UuÚúŪū]"] = "𐌿",
				["[Pp]"] = "𐍀",
				["[Rr]"] = "𐍂",
				["[Ss]"] = "𐍃",
				["[Tt]"] = "𐍄",
				["[WwYy]"] = "𐍅",
				["[Ff]"] = "𐍆",
				["[Xx]"] = "𐍇",
				["[Ƕƕ]"] = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
				["[OoŌō]"] = "𐍉",
			},
		},
		["gsw"] = {
			["name"] = "Alemannic German",
		},
		["grc"] = {
			["name"] = "Ancient Greek",
			["article"] = "Ancient Greek",
			["replacements"] = {
				decompose = true,
				from = {
					-- Replace variant letterforms with standard ones.
					"ϐ", "ϵ", "ϑ", "ϰ", "ϱ", "ϲ", "ϕ",
					-- Remove macrons and breves.
					"[" .. macron .. breve .. undertie .. "]",
				},
				-- One item per variant letterform above; the diacritic class
				-- has no counterpart here.
				to = {
					"β", "ε", "θ", "κ", "ρ", "σ", "φ",
				},
			},
		},
		["grk-pro"] = { -- Incorrect tag
			["name"] = "Proto-Hellenic",
			["Wikipedia_name"] = "Proto-Greek",
			["article"] = "Proto-Greek language",
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "grk-x-proto",
		},
		["grk-x-proto"] = {
			["name"] = "Proto-Hellenic",
			["Wikipedia_name"] = "Proto-Greek",
			["article"] = "Proto-Greek language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["grt"] = {
			["name"] = "Garo",
		},
		["ha"] = {
			["name"] = "Hausa",
			-- remove tilde, grave, acute, macron, circumflex
			["replacements"] = {
				decompose = true,
				from = { "[" .. grave .. circumflex .. macron .. acute .. tilde .. "]" },
			},
		},
		["hi"] = {
			["name"] = "Hindi",
			["article"] = "Hindi",
		},
		["ilo"] = {
			["name"] = "Ilocano",
			["article"] = "Ilocano language",
		},
		["ine-bsl-pro"] = {
			["name"] = "Proto-Balto-Slavic",
			["article"] = "Proto-Balto-Slavic language",
			["type"] = "reconstructed",
		},
		["ine-pro"] = { -- Incorrect tag
			["name"] = "Proto-Indo-European",
			["article"] = "Proto-Indo-European language",
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "ine-x-proto",
		},
		["ine-x-proto"] = {
			["name"] = "Proto-Indo-European",
			["article"] = "Proto-Indo-European language",
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["ja"] = {
			["name"] = "Japanese",
			["article"] = "Japanese language",
		},
		["jbo"] = { -- Lojban
			["type"] = "appendix",
		},
		["ket"] = {
			["name"] = "Ket",
			["article"] = "Ket language",
		},
		["ksk"] = {
			["name"] = "Kansa",
			["article"] = "Kansa language",
		},
		["la"] = {
			["name"] = "Latin",
			["article"] = "Latin",
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. breve .. diaeresis .. "]" },
			},
		},
		["lt"] = {
			["name"] = "Lithuanian",
			-- remove acute, tilde, grave
			["replacements"] = {
				decompose = true,
				from = { "[" .. acute .. tilde .. grave .. "]" },
			},
		},
		["mkh-mvi"] = {
			["name"] = "Middle Vietnamese",
		},
		["moe"] = {
			["name"] = "Cree",
		},
		["mul"] = {
			["name"] = "Translingual",
			["article"] = "",
		},
		["nci"] = {
			["name"] = "Classical Nahuatl",
			["article"] = "Classical Nahuatl",
			["replacements"] = {
				decompose = true,
				-- Remove macrons, acutes, circumflexes, graves, and saltillo;
				-- see [[Saltillo (linguistics)]].
				from = { "[" .. grave .. acute .. macron .. circumflex .. "Ꞌꞌʻʼ'ʔ]" },
			},
		},
		["nds-de"] = {
			["name"] = "German Low German",
		},
		["non"] = {
			["name"] = "Old Norse",
		},
		["non-x-proto"] = {
			["name"] = "Proto-Norse",
		},
		["odt"] = {
			["name"] = "Old Dutch",
		},
		["oge"] = {
			["name"] = "Old Georgian",
		},
		["oj"] = {
			["name"] = "Ojibwe",
		},
		["orv"] = {
			["name"] = "Old East Slavic",
			["article"] = "Old East Slavic",
			["replacements"] = {
				[U(0x484)] = "",
			},
		},
		["osx"] = {
			["name"] = "Old Saxon",
		},
		["pt"] = {
			["name"] = "Portuguese",
			["article"] = "Portuguese language",
			-- ["scripts"] = { "Latn" },
		},
		["pa"] = {
			["name"] = "Punjabi",
			["article"] = "Punjabi language",
		},
		["pgl"] = {
			["name"] = "Primitive Irish",
			["article"] = "Primitive Irish",
		},
		["pis"] = {
			["name"] = "Pijin",
			["article"] = "Pijin language",
		},
		["poz-x-poly-proto"] = {
			["name"] = "Proto-Nuclear Polynesian",
			["article"] = "Proto-Polynesian language",
			["type"] = "reconstructed",
		},
		["rap"] = {
			["name"] = "Rapa Nui",
			["article"] = "Rapa Nui language",
		},
		["ru"] = {
			["name"] = "Russian",
			["article"] = "Russian language",
			["replacements"] = {
				[acute] = "",
			},
		},
		["rw"] = {
			["name"] = "Rwanda-Rundi",
		},
		["se"] = {
			["replacements"] = {
				["([đflmnŋrsšŧv])'%1"] = "%1%1",
			},
		},
		["sem-pro"] = {
			["name"] = "Proto-Semitic",
			["article"] = "Proto-Semitic",
			["type"] = "reconstructed",
		},
		["sh"] = {
			["article"] = "Serbo-Croatian language",
			["replacements"] = {
				decompose = true,
				from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave .. grave .. invbreve .. acute .. macron .. tilde .. "]" },
				to = { "%1" },
			},
		},
		["sl"] = {
			["name"] = "Slovene",
			["replacements"] = {
				decompose = true,
				-- remove tonal orthography
				from = { "ł", "[" .. grave .. acute .. macron .. double_grave .. invbreve .. circumflex .. dot_below .. "]" },
				to = { "l" },
			},
		},
		["sla-pro"] = {
			["name"] = "Proto-Slavic", -- also Common Slavic
			["type"] = "reconstructed",
			["replacements"] = {
				["[ÀÁÃĀȀȂ]"] = "A",
				["[àáãāȁȃ]"] = "a",
				["[ÈÉẼĒȄȆ]"] = "E",
				["[èéẽēȅȇ]"] = "e",
				["[ÌÍĨĪȈȊ]"] = "I",
				["[ìíĩīȉȋ]"] = "i",
				["[ÒÓÕŌȌȎŐ]"] = "O",
				["[òóõōȍȏő]"] = "o",
				["[ÙÚŨŪȔȖŰ]"] = "U",
				["[ùúũūȕȗű]"] = "u",
				["[ỲÝỸȲ]"] = "Y",
				["[ỳýỹȳ]"] = "y",
				["Ǭ"] = "Ǫ",
				["ǭ"] = "ǫ",
				["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "",
				["ĭ"] = "ь",
				["ŭ"] = "ъ",
			},
		},
		["tts"] = {
			["name"] = "Isan", -- also "Northeastern Thai"
			["article"] = "Isan language",
		},
		["tzo"] = {
			["name"] = "Tzotzil",
			["article"] = "Tzotzil language",
		},
		["ug"] = {
			["name"] = "Uyghur", -- also less commonly "Uighur"
			["article"] = "Uyghur language",
		},
		["uk"] = {
			["article"] = "Ukrainian language",
			["replacements"] = {
				[acute] = "",
			},
		},
		["ur"] = {
			["name"] = "Urdu",
			["article"] = "Urdu",
		},
		["xcl"] = {
			["name"] = "Old Armenian",
			["article"] = "Classical Armenian",
			["replacements"] = {
				["[՞՜՛՟]"] = "",
				["և"] = "եւ",
			},
		},
		["xgf"] = {
			["name"] = "Tongva", -- not ISO name "Gabrielino-Fernandeño"
			["article"] = "Tongva language",
			["replacements"] = {
				["['`ʔ]"] = "ʼ",
			},
		},
		["xlu"] = {
			["name"] = "Luwian", -- not ISO name "Cuneiform Luwian"
			["article"] = "Cuneiform Luwian",
		},
		["xpq"] = {
			["name"] = "Mohegan-Pequot",
		},
		["xxt"] = {
			["name"] = "Tambora",
			["article"] = "Tambora language",
		},
		["xvn"] = {
			["name"] = "Vandalic",
			["article"] = "Vandalic language",
		},
		["yua"] = {
			["name"] = "Yucatec Maya",
			["article"] = "Yucatec Maya language",
		},
		["zh"] = {
			["name"] = "Chinese",
			["article"] = "Chinese language",
			-- ["scripts"] = { "Hani" },
		},
		["zle-ort"] = {
			["name"] = "Old Ruthenian",
			["article"] = "Old Ruthenian",
			["replacements"] = {
				[acute] = "",
			},
		},
	},

	-- Here, keys (for example, "gem") are Wikipedia language codes used in
	-- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary
	-- code.
	-- Subtags are not currently supported.
	["redirects"] = {
		["aae"] = "sq",
		["aiq"] = "fa",
		["aln"] = "sq",
		["als"] = "sq",
		["azb"] = "az",
		["azj"] = "az",
		["bgn"] = "bal",
		["bs"] = "sh",
		["bxr"] = "bua",
		["ciw"] = "oj",
		["cnr"] = "sh",
		["fil"] = "tl",
		["fuf"] = "ff",
		["gem"] = "gem-pro", -- Not correct, but is commonly used.
		["hak"] = "zh",
		["hbo"] = "he",
		["hr"] = "sh",
		["ine"] = "ine-pro", -- Not correct, but might be commonly used.
		["kjv"] = "sh",
		["nan"] = "zh",
		["prs"] = "fa",
		["rn"] = "rw",
		["sli"] = "gmw-ecg",
		["sr"] = "sh",
		["src"] = "sc",
		["sro"] = "sc",
		["tw"] = "ak",
		["wae"] = "gsw",
		["wep"] = "nds-de",
		["yue"] = "zh",
		["xno"] = "fro",
	},
}

return data
close