Mòideal:scripts/data
Related pages |
---|
(deasbaireachd • eachdraidh • ceanglaichean • doc • bogsa-gainmhich • cùisean deuchainn)
This module is currently protected from editing. See the protection policy and protection log for more details. Please discuss any changes on the talk page; you may submit an edit request to ask an administrator to make an edit if it is uncontroversial or supported by consensus. You may also request that this page be unprotected. |
This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing. |
This page is not a sandbox. It should not be used for test editing. To experiment, please use the Wikipedia sandbox, your user sandbox, or the other sandboxes. |
This module contains definitions and metadata for all script codes on Wiktionary. See Wiktionary:Scripts for more information.
This module must not be imported using require. Instead, it is imported like this:
local m_scripts = mw.loadData("Mòideal:scripts/data")
This ensures that the data is only loaded once per page, rather than once for every module invocation like normal.
To access this data from templates, use Mòideal:script utilities.
Required values
Every entry in the table must contain the following properties:
canonicalName
- The "canonical" name of the script. This is the name that is used in Wiktionary entries and category names.
Optional values
otherNames
- A table of all the names that this script is known by, other than the canonical name.
characters
- A Lua character class that matches on any character that belongs to this script. The character classes are the same as those in Lua search/replace patterns, but without the surrounding [ ] (these are implicitly added). They also resemble those found in regular expressions.
- In its simplest form, the character class can just be a list of all the characters in the script (Example: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"). But it's easier to describe using character ranges, especially when the script contains many dozens or even hundreds of different characters. Character ranges are given with only the first and last character, separated by a hyphen (-). The set then implicitly includes all characters whose Unicode codepoints are in between the two given characters (Example: "A-Za-z").
-- When adding new scripts to this file, please don't forget to add
-- style definitions for the script in [[MediaWiki:Common.css]].
-- Shorthand for mw.ustring.char; used below to build character ranges from
-- numeric code points (see the Hebrew entry).
local u = mw.ustring.char
-- Table of script definitions keyed by script code. It is filled in entry by
-- entry below and returned at the end of the module.
local m = {}
m["Afak"] = {
canonicalName = "Afaka",
}
m["Aghb"] = {
canonicalName = "Caucasian Albanian",
characters = "𐔰-𐕣𐕯",
}
-- Arabic script.
-- The character class is assembled from code points so that every range
-- endpoint is explicit. U+0600, the start of the first range, has no visible
-- glyph and is easily lost when the literal is copied or edited; without it
-- the leading "-" is a literal hyphen rather than a range operator and most
-- of the basic Arabic block is not matched.
m["Arab"] = {
	canonicalName = "Arabic",
	otherNames = {"Jawi"},
	characters =
		u(0x0600) .. "-" .. u(0x06FF) ..  -- Arabic
		u(0x0750) .. "-" .. u(0x077F) ..  -- Arabic Supplement
		u(0xFB50) .. "-" .. u(0xFDFD) ..  -- Arabic Presentation Forms-A
		u(0xFE70) .. "-" .. u(0xFEFC),    -- Arabic Presentation Forms-B
}
-- Language-specific variants of the Arabic script. Every variant shares the
-- canonical name and the character class of m["Arab"]; a few also carry an
-- alternative name. Each row is {script code, otherNames (optional)}.
local arabic_variants = {
	{"fa-Arab", {"Perso-Arabic"}},
	{"kk-Arab"},
	{"ks-Arab"},
	{"ku-Arab"},
	{"mzn-Arab"},
	{"ota-Arab"},
	{"pa-Arab", {"Shahmukhi"}},
	{"ps-Arab"},
	{"sd-Arab"},
	{"tt-Arab"},
	{"ug-Arab"},
	{"ur-Arab"},
}

for _, variant in ipairs(arabic_variants) do
	m[variant[1]] = {
		canonicalName = "Arabic",
		-- nil for variants without an alternative name, so the key is absent
		otherNames = variant[2],
		characters = m["Arab"].characters,
	}
end
m["Armi"] = {
canonicalName = "Imperial Aramaic",
characters = "𐡀-𐡟",
}
m["Armn"] = {
canonicalName = "Armenian",
characters = "Ա-֏ﬓ-ﬗ",
}
m["Avst"] = {
canonicalName = "Avestan",
characters = "𐬀-𐬿",
}
m["Bali"] = {
canonicalName = "Balinese",
characters = "ᬀ-᭼",
}
m["Bamu"] = {
canonicalName = "Bamum",
characters = "ꚠ-꛷𖠀-𖨸",
}
m["Bass"] = {
canonicalName = "Bassa",
otherNames = {"Bassa Vah", "Vah"},
characters = "𖫐-𖫵",
}
m["Batk"] = {
canonicalName = "Batak",
characters = "ᯀ-᯿",
}
m["Beng"] = {
canonicalName = "Bengali",
characters = "ঁ-৺",
}
m["Bopo"] = {
canonicalName = "Zhuyin",
otherNames = {"Zhuyin Fuhao", "Bopomofo"},
characters = "ㄅ-ㄭㆠ-ㆺ",
}
m["Brah"] = {
canonicalName = "Brahmi",
characters = "𑀀-𑁯",
}
m["Brai"] = {
canonicalName = "Braille",
characters = "⠀-⣿",
}
m["Bugi"] = {
canonicalName = "Buginese",
characters = "ᨀ-᨟",
}
m["Buhd"] = {
canonicalName = "Buhid",
characters = "ᝀ-ᝓ",
}
m["Cakm"] = {
canonicalName = "Chakma",
characters = "𑄀-𑅃",
}
m["Cans"] = {
canonicalName = "Canadian syllabics",
characters = "᐀-ᙿ",
}
m["Cari"] = {
canonicalName = "Carian",
characters = "𐊠-𐋐",
}
m["Cham"] = {
canonicalName = "Cham",
characters = "ꨀ-꩟"
}
m["Cher"] = {
canonicalName = "Cherokee",
characters = "Ꭰ-Ᏼ",
}
m["Copt"] = {
canonicalName = "Coptic",
characters = "Ⲁ-⳿", -- This is the separate "Coptic" block, not the unified "Greek and Coptic"
}
m["Cprt"] = {
canonicalName = "Cypriot",
characters = "𐠀-𐠿",
}
m["Cyrl"] = {
canonicalName = "Cyrillic",
characters = "Ѐ-џҊ-ԧꚀ-ꚗ",
}
m["Cyrs"] = {
canonicalName = "Old Cyrillic",
characters = "Ѐ-ԧꙀ-ꚗ",
}
m["Deva"] = {
canonicalName = "Devanagari",
characters = "ऀ-ॿ꣠-ꣻ",
}
m["Dsrt"] = {
canonicalName = "Deseret",
characters = "𐐀-𐑏",
}
m["Dupl"] = {
canonicalName = "Duployan",
characters = "𛰀-𛲟",
}
m["Egyd"] = {
canonicalName = "Demotic",
}
m["Egyp"] = {
canonicalName = "Egyptian hieroglyphic",
characters = "𓀀-𓐮",
}
m["Elba"] = {
canonicalName = "Elbasan",
characters = "𐔀-𐔧",
}
m["Ethi"] = {
canonicalName = "Ethiopic",
otherNames = {"Ge'ez"},
characters = "ሀ-᎙ⶀ-ⷞꬁ-ꬮ",
}
m["Geok"] = {
canonicalName = "Nuskhuri",
otherNames = {"Khutsuri", "Asomtavruli"},
characters = "Ⴀ-Ⴭⴀ-ⴭ", -- Ⴀ-Ⴭ is Asomtavruli, ⴀ-ⴭ is Nuskhuri
}
m["Geor"] = {
canonicalName = "Georgian",
otherNames = {"Mkhedruli"},
characters = "Ⴀ-ჼ", -- technically only the range [ა-ჿ] is Mkhedruli
}
m["Glag"] = {
canonicalName = "Glagolitic",
characters = "Ⰰ-ⱞ",
}
m["Goth"] = {
canonicalName = "Gothic",
characters = "𐌰-𐍊",
}
m["Gran"] = {
canonicalName = "Grantha",
characters = "𑌁-𑍴",
}
m["Grek"] = {
canonicalName = "Greek",
characters = "Ͱ-Ͽ",
}
m["polytonic"] = {
canonicalName = "Greek",
characters = "ἀ-῾" .. m["Grek"].characters,
}
m["Gujr"] = {
canonicalName = "Gujarati",
characters = "ઁ-૱",
}
m["Guru"] = {
canonicalName = "Gurmukhi",
characters = "ਁ-ੵ",
}
m["Hang"] = {
canonicalName = "Hangul",
characters = "ᄀ-ᇿ가-힣",
}
-- Han characters.
-- Two of the ranges have endpoints that are easily confused with ASCII
-- look-alikes, so they are written as code points:
--   U+3000 (ideographic space) .. U+303F   CJK Symbols and Punctuation
--   U+FF01 .. U+FFEE                       Halfwidth and Fullwidth Forms
-- Written as a plain space, "!" and "○", those ranges would start at
-- U+0020/U+0021 and make this class match Latin, Greek, Cyrillic and most
-- other scripts. Hans, Hant, Jpan and Kore reuse this string.
m["Hani"] = {
	canonicalName = "Han",
	otherNames = {"Hanzi", "Chu Nom"},
	characters = "一-鿌㐀-䶵"
		.. u(0x3000) .. "-〿"
		.. "𠀀-𫠝"
		.. u(0xFF01) .. "-" .. u(0xFFEE),
}
m["Hans"] = {
canonicalName = "Simplified Han",
characters = m["Hani"].characters,
}
m["Hant"] = {
canonicalName = "Traditional Han",
characters = m["Hani"].characters,
}
m["Hira"] = {
canonicalName = "Hiragana",
characters = "ぁ-ゟ",
}
m["Kana"] = {
canonicalName = "Katakana",
characters = "゠-ヿㇰ-ㇿ",
}
-- These should be defined after the scripts they are composed of
m["Jpan"] = {
canonicalName = "Japanese",
characters = m["Hira"].characters .. m["Kana"].characters .. m["Hani"].characters,
}
-- Korean: Hangul plus Han characters, plus the fullwidth/halfwidth forms.
-- The last range is given as code points (U+FF01..U+FFEE); spelled with the
-- ASCII "!" and "○" it would run from U+0021 to U+25CB and match nearly
-- every alphabetic script.
m["Kore"] = {
	canonicalName = "Korean",
	characters = m["Hang"].characters .. m["Hani"].characters
		.. u(0xFF01) .. "-" .. u(0xFFEE),
}
m["CGK"] = {
canonicalName = "Korean",
}
m["Hano"] = {
canonicalName = "Hanunoo",
characters = "ᜠ-᜴",
}
m["Hebr"] = {
canonicalName = "Hebrew",
characters = u(0x0590) .. "-" .. u(0x05FF) .. u(0xFB1D) .. "-" .. u(0xFB4F),
}
m["Hmng"] = {
canonicalName = "Hmong",
otherNames = {"Pahawh Hmong"},
characters = "𖬀-𖮏",
}
m["Ibrn"] = {
canonicalName = "Iberian",
}
m["Inds"] = {
canonicalName = "Indus",
otherNames = {"Harappan", "Indus Valley"},
}
m["IPAchar"] = {
canonicalName = "International Phonetic Alphabet",
}
m["Ital"] = {
canonicalName = "Old Italic",
characters = "𐌀-𐌣",
}
m["Java"] = {
canonicalName = "Javanese",
characters = "ꦀ-꧟",
}
m["Jurc"] = {
canonicalName = "Jurchen",
}
m["Kali"] = {
canonicalName = "Kayah Li",
characters = "꤀-꤯",
}
m["Khar"] = {
canonicalName = "Kharoshthi",
characters = "𐨀-𐩘",
}
m["Khmr"] = {
canonicalName = "Khmer",
characters = "ក-៹",
}
m["Knda"] = {
canonicalName = "Kannada",
characters = "ಂ-ೲ",
}
m["Kthi"] = {
canonicalName = "Kaithi",
characters = "𑂀-𑃁",
}
m["Lana"] = {
canonicalName = "Lanna",
}
m["Laoo"] = {
canonicalName = "Lao",
characters = "ກ-ໝ",
}
m["Latn"] = {
canonicalName = "Latin",
otherNames = {"Roman", "Rumi", "Romaji", "Rōmaji"},
characters = "0-9A-Za-z¡-ɏḀ-ỿ",
}
m["Latf"] = {
canonicalName = "Fraktur",
otherNames = {"Blackletter"},
characters = m["Latn"].characters,
}
m["Latinx"] = {
canonicalName = "Latin",
characters = m["Latn"].characters .. "Ⱡ-Ɀ꜠-ꟿꬰ-ꭥ",
}
m["nv-Latn"] = {
canonicalName = "Latin",
characters = m["Latn"].characters,
}
m["pjt-Latn"] = {
canonicalName = "Latin",
characters = m["Latn"].characters,
}
m["Lepc"] = {
canonicalName = "Lepcha",
characters = "ᰀ-ᱏ",
}
m["Limb"] = {
canonicalName = "Limbu",
characters = "ᤀ-᥏",
}
m["Lina"] = {
canonicalName = "Linear A",
characters = "𐘀-𐝧",
}
m["Linb"] = {
canonicalName = "Linear B",
characters = "𐀀-𐃺",
}
m["Lisu"] = {
canonicalName = "Lisu",
otherNames = {"Fraser"},
characters = "ꓐ-꓿",
}
m["Lyci"] = {
canonicalName = "Lycian",
characters = "𐊀-𐊜",
}
m["Lydi"] = {
canonicalName = "Lydian",
characters = "𐤠-𐤿",
}
m["Mand"] = {
canonicalName = "Mandaic",
characters = "ࡀ-࡞",
}
m["Mani"] = {
canonicalName = "Manichaean",
characters = "𐫀-𐫶",
}
m["Maya"] = {
canonicalName = "Maya",
otherNames = {"Maya hieroglyphic", "Mayan", "Mayan hieroglyphic"},
}
m["Mend"] = {
canonicalName = "Mende",
otherNames = {"Mende Kikakui"},
characters = "𞠀-𞣖",
}
m["Merc"] = {
canonicalName = "Meroitic cursive",
characters = "𐦠-𐦿",
}
m["Mero"] = {
canonicalName = "Meroitic hieroglyphic",
characters = "𐦀-𐦟",
}
m["Mlym"] = {
canonicalName = "Malayalam",
characters = "ം-ൿ",
}
m["Mong"] = {
canonicalName = "Mongolian",
characters = "᠀-ᢪ",
}
-- Meitei Mayek.
-- Both ranges are spelled out as code points. With the upper endpoints
-- missing, the class degenerates into a reversed (empty) range plus a
-- literal hyphen and matches no Meitei Mayek text at all.
m["Mtei"] = {
	canonicalName = "Meitei Mayek",
	characters =
		u(0xABC0) .. "-" .. u(0xABF9) ..  -- Meetei Mayek
		u(0xAAE0) .. "-" .. u(0xAAF6),    -- Meetei Mayek Extensions
}
m["musical"] = {
canonicalName = "musical notation",
characters = "𝄀-𝇝",
}
m["Mymr"] = {
canonicalName = "Burmese",
characters = "က-ၙ",
}
m["Nbat"] = {
canonicalName = "Nabataean",
otherNames = {"Nabatean"},
characters = "𐢀-𐢯",
}
m["Nkoo"] = {
canonicalName = "N'Ko",
characters = "߀-ߺ",
}
m["None"] = {
canonicalName = "No script specified",
-- This should not have any characters listed
character_category = false, -- none
}
-- Ogham.
-- The first character of the block, U+1680 (Ogham space mark), looks like an
-- ordinary space. It is written as a code point so the range cannot silently
-- turn into U+0020..U+169C, which would match almost every script.
m["Ogam"] = {
	canonicalName = "Ogham",
	characters = u(0x1680) .. "-" .. u(0x169C),
}
m["Olck"] = {
canonicalName = "Ol Chiki",
characters = "᱐-᱿",
}
m["Orkh"] = {
canonicalName = "Orkhon runes",
characters = "𐰀-𐱈",
}
m["Orya"] = {
canonicalName = "Oriya",
characters = "ଁ-୷",
}
m["Osma"] = {
canonicalName = "Osmanya",
characters = "𐒀-𐒩",
}
-- Palmyrene.
-- The range endpoints are given as code points (U+10860..U+1087F). A class
-- consisting only of "-" would match nothing but a literal hyphen.
m["Palm"] = {
	canonicalName = "Palmyrene",
	characters = u(0x10860) .. "-" .. u(0x1087F),
}
m["Phag"] = {
canonicalName = "Phags-pa",
characters = "ꡀ-꡷",
}
m["Phli"] = {
canonicalName = "Inscriptional Pahlavi",
characters = "𐭠-𐭿",
}
m["Phlp"] = {
canonicalName = "Psalter Pahlavi",
characters = "𐮀-𐮯",
}
m["Phlv"] = {
canonicalName = "Book Pahlavi",
-- Not in Unicode
}
m["Phnx"] = {
canonicalName = "Phoenician",
characters = "𐤀-𐤟",
}
m["Plrd"] = {
canonicalName = "Pollard",
characters = "𖼀-𖾟",
}
m["Prti"] = {
canonicalName = "Parthian",
characters = "𐭀-𐭟",
}
m["Rjng"] = {
canonicalName = "Rejang",
characters = "ꤰ-꥟",
}
m["Ruminumerals"] = {
canonicalName = "Rumi numerals",
characters = "𐹠-𐹾",
character_category = "Rumi numerals",
}
m["Runr"] = {
canonicalName = "Runic",
characters = "ᚠ-ᛰ",
}
m["Samr"] = {
canonicalName = "Samaritan",
characters = "ࠀ-࠾",
}
m["Sarb"] = {
canonicalName = "Old South Arabian",
characters = "𐩠-𐩿",
}
m["Saur"] = {
canonicalName = "Saurashtra",
characters = "ꢀ-꣙",
}
m["Sgnw"] = {
canonicalName = "SignWriting",
}
m["Shaw"] = {
canonicalName = "Shavian",
characters = "𐑐-𐑿",
}
m["Shrd"] = {
canonicalName = "Sharada",
characters = "𑆀-𑇙",
}
m["Sinh"] = {
canonicalName = "Sinhalese",
characters = "ං-෴",
}
m["Sora"] = {
canonicalName = "Sorang Sompeng",
otherNames = {"Sora Sompeng"},
characters = "𑃐-𑃹"
}
m["Sund"] = {
canonicalName = "Sundanese",
characters = "ᮀ-ᮿ",
}
m["Sylo"] = {
canonicalName = "Syloti Nagri",
otherNames = {"Sylheti Nagari"},
}
m["Syrc"] = {
canonicalName = "Syriac",
characters = "܀-ݏ",
}
m["Tagb"] = {
canonicalName = "Tagbanwa",
characters = "ᝠ-ᝳ",
}
m["Takr"] = {
canonicalName = "Takri",
characters = "𑚀-𑛉",
}
m["Tale"] = {
canonicalName = "Tai Le",
characters = "ᥐ-ᥴ",
}
m["Talu"] = {
canonicalName = "New Tai Lue",
characters = "ᦀ-᧟",
}
m["Taml"] = {
canonicalName = "Tamil",
characters = "ஂ-௺",
}
m["Tang"] = {
canonicalName = "Tangut",
}
m["Tavt"] = {
canonicalName = "Tai Viet",
characters = "ꪀ-꫟",
}
m["Telu"] = {
canonicalName = "Telugu",
characters = "ఁ-౿",
}
m["Teng"] = {
canonicalName = "Tengwar",
}
m["Tfng"] = {
canonicalName = "Tifinagh",
characters = "ⴰ-⵿",
}
m["Tglg"] = {
canonicalName = "Tagalog",
characters = "ᜀ-᜔",
}
m["Thaa"] = {
canonicalName = "Thaana",
characters = "ހ-ޱ",
}
m["Thai"] = {
canonicalName = "Thai",
characters = "ก-ฺ",
}
m["Tibt"] = {
canonicalName = "Tibetan",
characters = "ༀ-࿚",
}
m["xzh-Tibt"] = {
canonicalName = "Zhang-Zhung",
}
m["Ugar"] = {
canonicalName = "Ugaritic",
characters = "𐎀-𐎟",
}
m["Vaii"] = {
canonicalName = "Vai",
characters = "ꔀ-ꘫ",
}
m["Xpeo"] = {
canonicalName = "Old Persian",
characters = "𐎠-𐏕",
}
m["Xsux"] = {
canonicalName = "Cuneiform",
otherNames = {"Sumero-Akkadian Cuneiform"},
characters = "𒀀-𒍮𒐀-𒑳",
}
m["Yiii"] = {
canonicalName = "Yi",
characters = "ꀀ-꓆",
}
m["Zmth"] = {
canonicalName = "mathematical notation",
characters = "ℵ∀-⋿⟀-⟯⦀-⧿⨀-⫿𝐀-𝟿",
character_category = "Mathematical notation symbols" -- ?
}
-- Miscellaneous symbols.
-- Every group is a range; the arrows group needs its hyphen ("←-⇿") so that
-- it covers all of U+2190..U+21FF rather than just the two endpoint characters.
m["Zsym"] = {
	canonicalName = "symbol",
	characters = "─-➿←-⇿⌀-⏳🌀-🛅",
	character_category = false, -- none
}
m["Zyyy"] = {
canonicalName = "undetermined",
-- This should not have any characters listed, probably
character_category = false, -- none
}
m["Zzzz"] = {
canonicalName = "uncoded",
-- This should not have any characters listed
character_category = false, -- none
}
return m