Modul:character list
Utsehn
This module uses Module:Unicode data to generate Appendix:Unicode and its subpages.
local export = {}
local m_unicode = require("Module:Unicode data")
local m_uni_alias = mw.loadData("Module:Unicode data/aliases")
-- Maps a script code to a bracketed set pattern ("[...]") built from that
-- script's `characters` field, for use with mw.ustring.match.
-- Scripts without a `characters` field get no entry.
local sc_pattable = {}
do
	local script_data = mw.loadData("Module:scripts/data")
	for code, data in pairs(script_data) do
		local chars = data.characters
		if chars then
			sc_pattable[code] = "[" .. chars .. "]"
		end
	end
end
-- Patterns that have already matched once, keyed by script code.
-- get_script fills this table and consults it before the full pattern table.
local quick_sc = {}

-- Script codes that get_script skips while scanning sc_pattable, so they are
-- never returned.
local sc_blacklist = {
	Latf = true,
	Hans = true,
	Hant = true,
}
--- Finds a script code whose character pattern matches a codepoint.
-- @param codepoint  numeric codepoint, converted with mw.ustring.char
-- @return script code of the first matching pattern, or "Zyyy" if none match
local function get_script(codepoint)
	local ch = mw.ustring.char(codepoint)

	-- Fast path: patterns that matched on an earlier call.
	for code, pattern in pairs(quick_sc) do
		if mw.ustring.match(ch, pattern) then
			return code
		end
	end

	-- Slow path: scan every non-blacklisted script and remember the hit.
	for code, pattern in pairs(sc_pattable) do
		if not sc_blacklist[code] and mw.ustring.match(ch, pattern) then
			quick_sc[code] = pattern
			return code
		end
	end

	-- Nothing matched.
	return "Zyyy"
end
--- Builds the navigation header for a Unicode block page.
-- Reads the block name from `frame.args.block` and returns wikitext holding
-- a three-column table (link to the previous block, heading with the block
-- name, link to the next block) followed by an introductory sentence that
-- gives the block's codepoint range.
-- Raises an error when the block name is missing or is not one of the names
-- produced by m_unicode.enum_blocks().
function export.show_header(frame)
	local block_name = frame.args.block

	-- Collect every block name so the neighbours of the requested block
	-- can be looked up by index afterwards.
	local names, i = {}, nil
	for j, name in m_unicode.enum_blocks() do
		names[j] = name
		if block_name == name then
			i = j
		end
	end

	if not i then
		-- tostring() keeps the message intact when `block` was not passed at
		-- all; concatenating nil would raise an unrelated error instead.
		error('Invalid Unicode block name: ' .. tostring(block_name))
	end

	local st, en = m_unicode.get_block_range(block_name)

	-- NOTE(review): the Unicode version in the text below is hard-coded;
	-- confirm it matches the data in Module:Unicode data.
	return (
		'{| style="width: 100%%;"\n' ..
		' | style="width: 30%%; text-align: left;" | %s\n' ..
		' | style="text-align: center;" | <h2>%s</h2>\n' ..
		' | style="width: 30%%; text-align: right;" | %s\n' ..
		' |}\nThis page lists the characters in the “[http://unicode.org/charts/PDF/U%04X.pdf %s]” block of the Unicode standard, version 7.0. This block covers codepoints from U+%04X to U+%04X.\n'):format(
		names[i - 1] and ("'''[[Anhang:Unicode/%s|← %s]]'''"):format(names[i - 1], names[i - 1]) or "",
		names[i],
		names[i + 1] and ("'''[[Anhang:Unicode/%s|%s →]]'''"):format(names[i + 1], names[i + 1]) or "",
		st, names[i], st, en
	)
end
--- Lists the Unicode blocks that lie entirely inside a codepoint range.
-- `frame.args[1]` and `frame.args[2]` are the lower and upper bound of the
-- range; both are converted with tonumber().
-- Returns a wikitable with one row per block (start, end, linked name).
-- Raises an error when either bound is missing or not a number.
function export.show_blocks(frame)
	local st, en = tonumber(frame.args[1]), tonumber(frame.args[2])
	if not (st and en) then
		-- Without this check the comparison in the loop below would fail
		-- with "attempt to compare number with nil".
		error("Invalid number specified")
	end

	local result = {
		'{| class="wikitable" style="width: 100%;"\n! width="10%;" | Start\n! width="10%;" | End\n ! Block name\n',
	}

	for _, name, bst, ben in m_unicode.enum_blocks() do
		-- Only blocks fully contained in [st, en] are listed.
		if (bst >= st) and (ben <= en) then
			result[#result + 1] = (
				'|-\n|U+%04X\n|U+%04X\n|[[Anhang:Unicode/%s|%s]]\n'
			):format(bst, ben, name, name)
		end
	end

	result[#result + 1] = "|}"
	return table.concat(result)
end
-- Explicit link targets keyed by codepoint. present_codepoint consults this
-- table first and only falls back to linking the character itself when the
-- codepoint has no entry here.
local unsupported_titles = {
	[0x0020] = "Unsupported titles/Space",
	[0x0023] = "Unsupported titles/Number sign",
	[0x002e] = "Unsupported titles/Full stop",
	[0x003a] = "Unsupported titles/Colon",
	[0x003c] = "Unsupported titles/Less than sign",
	[0x003e] = "Unsupported titles/Greater than sign",
	[0x005b] = "Unsupported titles/Left square bracket",
	[0x005d] = "Unsupported titles/Right square bracket",
	[0x005f] = "Unsupported titles/Low line",
	[0x007b] = "Unsupported titles/Left curly bracket",
	[0x007c] = "Unsupported titles/Vertical line",
	[0x007d] = "Unsupported titles/Right curly bracket",
	[0x1680] = "Unsupported titles/Ogham space",
}

--- Renders the "character" cell for one codepoint.
-- Unprintable codepoints yield a small placeholder. Otherwise the character
-- is emitted as a numeric character reference inside a <bdi> element whose
-- class is the script code from get_script; combining characters are
-- prefixed with a dotted circle. The element is wrapped in a wikilink when a
-- link target is available.
local function present_codepoint(codepoint)
	if not m_unicode.is_printable(codepoint) then
		return '<small>(unprintable)</small>'
	end

	local link_target = unsupported_titles[codepoint]
	if not link_target and m_unicode.is_valid_pagename(mw.ustring.char(codepoint)) then
		-- Link to the character itself, written as a character reference.
		link_target = ('&#%u;'):format(codepoint)
	end

	local glyph = ('<bdi style="font-size: large;" class="%s">%s&#%u;</bdi>'):format(
		get_script(codepoint),
		m_unicode.is_combining(codepoint) and "◌" or "",
		codepoint
	)

	if link_target then
		return '[[' .. link_target .. '|' .. glyph .. ']]'
	end
	-- The empty comment is kept so the unlinked output stays byte-identical
	-- to what this module produced before.
	return '<!--  -->' .. glyph
end

--- Turns the alias records of one codepoint into display text.
-- Each record is read as { type, name }. Names are grouped by type and
-- emitted in a fixed order: alternate, control, correction, figment,
-- abbreviation. Records with any other type are ignored rather than
-- raising an error, since nothing below would render them.
-- @param alias_list  sequence of alias records (iterated with ipairs)
-- @return string, possibly empty
local function format_aliases(alias_list)
	local by_type = {
		correction = {},
		control = {},
		alternate = {},
		figment = {},
		abbreviation = {},
	}
	for _, info in ipairs(alias_list) do
		local bucket = by_type[info[1]]
		if bucket then
			bucket[#bucket + 1] = "<small>" .. info[2] .. "</small>"
		end
	end

	local parts = {}
	for _, name in ipairs(by_type.alternate) do
		parts[#parts + 1] = ' aka ' .. name
	end
	if #by_type.control > 0 then
		parts[#parts + 1] = '; control character name: ' .. table.concat(by_type.control, " or ")
	end
	for _, name in ipairs(by_type.correction) do
		parts[#parts + 1] = '<br/>Corrected name: ' .. name
	end
	for _, name in ipairs(by_type.figment) do
		parts[#parts + 1] = '<br/>Figment name: ' .. name
	end
	if #by_type.abbreviation > 0 then
		parts[#parts + 1] = ' (' .. table.concat(by_type.abbreviation, ", ") .. ')'
	end
	return table.concat(parts)
end

--- Renders the character table for a Unicode block or codepoint range.
-- The range comes either from `frame.args.block` (resolved through
-- m_unicode.get_block_range) or from the two positional parameters, which
-- are converted with tonumber(). Every assigned codepoint in the range
-- becomes one table row: code, character, Unicode name, and the name from
-- m_unicode.lookup_name_nds_n; the alias text is appended to both name cells.
-- Raises an error when the block is unknown, a bound is not a number, or
-- neither a block nor a range was given.
function export.show(frame)
	local args = frame.args
	local st, en

	if args.block then
		st, en = m_unicode.get_block_range(args.block)
		if not st then
			error("Invalid Unicode block specified")
		end
	elseif args[1] and args[2] then
		st, en = tonumber(args[1]), tonumber(args[2])
		if not (st and en) then
			error("Invalid number specified")
		end
	else
		error("Must give a Unicode block or character range")
	end

	local result = {
		'{| class="wikitable"\n' ..
		' ! width="10%" | Code\n' ..
		' ! width="5%" | Teken\n' ..
		' ! unicode-Naam\n' ..
		' ! Naam op Oostnoordseeplattdüütsch\n',
	}

	local row_format = " |-\n | [http://unicode.org/cldr/utility/character.jsp?a=%04X U+%04X] <small>(%u)</small>\n | %s\n | <small>%s</small>%s\n | <small>%s</small>%s\n"

	for cp = st, en do
		if m_unicode.is_assigned(cp) then
			local alias_list = m_uni_alias[cp]
			-- format_aliases always returns a string, so and/or is safe here.
			local alt_names = alias_list and format_aliases(alias_list) or ""

			result[#result + 1] = row_format:format(
				cp, cp, cp,
				present_codepoint(cp),
				mw.text.nowiki(m_unicode.lookup_name(cp)),
				alt_names,
				mw.text.nowiki(m_unicode.lookup_name_nds_n(cp)),
				alt_names
			)
		end
	end

	result[#result + 1] = ' |}'
	return table.concat(result)
end
return export