Modul:character list

Vun Wiktionary
Dissen Vörlaag hef dokumentatsie neudig.
Maak de dokumentatsie veur dissen modul ovver t doel en gebroek op de dokumentatsieziede.

Vörlaag:status

This module uses Module:Unicode data to generate Appendix:Unicode and its subpages.


local export = {}
local m_unicode = require("Module:Unicode data")
local m_uni_alias = mw.loadData("Module:Unicode data/aliases")

-- Maps a script code to a bracketed character-class pattern built from that
-- script's `characters` field; scripts without the field get no entry.
local sc_pattable = {}

do
	-- Scoped so the loaded script data does not stay visible to the rest of
	-- the module once the patterns have been built.
	local script_data = mw.loadData("Module:scripts/data")
	for code, data in pairs(script_data) do
		local chars = data.characters
		if chars then
			sc_pattable[code] = "[" .. chars .. "]"
		end
	end
end

-- Cache of script patterns that have already matched some code point;
-- get_script consults it before scanning the full pattern table.
local quick_sc = {}

-- Script codes that get_script skips during the full scan.
-- NOTE(review): presumably excluded because they overlap with more general
-- scripts (Fraktur vs. Latin, Simplified/Traditional vs. Han) -- confirm.
local sc_blacklist = {
	Latf = true,
	Hans = true,
	Hant = true,
}

--- Find a script code whose character pattern matches the given code point.
-- Patterns that matched on an earlier call (quick_sc) are tried first; after
-- that every non-blacklisted pattern in sc_pattable is tried, and a hit is
-- remembered in quick_sc for later calls.
-- @param codepoint numeric code point to classify
-- @return the matching script code, or "Zyyy" when nothing matches
local function get_script(codepoint)
	local char = mw.ustring.char(codepoint)
	local umatch = mw.ustring.match

	-- First pass: scripts that have already been seen.
	for code, pattern in pairs(quick_sc) do
		if umatch(char, pattern) then
			return code
		end
	end

	-- Second pass: the full table, minus blacklisted scripts.
	for code, pattern in pairs(sc_pattable) do
		if not sc_blacklist[code] and umatch(char, pattern) then
			quick_sc[code] = pattern
			return code
		end
	end

	return "Zyyy"
end

--- Build the header of a Unicode block page: links to the previous and next
-- blocks (as enumerated by m_unicode.enum_blocks), the block name as a
-- heading, and an introductory sentence with the block's code point range.
-- @param frame Scribunto frame; reads the named argument `block`
-- @return wikitext string
-- Raises an error when `block` is missing or is not a known block name.
function export.show_header(frame)
	local block_name = frame.args.block
	if not block_name then
		-- Without this check the "Invalid Unicode block name" error below
		-- would itself fail while concatenating nil, hiding the real problem.
		error('Missing Unicode block name (parameter "block")')
	end

	-- Collect every block name by position and remember where ours sits, so
	-- the neighbouring blocks can be linked.
	local names, index = {}, nil
	for j, name in m_unicode.enum_blocks() do
		names[j] = name
		if name == block_name then
			index = j
		end
	end

	if not index then
		error('Invalid Unicode block name: ' .. block_name)
	end

	local st, en = m_unicode.get_block_range(block_name)

	local previous, following = names[index - 1], names[index + 1]
	local previous_link = previous
		and ("'''[[Anhang:Unicode/%s|← %s]]'''"):format(previous, previous)
		or ""
	local following_link = following
		and ("'''[[Anhang:Unicode/%s|%s →]]'''"):format(following, following)
		or ""

	-- "%%" yields a literal percent sign because this template goes through
	-- string.format.
	local template =
		'{| style="width: 100%%;"\n' ..
		' | style="width: 30%%; text-align: left;"   | %s\n' ..
		' | style="text-align: center;" | <h2>%s</h2>\n' ..
		' | style="width: 30%%; text-align: right;"  | %s\n' ..
		' |}\nThis page lists the characters in the “[http://unicode.org/charts/PDF/U%04X.pdf %s]” block of the Unicode standard, version 7.0. This block covers codepoints from U+%04X to U+%04X.\n'

	return template:format(
		previous_link,
		names[index] or "",
		following_link,
		st, names[index], st, en
	)
end

--- Render a wikitable of the Unicode blocks that lie entirely inside a
-- code point range.
-- @param frame Scribunto frame; positional argument 1 is the lower bound and
--   argument 2 the upper bound, each in a form accepted by tonumber
-- @return wikitext string
-- Raises an error when either bound is missing or not numeric.
function export.show_blocks(frame)
	local st, en = tonumber(frame.args[1]), tonumber(frame.args[2])
	if not (st and en) then
		-- Previously a missing or malformed bound surfaced only as
		-- "attempt to compare number with nil" inside the loop.
		error("Invalid number specified")
	end

	local result = {
		'{| class="wikitable" style="width: 100%;"\n! width="10%;" | Start\n! width="10%;" | End\n ! Block name\n',
	}

	local row = '|-\n|U+%04X\n|U+%04X\n|[[Anhang:Unicode/%s|%s]]\n'
	for _, name, bst, ben in m_unicode.enum_blocks() do
		-- Only blocks fully contained in [st, en] are listed.
		if bst >= st and ben <= en then
			result[#result + 1] = row:format(bst, ben, name, name)
		end
	end
	result[#result + 1] = "|}"

	return table.concat(result)
end

--- Render the character table for a Unicode block or an explicit range.
-- Each assigned code point gets one row: code, the character itself (linked
-- when a page title is available), its Unicode name, and the name returned
-- by m_unicode.lookup_name_nds_n; any name aliases are appended to both
-- name cells.
-- @param frame Scribunto frame; reads either the named argument `block` or
--   the positional arguments 1 and 2 (range start and end)
-- @return wikitext string
-- Raises an error when the block is unknown, a bound is not numeric, or
-- neither a block nor a range is given.
function export.show(frame)
	local result = {}
	local args = frame.args

	local st, en
	if args.block then
		st, en = m_unicode.get_block_range(args.block)
		if not st then
			error("Invalid Unicode block specified")
		end
	elseif args[1] and args[2] then
		st, en = tonumber(args[1]), tonumber(args[2])
		if not (st and en) then
			error("Invalid number specified")
		end
	else
		error("Must give a Unicode block or character range")
	end

	-- Explicit link targets used instead of the character itself for these
	-- code points.
	local unsupported_titles = {
		[0x0020] = "Unsupported titles/Space";
		[0x0023] = "Unsupported titles/Number sign";
		[0x002e] = "Unsupported titles/Full stop";
		[0x003a] = "Unsupported titles/Colon";
		[0x003c] = "Unsupported titles/Less than sign";
		[0x003e] = "Unsupported titles/Greater than sign";
		[0x005b] = "Unsupported titles/Left square bracket";
		[0x005d] = "Unsupported titles/Right square bracket";
		[0x005f] = "Unsupported titles/Low line";
		[0x007b] = "Unsupported titles/Left curly bracket";
		[0x007c] = "Unsupported titles/Vertical line";
		[0x007d] = "Unsupported titles/Right curly bracket";
		[0x1680] = "Unsupported titles/Ogham space";
	}

	-- Wikitext for the "character" cell of one code point.
	local function present_codepoint(codepoint)
		if not m_unicode.is_printable(codepoint) then
			return '<small>(unprintable)</small>'
		end

		local link_target = unsupported_titles[codepoint]
		if not link_target then
			-- Link to the character itself (as a numeric reference) only
			-- when it is a valid page name; otherwise leave it unlinked.
			link_target = m_unicode.is_valid_pagename(mw.ustring.char(codepoint))
			link_target = link_target and ('&#%u;'):format(codepoint)
		end

		local template
		if link_target then
			template = '[[%s|<bdi style="font-size: large;" class="%s">%s&#%u;</bdi>]]'
		else
			template = '<!-- %s --><bdi style="font-size: large;" class="%s">%s&#%u;</bdi>'
		end

		return template:format(
			link_target or "",
			get_script(codepoint),
			-- Combining characters are shown on a dotted circle.
			m_unicode.is_combining(codepoint) and "&#x25cc;" or "",
			codepoint
		)
	end

	-- Text appended after a character's name describing its aliases, or ""
	-- when the alias data has no entry for the code point.
	local function format_aliases(cp)
		local entries = m_uni_alias[cp]
		if not entries then
			return ""
		end

		-- Buckets per alias type. An entry whose type is not one of these
		-- five would make the table.insert below fail.
		local aliases = {
			["correction"  ] = {},
			["control"     ] = {},
			["alternate"   ] = {},
			["figment"     ] = {},
			["abbreviation"] = {},
		}

		for _, info in ipairs(entries) do
			table.insert(aliases[info[1]], "<small>" .. info[2] .. "</small>")
		end

		local parts = {}

		for _, name in ipairs(aliases.alternate) do
			parts[#parts + 1] = (' aka %s'):format(name)
		end

		if #aliases.control > 0 then
			parts[#parts + 1] = '; control character name: ' .. table.concat(aliases.control, " or ")
		end

		for _, name in ipairs(aliases.correction) do
			parts[#parts + 1] = ('<br/>Corrected name: %s'):format(name)
		end

		for _, name in ipairs(aliases.figment) do
			parts[#parts + 1] = ('<br/>Figment name: %s'):format(name)
		end

		if #aliases.abbreviation > 0 then
			parts[#parts + 1] = ' (' .. table.concat(aliases.abbreviation, ", ") .. ')'
		end

		return table.concat(parts)
	end

	table.insert(result,
		'{| class="wikitable"\n' ..
		' ! width="10%" | Code\n' ..
		' ! width="5%"  | Teken\n' ..
		' ! unicode-Naam\n' ..
		' ! Naam op Oostnoordseeplattdüütsch\n'
	)

	local row_template = " |-\n | [http://unicode.org/cldr/utility/character.jsp?a=%04X U+%04X] <small>(%u)</small>\n | %s\n | <small>%s</small>%s\n | <small>%s</small>%s\n"

	for cp = st, en do
		if m_unicode.is_assigned(cp) then
			local alt_names = format_aliases(cp)
			local character = present_codepoint(cp)
			-- nowiki keeps characters in the names from being parsed as
			-- wikitext.
			local name = mw.text.nowiki(m_unicode.lookup_name(cp))
			local local_name = mw.text.nowiki(m_unicode.lookup_name_nds_n(cp))

			result[#result + 1] = row_template:format(
				cp, cp, cp,
				character,
				name,
				alt_names,
				local_name,
				alt_names
			)
		end
	end

	table.insert(result,
		' |}'
	)

	return table.concat(result)
end

return export