You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
83 lines
2.2 KiB
Lua
83 lines
2.2 KiB
Lua
-- RFC 5646 Section 2.1
|
|
|
|
local lpeg = require "lpeg"
|
|
local core = require "lpeg_patterns.core"
|
|
|
|
local C = lpeg.C
|
|
local P = lpeg.P
|
|
local R = lpeg.R
|
|
local Cg = lpeg.Cg
|
|
local Ct = lpeg.Ct
|
|
local Cmt = lpeg.Cmt
|
|
|
|
-- Module table; the exported patterns (privateuse, langtag, Language_Tag)
-- are attached to it further down and it is returned at the end of the file.
local M = {}

-- One letter or one digit: the building block of most subtags below.
local alphanum = core.ALPHA + core.DIGIT
|
|
|
|
-- A single extended-language subtag: exactly three letters that end at a
-- subtag boundary (the next character must not be a letter or digit).
local extlang_subtag = core.ALPHA * core.ALPHA * core.ALPHA * -alphanum

-- extlang: one such subtag, optionally followed by up to two more, each
-- introduced by "-".
local extlang = extlang_subtag * (P"-" * extlang_subtag)^-2
|
|
|
|
-- Primary language subtag, captured into the named group "language".
--   * four to eight letters, with no extlang allowed after it, or
--   * two or three letters, optionally followed by "-" and an extlang
--     (captured into the named group "extlang").
-- The long form is tried first so that a run of four or more letters is never
-- split by the short form.
local long_language = core.ALPHA * core.ALPHA * core.ALPHA * core.ALPHA * core.ALPHA^-4
local short_language = core.ALPHA * core.ALPHA * core.ALPHA^-1

local language = Cg(long_language, "language")
	+ Cg(short_language, "language") * (P"-" * Cg(extlang, "extlang"))^-1
|
|
|
|
-- script: exactly four letters. The negative lookahead requires the subtag
-- to end here, which prevents the first four characters of a longer subtag
-- (a 'variant') from being interpreted as a script.
local script = core.ALPHA * core.ALPHA * core.ALPHA * core.ALPHA * -alphanum
|
|
|
|
-- region: either two letters or three digits. As with script, the negative
-- lookahead prevents the start of a longer subtag (a 'variant') from being
-- interpreted as a region.
local region_letters = core.ALPHA * core.ALPHA
local region_digits = core.DIGIT * core.DIGIT * core.DIGIT

local region = (region_letters + region_digits) * -alphanum
|
|
|
|
-- variant: five to eight letters/digits, or a digit followed by exactly three
-- letters/digits.
--
-- The longer form must be tried first: LPeg's ordered choice commits to the
-- first alternative that succeeds, so trying the 4-character form first would
-- truncate digit-led variants of five or more characters (for example
-- "1694acad" would be read as "1694" with "acad" left over).
-- The trailing negative lookahead requires the subtag to end at a boundary,
-- matching how script and region are delimited.
local variant = (
	alphanum * alphanum * alphanum * alphanum * alphanum * alphanum^-3
	+ core.DIGIT * alphanum * alphanum * alphanum
) * -alphanum
|
|
|
|
-- singleton: a single digit or letter other than "x"/"X", which is reserved
-- for private use.
local singleton = core.DIGIT + R("AW", "YZ", "aw", "yz")

-- One extension subtag: two to eight letters/digits, produced as a
-- lower-cased string capture.
local extension_subtag = (alphanum * alphanum * alphanum^-6) / string.lower

-- extension: produces two captures, the singleton (as written) and a table
-- holding its one or more "-"-separated, lower-cased subtags.
local extension = C(singleton) * Ct((P"-" * extension_subtag)^1)
|
|
|
|
-- privateuse: "x" followed by one or more "-"-separated subtags of one to
-- eight letters/digits. Produces a single table capture listing the subtags
-- as written.
-- Language tags are case-insensitive, and `singleton` excludes both "x" and
-- "X", so the upper-case introducer has to be accepted here as well;
-- otherwise a tag such as "en-X-foo" could not be parsed at all.
M.privateuse = (P"x" + P"X") * Ct((P"-" * C(alphanum * alphanum^-7))^1)
|
|
|
|
-- Match-time callback for the extension list.
-- `extensions` is a list of { singleton, subtags } pairs in input order.
-- Returns false (failing the match) if a singleton occurs more than once,
-- otherwise true plus a table mapping each singleton (as written) to its
-- subtag list.
-- A fold with rawset can't be used here because the pattern must not match
-- when there is a duplicate extension.
local function collect_extensions(_, _, extensions)
	local by_singleton = {}
	-- Singletons already encountered, keyed by their lower-cased form: tags are
	-- case-insensitive, so "a" and "A" denote the same extension.
	local seen = {}
	for _, ext in ipairs(extensions) do
		local key, subtags = ext[1], ext[2]
		local folded = string.lower(key)
		if seen[folded] then
			-- duplicate extension
			return false
		end
		seen[folded] = true
		by_singleton[key] = subtags
	end
	return true, by_singleton
end

-- langtag: language, then optional script, region, variants, extensions and
-- private-use parts, in that order. Each part is produced as a named group
-- capture ("language", "extlang", "script", "region", "variant", "extension",
-- "privateuse"); wrap the pattern in a table capture to collect them.
M.langtag = language
	* (P"-" * Cg(script, "script"))^-1
	* (P"-" * Cg(region, "region"))^-1
	* Cg(Ct((P"-" * C(variant))^1), "variant")^-1
	* Cg(Cmt(Ct((P"-" * Ct(extension))^1), collect_extensions), "extension")^-1
	* (P"-" * Cg(M.privateuse, "privateuse"))^-1
|
|
|
|
-- Build a pattern that matches `literal` regardless of ASCII letter case.
-- Characters without distinct upper/lower forms (such as "-") match only
-- themselves.
local function caseless(literal)
	local patt = P(true)
	for i = 1, #literal do
		local ch = literal:sub(i, i)
		local lower, upper = ch:lower(), ch:upper()
		if lower == upper then
			patt = patt * P(ch)
		else
			patt = patt * (P(lower) + P(upper))
		end
	end
	return patt
end

-- The fixed list of irregular tags. They are matched case-insensitively,
-- like the rest of the grammar in this file.
local irregular_tags = {
	"en-GB-oed",
	"i-ami",
	"i-bnn",
	"i-default",
	"i-enochian",
	"i-hak",
	"i-klingon",
	"i-lux",
	"i-mingo",
	"i-navajo",
	"i-pwn",
	"i-tao",
	"i-tay",
	"i-tsu",
	"sgn-BE-FR",
	"sgn-BE-NL",
	"sgn-CH-DE",
}

-- Ordered choice over all irregular tags, in list order.
local irregular = P(false)
for _, tag in ipairs(irregular_tags) do
	irregular = irregular + caseless(tag)
end
|
|
|
|
-- Language_Tag: any language tag, captured as a single string.
-- The inner function capture returns nothing, which throws away the
-- decomposition produced by the alternatives; the outer C() then captures the
-- whole matched tag.
--
-- The irregular tags are tried first. LPeg's ordered choice commits to the
-- first alternative that succeeds, and langtag matches a prefix of several
-- irregular tags ("en-GB" of "en-GB-oed", "sgn-BE" of "sgn-BE-FR"), so trying
-- langtag first would make those tags unreachable. The negative lookahead
-- keeps an irregular tag from matching as a mere prefix of a longer subtag,
-- leaving such input to langtag.
M.Language_Tag = C(((irregular * -alphanum)
	+ M.langtag
	+ M.privateuse) / function() end)
|
|
|
|
return M
|