Modül:heceleme
Bu belgeleme Modül:heceleme/belge (düzenle | geçmiş) sayfasından yansıtılmaktadır. Arayüz düzenleyicilerinin deney yapabilmeleri için ayrıca Modül:heceleme/deneme tahtası sayfası kullanılabilir.
-- Table returned at the end of this module; the public functions are attached to it.
local cikart = {}
-- Shorthand for turning Unicode code points into a string.
local U = mw.ustring.char
-- Vowel symbols taken from the IPA symbol data page, with ᵻ and ᵿ appended.
-- Used below as the contents of a pattern character class.
local unluler = mw.loadData("Modül:IPA/veri/semboller")["ünlüler"] .. "ᵻᵿ"
--[[
Add diphthongs to the list if they do not contain semivowel symbols: /j w ɰ ɥ/.
Do not include non-syllabic diacritics: /au/, not /au̯/.
The module automatically does not count vowels with non-syllabic diacritics.
]]--
-- Diphthong patterns per language code. Each string is a pattern fragment that
-- the counting functions match against a transcription. A language listed with
-- an empty table still counts as "present" for cikart.varmiIkiliunlu.
local ikiliunluler = {
	de = {
		"a[ɪʊ]",
		"ɔ[ʏɪ]",
	},
	-- Catalan has diphthongs, but they are generally transcribed using
	-- /w/ and /j/, so do not need to be listed
	-- (see [[w:Catalan language#Diphthongs and triphthongs]]).
	ca = {},
	-- [[w:Czech phonology#Diphthongs]]
	cs = {
		"[aeo]u",
	},
	-- From [[Appendix:English pronunciation]] mostly, but /ʌɪ/ is from the OED.
	en = {
		"[aɑeɛoɔʌ][ɪi]",
		"[ɑɒæo]e",
		"[əɐ]ʉ",
		"[aɒəoɔʌ]ʊ",
		"æo",
		-- /iə/ is a diphthong in NZE, but a disyllabic sequence in GA.
		-- /ɪə/ is both a disyllabic sequence and a diphthong in old-fashioned RP.
		"[ɛeɪiɔʊʉ]ə",
		-- May be a disyllabic sequence in some or all dialects?
		"[aʌ][ʊɪ]ə",
	},
	-- Finnish has diphthongs, but they are now automatically transcribed with
	-- the nonsyllabic diacritic.
	fi = {},
	-- French has diphthongs, but they are transcribed with semivowel symbols:
	-- [[w:French phonology#Glides and diphthongs]].
	fr = {},
	grc = {
		"[aeyo]i",
		"[ae]u",
		"[ɛɔa]ː[iu]",
	},
	-- [[w:Icelandic phonology#Vowels]]
	is = {
		-- Wikipedia is oddly specific about the second element: ei and ai, but øɪ.
		"[aeø][iɪy]",
		"[ao]u",
	},
	it = {
		"[aeɛoɔu]i",
		"[aeɛioɔ]u",
	},
	la = {
		"[eaou]i",
		"[eao]u",
		"[ao]e",
	},
	lb = {
		"[iu]ə",
		"[ɜoæɑ]ɪ",
		"[əæɑ]ʊ",
	},
	-- No diphthongs, properly speaking; sequences of a vowel, /j/ and /w/ though.
	sl = {},
	-- Slovak has rising diphthongs, /i̯e, i̯a, i̯u, u̯o/, which are probably always
	-- spelled with the nonsyllabic diacritic, so do not need to be listed.
	sk = {},
	-- No diphthongs, properly speaking; sequences of a vowel and /j/ though.
	ru = {},
	-- [[w:Albanian language#Vowels]] doesn't mention anything about diphthongs.
	sq = {},
	-- No diphthongs, properly speaking; sequences of a vowel and /j/ though.
	tr = {},
	-- No diphthongs.
	ug = {},
}
--[[ No use for this at the moment, though it is an interesting catalogue.
It might be usable for phonetic transcriptions.
Diacritics added to vowels:
inverted breve above, inverted breve below,
up tack, down tack,
left tack, right tack,
diaeresis (above), diaeresis below,
right half ring, left half ring,
plus sign below, minus sign below,
combining x above, rhotic hook,
tilde (above), tilde below
ligature tie (combining double breve), ligature tie below
]]
-- Catalogue of diacritics that can be added to vowels, as one string.
-- Nothing in the visible code reads this value.
local diyakritikler = U(
	0x311, 0x32F, -- inverted breve above, inverted breve below
	0x31D, 0x31E, -- up tack, down tack
	0x318, 0x319, -- left tack, right tack
	0x308, 0x324, -- diaeresis (above), diaeresis below
	0x339, 0x31C, -- right half ring, left half ring
	0x31F, 0x320, -- plus sign below, minus sign below
	0x33D, 0x2DE, -- combining x above, rhotic hook
	0x303, 0x330, -- tilde (above), tilde below
	0x361, 0x35C  -- ligature tie (combining double breve), ligature tie below
)

-- Raw character lists.
-- Non-syllabic marks: inverted breve above and below.
local hecesizDiyakritikler = U(0x311, 0x32F)
-- Syllabic marks: vertical line below and above.
local heceliDiyakritikler = U(0x0329, 0x030D)
-- Tie bars: double inverted breve above, double breve below.
local ties = U(0x361, 0x35C)
-- Length marks: long, half-long, extra short.
local uzunlukDiyakritikler = U(0x2D0, 0x2D1, 0x306)

-- Pattern fragments built from the lists above.
-- Combining acute and grave tone marks, circumflex.
local ton = string.format("[%s]", U(0x341, 0x340, 0x302))
-- One vowel symbol, optionally followed by a single tone mark.
local unlu = string.format("[%s]", unluler) .. ton .. "?"
local tie = string.format("[%s]", ties)
local hecesizDiyakritik = string.format("[%s]", hecesizDiyakritikler)
local heceliDiyakritik = string.format("[%s]", heceliDiyakritikler)
-- Byte-level pattern for one UTF-8 encoded character: a lead byte followed by
-- any continuation bytes. Meant for the plain string library, not mw.ustring.
local UTF8Char = "[\1-\127\194-\244][\128-\191]*"
--[[
Counts the syllable nuclei in a transcription.

A nucleus is, in order of preference at each position: any character carrying
a syllabic diacritic, two vowels joined by a tie bar, one of the diphthong
patterns listed for the language, or a single vowel. Vowels carrying a
non-syllabic diacritic are not counted.

@param remainder  string; the transcription text.
@param lang       language object; only its getirKod() method is used, to look
                  up the diphthong patterns.
@return the number of nuclei found, or nil when none were found or when the
        transcription starts or ends with a hyphen.
Raises an error if the text is not valid UTF-8.
]]
function cikart.getirUnluler(remainder, lang)
	-- If a hyphen is at the beginning or end of the transcription (optionally
	-- next to a "[" or "/" delimiter), do not count syllables.
	if string.find(remainder, "^[%[/]?%-") or string.find(remainder, "%-[%[/]?$") then
		return nil
	end

	local count = 0
	local diphs = ikiliunluler[lang:getirKod()] or {}

	-- toNFD yields nil for invalid UTF-8; fail with a clear message instead of
	-- an argument error from the gsub below.
	remainder = mw.ustring.toNFD(remainder) or error("Invalid UTF-8")
	-- Drop the outermost pair of parentheses. Any parentheses left over are
	-- skipped later like every other non-vowel.
	remainder = string.gsub(remainder, "%((.*)%)", "%1")

	while remainder ~= "" do
		-- Ignore a leading non-syllabic vowel.
		remainder = mw.ustring.gsub(remainder, "^" .. unlu .. hecesizDiyakritik, "")

		local m = mw.ustring.match(remainder, "^." .. heceliDiyakritik) -- Syllabic consonant
			or mw.ustring.match(remainder, "^" .. unlu .. tie .. unlu) -- Tie bar

		-- Starts with a recognised diphthong?
		if not m then
			for _, diph in ipairs(diphs) do
				m = mw.ustring.match(remainder, "^" .. diph)
				if m then
					break
				end
			end
		end

		-- If we haven't found anything yet, just match on a single vowel.
		m = m or mw.ustring.match(remainder, "^" .. unlu)

		if m then
			-- Found a nucleus: count it and consume it. #m is a byte length,
			-- which is what string.sub expects.
			count = count + 1
			remainder = string.sub(remainder, #m + 1)
		else
			-- Found a non-vowel: skip one UTF-8 character.
			local skipped
			remainder, skipped = string.gsub(remainder, "^" .. UTF8Char, "")
			if skipped == 0 and remainder ~= "" then
				-- The leading byte is one UTF8Char cannot match (e.g. NUL).
				-- Drop it so the loop always makes progress.
				remainder = string.sub(remainder, 2)
			end
		end
	end

	if count ~= 0 then
		return count
	end
	return nil
end
--[[
Test entry point: reports the result of cikart.getirUnluler as a sentence.

@param frame  frame object; frame.args[1] (required) is passed to the language
              lookup, frame.args[2] (default "") is the text to analyse.
@return a sentence stating how many vowels the text contains. When
        cikart.getirUnluler returns nil (nothing counted, or a leading or
        trailing hyphen), the sentence says "an unknown number of" instead of
        raising a concatenation error.
]]
function cikart.sayUnlulerTestIcin(frame)
	local params = {
		[1] = {required = true},
		[2] = {default = ""},
	}
	local args = require("Modül:parameters").process(frame.args, params)
	local diller = require("Modül:diller")
	local lang = diller.getirKodaGore(args[1]) or diller.err(args[1], 1)
	local count = cikart.getirUnluler(args[2], lang)
	-- count may be nil, and nil cannot be concatenated.
	return 'The text "' .. args[2] .. '" contains ' .. (count or 'an unknown number of') .. ' vowels.'
end
--[[
Computes three vowel statistics for a text.

@param text  string; normalised to NFD before counting.
@return count         number of vowel matches (a vowel plus an optional tone mark)
@return ikinciSay     number of runs of consecutive vowels
@return ikiliunluSay  count minus the number of non-syllabic vowels and
                      tie-bar vowel pairs
Raises an error if the text is not valid UTF-8.
]]
local function sayUnluler(text)
	text = mw.ustring.toNFD(text) or error("Invalid UTF-8")
	local _, count = mw.ustring.gsub(text, unlu, "")

	-- Lua patterns can only repeat a single character class. Because `unlu`
	-- ends in "?", appending "+" to it would match a literal plus sign, not a
	-- run of vowels. Remove the tone marks first, then count runs of plain
	-- vowel symbols.
	local toneless = (mw.ustring.gsub(text, ton, ""))
	local _, ikinciSay = mw.ustring.gsub(toneless, "[" .. unluler .. "]+", "")

	local _, hecesizSay = mw.ustring.gsub(text, unlu .. hecesizDiyakritik, "")
	local _, tieCount = mw.ustring.gsub(text, unlu .. tie .. unlu, "")
	local ikiliunluSay = count - (hecesizSay + tieCount)
	return count, ikinciSay, ikiliunluSay
end
--[[
Counts how many times the diphthong patterns of a language match in a text.

@param text  string; normalised to NFD before counting.
@param lang  language object; its getirKod() result selects the pattern list.
@return the total number of matches over all patterns (0 when the language
        has no patterns listed).
Raises an error if the text is not valid UTF-8.
]]
local function sayIkiliunlu(text, lang)
	text = mw.ustring.toNFD(text) or error("Invalid UTF-8")
	-- Use getirKod(), the accessor the rest of this module calls on language
	-- objects, and a distinct local name so the module-level table is not shadowed.
	local patterns = ikiliunluler[lang:getirKod()] or {}
	local total = 0
	for _, pattern in ipairs(patterns) do
		local _, count = mw.ustring.gsub(text, pattern, "")
		total = total + count
	end
	return total
end
-- Used by [[Modül:IPA]].
-- Returns true when the language's code has an entry in the diphthong table
-- (even an empty one), false otherwise.
function cikart.varmiIkiliunlu(lang)
	return ikiliunluler[lang:getirKod()] and true or false
end
--[[
Entry point that reports the three statistics from the local sayUnluler
helper as one sentence.

@param frame  frame object; frame.args[1] (default "") is the text to analyse.
@return a sentence listing the vowel count, the vowel-sequence count and the
        third count returned by the helper.
]]
function cikart.sayUnluler(frame)
	local args = require("Modül:parameters").process(frame.args, {
		[1] = {default = ""},
	})
	local count, ikinciSay, ikiliunluSay = sayUnluler(args[1])

	-- Placeholder used when a statistic is missing.
	local unknown = 'an unknown number of'
	local outputs = {
		(count or unknown) .. ' vowels',
		(ikinciSay or unknown) .. ' vowel sequences',
		(ikiliunluSay or unknown) .. ' vowels or vowels and diphthongs',
	}

	return 'The text "' .. args[1] .. '" contains ' .. mw.text.listToText(outputs) .. "."
end
--[[
Entry point that reports the vowel count of a text minus the number of
diphthong matches for the given language.

@param frame  frame object; frame.args[1] (required) is passed to the language
              lookup, frame.args[2] (default "") is the text to analyse.
@return a sentence stating the resulting number, with singular wording when
        it is exactly 1.
]]
function cikart.sayUnlulerIkiliunlu(frame)
	local params = {
		[1] = {required = true},
		[2] = {default = ""},
	}
	local args = require("Modül:parameters").process(frame.args, params)
	local diller = require("Modül:diller")
	local lang = diller.getirKodaGore(args[1]) or diller.err(args[1], 1)

	-- Only the first result of sayUnluler (the plain vowel count) is needed.
	-- Both helpers always return numbers, so the difference needs no fallback:
	-- the former `a - b or 0` parsed as `(a - b) or 0` and could never yield 0
	-- through the `or`.
	local vowelCount = sayUnluler(args[2])
	local count = vowelCount - sayIkiliunlu(args[2], lang)

	local out = 'The text "' .. args[2] .. '" contains ' .. count
	if count == 1 then
		out = out .. ' vowel or diphthong.'
	else
		out = out .. ' vowels or diphthongs.'
	end
	return out
end
-- Hand the table of public functions to whoever loads this module.
return cikart