Module:Citation/CS1/Configuration and Module:Citation/CS1/Configuration/sandbox: Difference between pages
(Difference between pages)
Jump to navigation
Jump to search
(bump ssrn;) |
No edit summary |
||
Line 1: | Line 1: | ||
--[[ |
|||
History of changes since last sync: 2022-01-26 |
|||
2022-01-31: move {{citation}} specific |volume= & |issue= lists from main module; see Help_talk:Citation_Style_1#%7Cissue%3D_and_%7Cvolume%3D_i18n |
|||
2022-02-03: automate local language month name fetching; see Help_talk:Citation_Style_1#Internationalisation_need_#2 |
|||
2022-02-27: i18n local_lang_cat_enable bug fix; see User_talk:Trappist_the_monk#CS1_-_Miscellaneous |
|||
2022-03-08: no auto-date formatting in template namespace; see Help_talk:Citation_Style_1#Checking_for_date_format_on_template_pages |
|||
2022-04-15: detect et al with trailing semicolon; see Help_talk:Citation_Style_1#et_al_and_punctuation |
|||
]] |
|||
local lang_obj = mw.language.getContentLanguage(); -- make a language object for the local language; used here for languages and dates |
|||
--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------ |
--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------ |
||
Line 131: | Line 144: | ||
local et_al_patterns = { |
local et_al_patterns = { |
||
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][%.\"']*$", -- variations on the 'et al' theme |
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][%.;,\"']*$", -- variations on the 'et al' theme |
||
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][Ii][AaIi][Ee]?[%.\"']*$", |
"[;,]? *[\"']*%f[%a][Ee][Tt]%.? *[Aa][Ll][Ii][AaIi][Ee]?[%.;,\"']*$", -- variations on the 'et alia', 'et alii' and 'et aliae' themes (false positive 'et aliie' unlikely to match) |
||
"[;,]? *%f[%a]and [Oo]thers", -- an alternative to et al. |
"[;,]? *%f[%a]and [Oo]thers", -- an alternative to et al. |
||
"%[%[ *[Ee][Tt]%.? *[Aa][Ll]%.? *%]%]", -- a wikilinked form |
"%[%[ *[Ee][Tt]%.? *[Aa][Ll]%.? *%]%]", -- a wikilinked form |
||
"%(%( *[Ee][Tt]%.? *[Aa][Ll]%.? *%)%)", -- a double-bracketed form (to counter partial removal of ((...)) syntax) |
"%(%( *[Ee][Tt]%.? *[Aa][Ll]%.? *%)%)", -- a double-bracketed form (to counter partial removal of ((...)) syntax) |
||
"[%(%[] *[Ee][Tt]%.? *[Aa][Ll]%.? *[%)%]]", -- a bracketed form |
"[%(%[] *[Ee][Tt]%.? *[Aa][Ll]%.? *[%)%]]", -- a bracketed form |
||
} |
} |
||
Line 436: | Line 449: | ||
-- Lua patterns to match generic titles; usually created by bots or reference filling tools |
-- Lua patterns to match generic titles; usually created by bots or reference filling tools |
||
-- translators: replace ['local'] = nil with lowercase translation only when bots or tools create generic titles in your language |
-- translators: replace ['local'] = nil with lowercase translation only when bots or tools create generic titles in your language |
||
['generic_titles'] = { |
|||
-- generic titles and patterns in this table should be lowercase only |
-- generic titles and patterns in this table should be lowercase only |
||
-- leave ['local'] nil except when there is a matching generic title in your language |
-- leave ['local'] nil except when there is a matching generic title in your language |
||
-- boolean 'true' for plain-text searches; 'false' for pattern searches |
-- boolean 'true' for plain-text searches; 'false' for pattern searches |
||
{['en'] = {'^wayback%s+machine$', false}, ['local'] = nil}, |
|||
['generic_titles'] = { |
|||
{['en'] = {'are you a robot', true}, ['local'] = nil}, |
|||
['accept'] = { |
|||
{['en'] = {'hugedomains.com', true}, ['local'] = nil}, |
|||
}, |
|||
{['en'] = {'^[%(%[{<]?no +title[>}%]%)]?$', false}, ['local'] = nil}, |
|||
['reject'] = { |
|||
{['en'] = {'page not found', true}, ['local'] = nil}, |
|||
{['en'] = {' |
{['en'] = {'^wayback%s+machine$', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'are you a robot', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'hugedomains.com', true}, ['local'] = nil}, |
||
{['en'] = {'^ |
{['en'] = {'^[%(%[{<]?no +title[>}%]%)]?$', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'page not found', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'subscribe to read', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'^[%(%[{<]?unknown[>}%]%)]?$', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'website is for sale', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'^404', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'internet archive wayback machine', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'log into facebook', true}, ['local'] = nil}, |
||
{['en'] = {'login • instagram', true}, ['local'] = nil}, |
|||
{['en'] = {'redirecting...', true}, ['local'] = nil}, |
|||
{['en'] = {'usurped title', true}, ['local'] = nil}, -- added by a GreenC bot |
|||
{['en'] = {'webcite query result', true}, ['local'] = nil}, |
|||
{['en'] = {'wikiwix\'s cache', true}, ['local'] = nil}, |
|||
} |
|||
}, |
}, |
||
-- boolean 'true' for plain-text searches, search string must be lowercase only |
|||
['generic_names'] = { |
|||
-- boolean 'false' for pattern searches |
|||
-- generic names and patterns in this table should be lowercase only |
|||
-- leave ['local'] nil except when there is a matching generic name in your language |
-- leave ['local'] nil except when there is a matching generic name in your language |
||
-- boolean 'true' for plain-text searches; 'false' for pattern searches |
|||
['generic_names'] = { |
|||
{['en'] = {'about us', true}, ['local'] = nil}, |
|||
['accept'] = { |
|||
{['en'] = {'%f[%a][Aa]dvisor%f[%A]', false}, ['local'] = nil}, |
|||
{['en'] = {'% |
{['en'] = {'%[%[[^|]*%(author%) *|[^%]]*%]%]', false}, ['local'] = nil}, |
||
}, |
|||
{['en'] = {'collaborator', true}, ['local'] = nil}, |
|||
['reject'] = { |
|||
{['en'] = {'contributor', true}, ['local'] = nil}, |
|||
{['en'] = {' |
{['en'] = {'about us', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'%f[%a][Aa]dvisor%f[%A]', false}, ['local'] = nil}, |
||
{['en'] = {'%f[% |
{['en'] = {'%f[%a][Aa]uthor%f[%A]', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'collaborator', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'contributor', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'contact us', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'directory', true}, ['local'] = nil}, |
||
{['en'] = {'%f[% |
{['en'] = {'%f[%(%[][%(%[]%s*eds?%.?%s*[%)%]]?$', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'[,%.%s]%f[e]eds?%.?$', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'^eds?[%.,;]', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'^[%(%[]%s*[Ee][Dd][Ss]?%.?%s*[%)%]]', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'%f[%a][Ee]dited%f[%A]', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'%f[%a][Ee]ditors?%f[%A]', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'%f[%a][Ee]mail%f[%A]', false}, ['local'] = nil}, -- whole-word 'email' or 'Email'; was ']Ee]' (mistyped bracket) which can never match |
||
{['en'] = {' |
{['en'] = {'facebook', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'google', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'home page', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'instagram', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'interviewer', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'linkedin', true}, ['local'] = nil}, -- plain-text search strings must be lowercase only |
||
{['en'] = {' |
{['en'] = {'^[Nn]ews$', false}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'pinterest', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'policy', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'privacy', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'translator', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'tumblr', true}, ['local'] = nil}, |
||
{['en'] = {' |
{['en'] = {'twitter', true}, ['local'] = nil}, |
||
{['en'] = {'site name', true}, ['local'] = nil}, |
|||
} |
|||
{['en'] = {'statement', true}, ['local'] = nil}, |
|||
{['en'] = {'submitted', true}, ['local'] = nil}, |
|||
{['en'] = {'super.?user', false}, ['local'] = nil}, |
|||
{['en'] = {'%f['..is_Latn..'][Uu]ser%f[^'..is_Latn..']', false}, ['local'] = nil}, |
|||
{['en'] = {'verfasser', true}, ['local'] = nil}, |
|||
} |
|||
} |
|||
} |
} |
||
Line 519: | Line 544: | ||
Easter and Christmas are defined here as 98 and 99, which should be out of the |
Easter and Christmas are defined here as 98 and 99, which should be out of the |
||
ISO 8601 (EDTF) range of uses for a while. |
ISO 8601 (EDTF) range of uses for a while. |
||
local_date_names_from_mediawiki is a boolean. When set to: |
|||
true – module will fetch local month names from MediaWiki for both date_names['local']['long'] and date_names['local']['short'] |
|||
false – module will *not* fetch local month names from MediaWiki |
|||
Caveat lector: There is no guarantee that MediaWiki will provide short month names. At your wiki you can test |
|||
the results of the MediaWiki fetch in the debug console with this command (the result is alpha sorted): |
|||
=mw.dumpObject (p.date_names['local']) |
|||
While the module can fetch month names from MediaWiki, it cannot fetch the quarter, season, and named date names |
|||
from MediaWiki. Those must be translated manually. |
|||
]] |
]] |
||
local local_date_names_from_mediawiki = true; -- when false, manual translation required for date_names['local']['long'] and date_names['local']['short'] |
|||
-- when true, module fetches long and short month names from MediaWiki |
|||
local date_names = { |
local date_names = { |
||
['en'] = { -- English |
['en'] = { -- English |
||
Line 530: | Line 568: | ||
['named'] = {['Easter'] = 98, ['Christmas'] = 99}, |
['named'] = {['Easter'] = 98, ['Christmas'] = 99}, |
||
}, |
}, |
||
-- when local_date_names_from_mediawiki = false |
|||
['local'] = { -- replace these English date names with the local language equivalents |
['local'] = { -- replace these English date names with the local language equivalents |
||
['long'] = {['January'] = 1, ['February'] = 2, ['March'] = 3, ['April'] = 4, ['May'] = 5, ['June'] = 6, ['July'] = 7, ['August'] = 8, ['September'] = 9, ['October'] = 10, ['November'] = 11, ['December'] = 12}, |
['long'] = {['January'] = 1, ['February'] = 2, ['March'] = 3, ['April'] = 4, ['May'] = 5, ['June'] = 6, ['July'] = 7, ['August'] = 8, ['September'] = 9, ['October'] = 10, ['November'] = 11, ['December'] = 12}, |
||
Line 537: | Line 576: | ||
['named'] = {['Easter'] = 98, ['Christmas'] = 99}, |
['named'] = {['Easter'] = 98, ['Christmas'] = 99}, |
||
}, |
}, |
||
[' |
['inv_local_long'] = {}, -- used in date reformatting & translation; copy of date_names['local'].long where k/v are inverted: [1]='<local name>' etc. |
||
[' |
['inv_local_short'] = {}, -- used in date reformatting & translation; copy of date_names['local'].short where k/v are inverted: [1]='<local name>' etc. |
||
['inv_local_quarter'] = {}, -- used in date translation; copy of date_names['local'].quarter where k/v are inverted: [1]='<local name>' etc. |
|||
['inv_local_season'] = {}, -- used in date translation; copy of date_names['local'].season where k/v are inverted: [1]='<local name>' etc. |
|||
['inv_local_named'] = {}, -- used in date translation; copy of date_names['local'].named where k/v are inverted: [1]='<local name>' etc. |
|||
['local_digits'] = {['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9'}, -- used to convert local language digits to Western 0-9 |
['local_digits'] = {['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9'}, -- used to convert local language digits to Western 0-9 |
||
['xlate_digits'] = {}, |
['xlate_digits'] = {}, |
||
} |
} |
||
if local_date_names_from_mediawiki then -- if fetching local month names from MediaWiki is enabled |
|||
for name, i in pairs (date_names['local'].long) do -- this table is ['name'] = i |
|||
local long_t = {}; |
|||
date_names['inv_local_l'][i] = name; -- invert to get [i] = 'name' for conversions from ymd |
|||
local short_t = {}; |
|||
for i=1, 12 do -- loop 12x and |
|||
local name = lang_obj:formatDate('F', '2022-' .. i .. '-1'); -- get long month name for each i |
|||
long_t[name] = i; -- save it |
|||
name = lang_obj:formatDate('M', '2022-' .. i .. '-1'); -- get short month name for each i |
|||
short_t[name] = i; -- save it |
|||
end |
|||
date_names['local']['long'] = long_t; -- write the long table – overwrites manual translation |
|||
date_names['local']['short'] = short_t; -- write the short table – overwrites manual translation |
|||
end |
end |
||
-- create inverted date-name tables for reformatting and/or translation |
|||
for _, invert_t in pairs {{'long', 'inv_local_long'}, {'short', 'inv_local_short'}, {'quarter', 'inv_local_quarter'}, {'season', 'inv_local_season'}, {'named', 'inv_local_named'}} do |
|||
for name, i in pairs (date_names['local'].short) do -- this table is ['name'] = i |
|||
date_names[' |
for name, i in pairs (date_names['local'][invert_t[1]]) do -- this table is ['name'] = i |
||
date_names[invert_t[2]][i] = name; -- invert to get [i] = 'name' for conversions from ymd |
|||
end |
|||
end |
end |
||
Line 572: | Line 625: | ||
local function get_date_format () |
local function get_date_format () |
||
local title_object = mw.title.getCurrentTitle(); |
|||
local content = mw.title.getCurrentTitle():getContent() or ''; -- get the content of the article or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625 |
|||
if title_object.namespace == 10 then -- not in template space so that unused templates appear in unused-template-reports; |
|||
return nil; -- auto-formatting does not work in Template space so don't set global_df |
|||
end |
|||
local content = title_object:getContent() or ''; -- get the content of the article or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625 |
|||
for _, pattern in ipairs (df_template_patterns) do -- loop through the patterns looking for {{Use dmy dates}} or {{Use mdy dates}} or any of their redirects |
for _, pattern in ipairs (df_template_patterns) do -- loop through the patterns looking for {{Use dmy dates}} or {{Use mdy dates}} or any of their redirects |
||
local start, _, match = content:find(pattern); -- match is the three letters indicating desired date format |
local start, _, match = content:find(pattern); -- match is the three letters indicating desired date format |
||
Line 601: | Line 658: | ||
local templates_not_using_page = {'audio-visual', 'episode', 'mailinglist', 'newsgroup', 'podcast', 'serial', 'sign', 'speech'} |
local templates_not_using_page = {'audio-visual', 'episode', 'mailinglist', 'newsgroup', 'podcast', 'serial', 'sign', 'speech'} |
||
--[[ |
|||
These tables control when it is appropriate for {{citation}} to render |volume= and/or |issue=. The parameter |
|||
names in the tables constrain {{citation}} so that its renderings match the renderings of the equivalent cs1 |
|||
templates. For example, {{cite web}} does not support |volume= so the equivalent {{citation |website=...}} must |
|||
not support |volume=. |
|||
]] |
|||
local citation_no_volume_t = { -- {{citation}} does not render |volume= when these parameters are used |
|||
'website', 'mailinglist', 'script-website', |
|||
} |
|||
local citation_issue_t = { -- {{citation}} may render |issue= when these parameters are used |
|||
'journal', 'magazine', 'newspaper', 'periodical', 'work', |
|||
'script-journal', 'script-magazine', 'script-newspaper', 'script-periodical', 'script-work', |
|||
} |
|||
--[[ |
--[[ |
||
Line 890: | Line 963: | ||
]] |
]] |
||
local this_wiki_code = mw.getContentLanguage():getCode(); -- get this wiki's language code |
--local this_wiki_code = mw.getContentLanguage():getCode(); -- get this wiki's language code |
||
local this_wiki_code = lang_obj:getCode(); -- get this wiki's language code |
|||
if string.match (mw.site.server, 'wikidata') then |
|||
if string.match (mw.site.server, 'wikidata') then |
|||
this_wiki_code = mw.getCurrentFrame():preprocess('{{int:lang}}'); -- on Wikidata so use interface language setting instead |
this_wiki_code = mw.getCurrentFrame():preprocess('{{int:lang}}'); -- on Wikidata so use interface language setting instead |
||
end |
end |
||
Line 964: | Line 1,038: | ||
} |
} |
||
local lang_name_remap = { -- used for |language= |
local lang_name_remap = { -- used for |language=; names require proper capitalization; tags must be lowercase |
||
['alemannisch'] = {'Swiss German', 'gsw'}, -- not an ISO or IANA language name; MediaWiki uses 'als' as a subdomain name for Alemannic Wikipedia: als.wikipedia.org |
['alemannisch'] = {'Swiss German', 'gsw'}, -- not an ISO or IANA language name; MediaWiki uses 'als' as a subdomain name for Alemannic Wikipedia: als.wikipedia.org |
||
['bangla'] = {'Bengali', 'bn'}, -- MediaWiki returns Bangla (the endonym) but we want Bengali (the exonym); here we remap |
['bangla'] = {'Bengali', 'bn'}, -- MediaWiki returns Bangla (the endonym) but we want Bengali (the exonym); here we remap |
||
Line 976: | Line 1,050: | ||
['kölsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name |
['kölsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name |
||
['ripuarian'] = {'Ripuarian', 'mis-x-ripuar'}, -- group of dialects; no code in MediaWiki or in IANA/ISO 639 |
['ripuarian'] = {'Ripuarian', 'mis-x-ripuar'}, -- group of dialects; no code in MediaWiki or in IANA/ISO 639 |
||
['taiwanese hokkien'] = {'Taiwanese Hokkien', 'nan- |
['taiwanese hokkien'] = {'Taiwanese Hokkien', 'nan-tw'}, -- make room for MediaWiki/IANA/ISO 639 nan: Min Nan Chinese |
||
['tosk albanian'] = {'Tosk Albanian', 'als'}, -- MediaWiki replaces 'Tosk Albanian' with 'Alemannisch' so 'Tosk Albanian' cannot be found |
['tosk albanian'] = {'Tosk Albanian', 'als'}, -- MediaWiki replaces 'Tosk Albanian' with 'Alemannisch' so 'Tosk Albanian' cannot be found |
||
['valencian'] = {'Valencian', 'ca'}, -- variant of Catalan; categorizes as Catalan |
['valencian'] = {'Valencian', 'ca'}, -- variant of Catalan; categorizes as Catalan |
||
Line 2,080: | Line 2,154: | ||
indic_script = indic_script, |
indic_script = indic_script, |
||
emoji = emoji, |
emoji = emoji, |
||
local_lang_cat_enable = local_lang_cat_enable, |
|||
maint_cats = maint_cats, |
maint_cats = maint_cats, |
||
messages = messages, |
messages = messages, |
||
Line 2,101: | Line 2,174: | ||
mw_languages_by_name_t = mw_languages_by_name_t, |
mw_languages_by_name_t = mw_languages_by_name_t, |
||
citation_class_map_t = citation_class_map_t, |
citation_class_map_t = citation_class_map_t, |
||
citation_issue_t = citation_issue_t, |
|||
citation_no_volume_t = citation_no_volume_t, |
|||
} |
} |