m (1 revision) |
m (1 revision) |
||
Line 1: | Line 1: | ||
+ | --[[ | ||
+ | History of changes since last sync 2013-12-14 | ||
+ | |||
+ | 2013-12-21: Fix cite journal script error that occurred when the citation had a |pmc= without |title= | ||
+ | 2013-12-31: Migrate cite speech; | ||
+ | 2014-01-01: Kerning for title and chapter leading and trailing single or double quote marks for quoted titles; | ||
+ | 2014-01-02: Enhance month / season year date range to check for proper left to right time sequencing of month or seasons in the range; | ||
+ | 2014-01-05: Streamline date validation; Add day range validation; | ||
+ | 2014-01-11: Migrate cite podcast; | ||
+ | 2014-02-01: Add simple PMID error checking; | ||
+ | 2014-02-02: Refine DOI error check to catch trailing punctuation errors; | ||
+ | ]] | ||
+ | |||
local z = { | local z = { | ||
error_categories = {}; | error_categories = {}; | ||
Line 6: | Line 19: | ||
-- Include translation message hooks, ID and error handling configuration settings. | -- Include translation message hooks, ID and error handling configuration settings. | ||
− | local cfg = mw.loadData( 'Module:Citation/CS1/Configuration' ); | + | local cfg = mw.loadData( 'Module:Citation/CS1/Configuration/sandbox' ); |
-- Contains a list of all recognized parameters | -- Contains a list of all recognized parameters | ||
− | local whitelist = mw.loadData( 'Module:Citation/CS1/Whitelist' ); | + | local whitelist = mw.loadData( 'Module:Citation/CS1/Whitelist/sandbox' ); |
-- Whether variable is set or not | -- Whether variable is set or not | ||
Line 54: | Line 67: | ||
function substitute( msg, args ) | function substitute( msg, args ) | ||
return args and tostring( mw.message.newRawMessage( msg, args ) ) or msg; | return args and tostring( mw.message.newRawMessage( msg, args ) ) or msg; | ||
+ | end | ||
+ | |||
+ | --[[ | ||
+ | Apply kerning to open the space between the quote mark provided by the Module and a leading or trailing quote mark contained in a |title= or |chapter= parameter's value. | ||
+ | This function will positive kern either single or double quotes: | ||
+ | "'Unkerned title with leading and trailing single quote marks'" | ||
+ | " 'Kerned title with leading and trailing single quote marks' " (in real life the kerning isn't as wide as this example) | ||
+ | ]] | ||
+ | function kern_quotes (str) | ||
+ | local left='<span style="padding-left:0.2em;">%1</span>'; -- spacing to use when title contains leading single or double quote mark | ||
+ | local right='<span style="padding-right:0.2em;">%1</span>'; -- spacing to use when title contains trailing single or double quote mark | ||
+ | |||
+ | str = string.gsub( str, "^([\"\'])", left, 1 ); -- replace (captured) leading single or double quote with left-side <span> | ||
+ | str = string.gsub( str, "([\"\'])$", right, 1 ); -- replace (captured) trailing single or double quote with right-side <span> | ||
+ | return str; | ||
end | end | ||
Line 120: | Line 148: | ||
Looks for a parameter's name in the whitelist. | Looks for a parameter's name in the whitelist. | ||
− | Parameters in the whitelist can have three | + | Parameters in the whitelist can have three values: |
true - active, supported parameters | true - active, supported parameters | ||
false - deprecated, supported parameters | false - deprecated, supported parameters | ||
Line 259: | Line 287: | ||
return false; -- embargo expired or |embargo= not set | return false; -- embargo expired or |embargo= not set | ||
end | end | ||
+ | |||
+ | --[[ | ||
+ | Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This code checks the PMID to see that it | ||
+ | contains only digits and is less than test_limit; the value in local variable test_limit will need to be updated periodically as more PMIDs are issued. | ||
+ | ]] | ||
+ | function pmid(id) | ||
+ | local test_limit = 30000000; -- update this value as PMIDs approach | ||
+ | local handler = cfg.id_handlers['PMID']; | ||
+ | local err_cat = ''; -- presume that PMID is valid | ||
+ | |||
+ | if id:match("[^%d]") then -- if PMID has anything but digits | ||
+ | err_cat = ' ' .. seterror( 'bad_pmid' ); -- set an error message | ||
+ | else -- PMID is only digits | ||
+ | local id_num = tonumber(id); -- convert id to a number for range testing | ||
+ | if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries | ||
+ | err_cat = ' ' .. seterror( 'bad_pmid' ); -- set an error message | ||
+ | end | ||
+ | end | ||
+ | |||
+ | return externallinkid({link = handler.link, label = handler.label, | ||
+ | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
+ | end | ||
+ | |||
--[[ | --[[ | ||
Line 299: | Line 350: | ||
inactive = "" | inactive = "" | ||
end | end | ||
− | if | + | |
+ | if nil == string.match(id, "^10%..-[^%p]$") then -- doi must begin with '10.' and must not end with punctuation | ||
cat = seterror( 'bad_doi' ); | cat = seterror( 'bad_doi' ); | ||
end | end | ||
Line 381: | Line 433: | ||
end | end | ||
− | if " | + | if "podcast" == cite_class then -- if this citation is cite podcast |
+ | return "Podcast"; -- display podcast annotation | ||
+ | |||
+ | elseif "pressrelease" == cite_class then -- if this citation is cite press release | ||
return "Press release"; -- display press release annotation | return "Press release"; -- display press release annotation | ||
− | |||
− | |||
− | |||
elseif "techreport" == cite_class then -- if this citation is cite techreport | elseif "techreport" == cite_class then -- if this citation is cite techreport | ||
return "Technical report"; -- display techreport annotation | return "Technical report"; -- display techreport annotation | ||
Line 395: | Line 447: | ||
end | end | ||
− | -- returns a number according to the month in a date: 1 for January, etc. If not a valid month, returns 0 | + | -- returns a number according to the month in a date: 1 for January, etc. Capitalization and spelling must be correct. If not a valid month, returns 0 |
function get_month_number (month) | function get_month_number (month) | ||
− | local long_months = {[' | + | local long_months = {['January']=1, ['February']=2, ['March']=3, ['April']=4, ['May']=5, ['June']=6, ['July']=7, ['August']=8, ['September']=9, ['October']=10, ['November']=11, ['December']=12}; |
− | local short_months = {[' | + | local short_months = {['Jan']=1, ['Feb']=2, ['Mar']=3, ['Apr']=4, ['May']=5, ['Jun']=6, ['Jul']=7, ['Aug']=8, ['Sep']=9, ['Oct']=10, ['Nov']=11, ['Dec']=12}; |
local temp; | local temp; | ||
− | temp=long_months[month | + | temp=long_months[month]; |
if temp then return temp; end -- if month is the long-form name | if temp then return temp; end -- if month is the long-form name | ||
− | temp=short_months[month | + | temp=short_months[month]; |
if temp then return temp; end -- if month is the short-form name | if temp then return temp; end -- if month is the short-form name | ||
− | return 0; -- misspelled or not a month name | + | return 0; -- misspelled, improper case, or not a month name |
end | end | ||
− | -- returns | + | -- returns a number according to the sequence of seasons in a year: 1 for Winter, etc. Capitalization and spelling must be correct. If not a valid season, returns 0 |
− | function | + | function get_season_number (season) |
− | + | local season_list = {['Winter']=1, ['Spring']=2, ['Summer']=3, ['Fall']=4, ['Autumn']=4} | |
− | + | local temp; | |
− | end | + | temp=season_list[season]; |
− | return | + | if temp then return temp; end -- if season is a valid name return its number |
+ | return 0; -- misspelled, improper case, or not a season name | ||
end | end | ||
Line 445: | Line 498: | ||
end | end | ||
− | --Check a pair of months or seasons to see if both are valid members of a month or season pair. | + | --[[ |
− | + | Check a pair of months or seasons to see if both are valid members of a month or season pair. | |
+ | |||
+ | Month pairs are expected to be left to right, earliest to latest in time. Similarly, seasons are also left to right, earliest to latest in time. There is | ||
+ | an oddity with seasons. Winter is assigned a value of 1, spring 2, ..., fall and autumn 4. Because winter can follow fall/autumn at the end of a calendar year, a special test | ||
+ | is made to see if |date=Fall-Winter yyyy (4-1) is the date. | ||
+ | ]] | ||
function is_valid_month_season_range(range_start, range_end) | function is_valid_month_season_range(range_start, range_end) | ||
− | + | local range_start_number = get_month_number (range_start); | |
− | + | ||
− | + | if 0 == range_start_number then -- is this a month range? | |
+ | local range_start_number = get_season_number (range_start); -- not a month; is it a season? get start season number | ||
+ | local range_end_number = get_season_number (range_end); -- get end season number | ||
+ | |||
+ | if 0 ~= range_start_number then -- is start of range a season? | ||
+ | if range_start_number < range_end_number then -- range_start is a season | ||
+ | return true; -- return true when range_end is also a season and follows start season; else false | ||
+ | end | ||
+ | if 4 == range_start_number and 1 == range_end_number then -- special case when range is Fall-Winter or Autumn-Winter | ||
+ | return true; | ||
+ | end | ||
end | end | ||
− | return false; -- range_start is not a month or a season | + | return false; -- range_start is not a month or a season; or range_start is a season and range_end is not; or improper season sequence |
end | end | ||
− | + | ||
− | return | + | local range_end_number = get_month_number (range_end); -- get end month number |
+ | if range_start_number < range_end_number then -- range_start is a month; does range_start precede range_end? | ||
+ | return true; -- if yes, return true | ||
end | end | ||
− | return | + | return false; -- range_start month number is greater than or equal to range end number; or range end isn't a month |
end | end | ||
Line 480: | Line 550: | ||
function check_date (date_string) | function check_date (date_string) | ||
local year; | local year; | ||
− | local month; | + | local month=0; -- assume that month and day are not used; if either is zero then final year/month/day validation is not necessary |
− | local day; | + | local day=0; |
+ | local day2=0; -- second day in a day range | ||
local anchor_year; | local anchor_year; | ||
local coins_date; | local coins_date; | ||
if date_string:match("^%d%d%d%d%-%d%d%-%d%d$") then -- Year-initial numerical year month day format | if date_string:match("^%d%d%d%d%-%d%d%-%d%d$") then -- Year-initial numerical year month day format | ||
− | |||
year, month, day=string.match(date_string, "(%d%d%d%d)%-(%d%d)%-(%d%d)"); | year, month, day=string.match(date_string, "(%d%d%d%d)%-(%d%d)%-(%d%d)"); | ||
− | |||
month=tonumber(month); | month=tonumber(month); | ||
− | if 12 < month or 1 > month then return false; end | + | if 12 < month or 1 > month or 1583 > tonumber(year) then return false; end -- month number not valid or not Gregorian calendar |
+ | anchor_year = year; | ||
− | elseif date_string:match("^%a+ | + | elseif date_string:match("^%a+ +[1-9]%d?, +[1-9]%d%d%d%a?$") then -- month-initial: month day, year |
− | + | month, day, anchor_year, year=string.match(date_string, "(%a+)%s*(%d%d?),%s*((%d%d%d%d)%a?)"); | |
− | month, day, anchor_year, year=string.match(date_string, "(%a+)%s*(%d%d | + | month = get_month_number (month); |
− | month = get_month_number (month | + | |
if 0 == month then return false; end -- return false if month text isn't one of the twelve months | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
− | elseif date_string:match("^ | + | elseif date_string:match("^%a+ +[1-9]%d?–[1-9]%d?, +[1-9]%d%d%d%a?$") then -- month-initial day range: month day–day, year; days are separated by endash |
− | + | month, day, day2, anchor_year, year=string.match(date_string, "(%a+) +(%d%d?)–(%d%d?), +((%d%d%d%d)%a?)"); | |
+ | if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; | ||
+ | month = get_month_number (month); | ||
+ | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
+ | |||
+ | elseif date_string:match("^[1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day-initial: day month year | ||
day, month, anchor_year, year=string.match(date_string, "(%d%d*)%s*(%a+)%s*((%d%d%d%d)%a?)"); | day, month, anchor_year, year=string.match(date_string, "(%d%d*)%s*(%a+)%s*((%d%d%d%d)%a?)"); | ||
− | month = get_month_number (month | + | month = get_month_number (month); |
if 0 == month then return false; end -- return false if month text isn't one of the twelve months | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
− | elseif | + | elseif date_string:match("^[1-9]%d?–[1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then -- day-range-initial: day–day month year; days are separated by endash |
+ | day, day2, month, anchor_year, year=string.match(date_string, "(%d%d?)–(%d%d?) +(%a+) +((%d%d%d%d)%a?)"); | ||
+ | if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; | ||
+ | month = get_month_number (month); | ||
+ | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
+ | |||
+ | elseif mw.ustring.match (date_string, "^%a+–%a+ +[1-9]%d%d%d%a?$") then -- month/season range year; months separated by endash | ||
local month2 | local month2 | ||
− | + | month, month2, anchor_year, year=mw.ustring.match (date_string, "(%a+)[%-/–](%a+)%s*((%d%d%d%d)%a?)"); | |
− | + | ||
− | month, month2, anchor_year, year=mw.ustring.match (date_string, "(%a+) | + | |
− | + | ||
if false == is_valid_month_season_range(month, month2) then | if false == is_valid_month_season_range(month, month2) then | ||
return false; | return false; | ||
end | end | ||
− | elseif date_string:match("^%a+ | + | elseif date_string:match("^%a+ +%d%d%d%d%a?$") then -- month/season year |
− | + | ||
month, anchor_year, year=string.match(date_string, "(%a+)%s*((%d%d%d%d)%a?)"); | month, anchor_year, year=string.match(date_string, "(%a+)%s*((%d%d%d%d)%a?)"); | ||
− | + | if 0 == get_month_number (month) then -- if month text isn't one of the twelve months, might be a season | |
− | + | if 0 == get_season_number (month) then -- not a month, is it a season? | |
− | + | ||
− | + | ||
− | if | + | |
return false; -- return false not a month or one of the five seasons | return false; -- return false not a month or one of the five seasons | ||
end | end | ||
end | end | ||
− | elseif date_string:match("^ | + | elseif date_string:match("^[1-9]%d%d%d?%a?$") then -- year; here accept either YYY or YYYY |
− | + | ||
anchor_year, year=string.match(date_string, "((%d%d%d%d?)%a?)"); | anchor_year, year=string.match(date_string, "((%d%d%d%d?)%a?)"); | ||
− | + | ||
else | else | ||
return false; -- date format not one of the MOS:DATE approved formats | return false; -- date format not one of the MOS:DATE approved formats | ||
Line 535: | Line 607: | ||
if 0~=month and 0~=day then -- check year month day dates for validity | if 0~=month and 0~=day then -- check year month day dates for validity | ||
+ | if 0~=day2 then -- If there is a second day (d–d Mmm YYYY or Mmm d–d, YYYY) test the second date | ||
+ | if false==is_valid_date(year,month,day2) then | ||
+ | return false; -- second date in date range string is not a real date return false; unset anchor_year and coins_date | ||
+ | end -- if second date range string is valid, fall through to test the first date range | ||
+ | end | ||
if false==is_valid_date(year,month,day) then | if false==is_valid_date(year,month,day) then | ||
return false; -- date string is not a real date return false; unset anchor_year and coins_date | return false; -- date string is not a real date return false; unset anchor_year and coins_date | ||
end | end | ||
end | end | ||
+ | |||
+ | coins_date= mw.ustring.gsub( date_string, "–", "-" ); -- if here, then date_string is valid; set coins_date and replace any ndash with a hyphen | ||
return true, anchor_year, coins_date; -- format is good and date string represents a real date | return true, anchor_year, coins_date; -- format is good and date string represents a real date | ||
Line 561: | Line 640: | ||
for k, v in pairs(date_parameters_list) do -- for each date-holding parameter in the list | for k, v in pairs(date_parameters_list) do -- for each date-holding parameter in the list | ||
if is_set(v) then -- if the parameter has a value | if is_set(v) then -- if the parameter has a value | ||
− | if v:match("^c%. | + | if v:match("^c%. [1-9]%d%d%d?%a?$") then -- special case for c. year or with or without CITEREF disambiguator - only |date= and |year= |
if 'date'==k then | if 'date'==k then | ||
− | good_date, anchor_year, COinS_date = true, v:match("((c%. | + | good_date, anchor_year, COinS_date = true, v:match("((c%. [1-9]d%d%d?)%a?)"); -- anchor year and COinS_date only from |date= parameter |
elseif 'year'==k then | elseif 'year'==k then | ||
good_date = true; | good_date = true; | ||
end | end | ||
elseif 'year'==k then -- if the parameter is |year= (but not c. year) | elseif 'year'==k then -- if the parameter is |year= (but not c. year) | ||
− | if v:match("^ | + | if v:match("^[1-9]%d%d%d?%a?$") then -- year with or without CITEREF disambiguator |
good_date = true; | good_date = true; | ||
end | end | ||
Line 912: | Line 991: | ||
elseif k == 'PMC' then | elseif k == 'PMC' then | ||
table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } ); | table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } ); | ||
+ | elseif k == 'PMID' then | ||
+ | table.insert( new_list, {handler.label, pmid( v ) } ); | ||
elseif k == 'ISSN' then | elseif k == 'ISSN' then | ||
table.insert( new_list, {handler.label, issn( v ) } ); | table.insert( new_list, {handler.label, issn( v ) } ); | ||
Line 1,278: | Line 1,359: | ||
if not is_embargoed(Embargo) then | if not is_embargoed(Embargo) then | ||
URL=cfg.id_handlers['PMC'].prefix .. ID_list['PMC']; -- set url to be the same as the PMC external link if not embargoed | URL=cfg.id_handlers['PMC'].prefix .. ID_list['PMC']; -- set url to be the same as the PMC external link if not embargoed | ||
+ | URLorigin = cfg.id_handlers['PMC'].parameters[1]; -- set URLorigin to parameter name for use in error message if citation is missing a |title= | ||
end | end | ||
end | end | ||
Line 1,430: | Line 1,512: | ||
not is_set(TranscriptURL) then | not is_set(TranscriptURL) then | ||
− | -- Test if cite web is | + | -- Test if cite web or cite podcast |url= is missing or empty |
− | + | if inArray(config.CitationClass, {"web","podcast"}) then | |
− | + | table.insert( z.message_tail, { seterror( 'cite_web_url', {}, true ) } ); | |
− | + | end | |
-- Test if accessdate is given without giving a URL | -- Test if accessdate is given without giving a URL | ||
Line 1,476: | Line 1,558: | ||
TransChapter = wrap( 'trans-italic-title', TransChapter ); | TransChapter = wrap( 'trans-italic-title', TransChapter ); | ||
else | else | ||
+ | Chapter = kern_quotes (Chapter); -- if necessary, separate chapter title's leading and trailing quote marks from Module provided quote marks | ||
Chapter = wrap( 'quoted-title', Chapter ); | Chapter = wrap( 'quoted-title', Chapter ); | ||
TransChapter = wrap( 'trans-quoted-title', TransChapter ); | TransChapter = wrap( 'trans-quoted-title', TransChapter ); | ||
Line 1,523: | Line 1,606: | ||
if is_set(Periodical) then | if is_set(Periodical) then | ||
+ | Title = kern_quotes (Title); -- if necessary, separate title's leading and trailing quote marks from Module provided quote marks | ||
Title = wrap( 'quoted-title', Title ); | Title = wrap( 'quoted-title', Title ); | ||
TransTitle = wrap( 'trans-quoted-title', TransTitle ); | TransTitle = wrap( 'trans-quoted-title', TransTitle ); | ||
− | elseif inArray(config.CitationClass, {"web","news","pressrelease","conference"}) and | + | elseif inArray(config.CitationClass, {"web","news","pressrelease","conference","podcast"}) and |
not is_set(Chapter) then | not is_set(Chapter) then | ||
+ | Title = kern_quotes (Title); -- if necessary, separate title's leading and trailing quote marks from Module provided quote marks | ||
Title = wrap( 'quoted-title', Title ); | Title = wrap( 'quoted-title', Title ); | ||
TransTitle = wrap( 'trans-quoted-title', TransTitle ); | TransTitle = wrap( 'trans-quoted-title', TransTitle ); | ||
Line 1,593: | Line 1,678: | ||
if is_set(Pages) then | if is_set(Pages) then | ||
if is_set(Periodical) and | if is_set(Periodical) and | ||
− | not inArray(config.CitationClass, {"encyclopaedia","web","book","news"}) then | + | not inArray(config.CitationClass, {"encyclopaedia","web","book","news","podcast"}) then |
Pages = ": " .. Pages; | Pages = ": " .. Pages; | ||
elseif tonumber(Pages) ~= nil then | elseif tonumber(Pages) ~= nil then | ||
Line 1,603: | Line 1,688: | ||
else | else | ||
if is_set(Periodical) and | if is_set(Periodical) and | ||
− | not inArray(config.CitationClass, {"encyclopaedia","web","book","news"}) then | + | not inArray(config.CitationClass, {"encyclopaedia","web","book","news","podcast"}) then |
Page = ": " .. Page; | Page = ": " .. Page; | ||
else | else | ||
Line 1,659: | Line 1,744: | ||
-- handle type parameter for those CS1 citations that have default values | -- handle type parameter for those CS1 citations that have default values | ||
− | if inArray(config.CitationClass, {" | + | if inArray(config.CitationClass, {"podcast","pressrelease","techreport","thesis"}) then |
TitleType = set_titletype (config.CitationClass, TitleType); | TitleType = set_titletype (config.CitationClass, TitleType); | ||
if is_set(Degree) and "Thesis" == TitleType then -- special case for cite thesis | if is_set(Degree) and "Thesis" == TitleType then -- special case for cite thesis | ||
Line 1,782: | Line 1,867: | ||
local Publisher; | local Publisher; | ||
if is_set(Periodical) and | if is_set(Periodical) and | ||
− | not inArray(config.CitationClass, {"encyclopaedia","web","pressrelease"}) then | + | not inArray(config.CitationClass, {"encyclopaedia","web","pressrelease","podcast"}) then |
if is_set(PublisherName) then | if is_set(PublisherName) then | ||
if is_set(PublicationPlace) then | if is_set(PublicationPlace) then | ||
Line 1,829: | Line 1,914: | ||
end | end | ||
end | end | ||
+ | |||
+ | --[[ | ||
+ | Handle the oddity that is cite speech. This code overrides whatever may be the value assigned to TitleNote (through |department=) and forces it to be " (Speech)" so that | ||
+ | the annotation directly follows the |title= parameter value in the citation rather than the |event= parameter value (if provided). | ||
+ | ]] | ||
+ | if "speech" == config.CitationClass then -- cite speech only | ||
+ | TitleNote = " (Speech)"; -- annotate the citation | ||
+ | if is_set (Periodical) then -- if Periodical, perhaps because of an included |website= or |journal= parameter | ||
+ | if is_set (Conference) then -- and if |event= is set | ||
+ | Conference = Conference .. sepc .. " "; -- then add appropriate punctuation to the end of the Conference variable before rendering | ||
+ | end | ||
+ | end | ||
+ | end | ||
-- Piece all bits together at last. Here, all should be non-nil. | -- Piece all bits together at last. Here, all should be non-nil. | ||
Line 1,951: | Line 2,049: | ||
end | end | ||
names[ #names + 1 ] = first_set(Year, anchor_year); -- Year first for legacy citations | names[ #names + 1 ] = first_set(Year, anchor_year); -- Year first for legacy citations | ||
− | |||
id = anchorid(names) | id = anchorid(names) | ||
end | end |