pandoc-lua-filters/text-crossrefs.lua

542 lines
18 KiB
Lua
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- TODO: allow several references to be cited with a single prefix, e.g.
-- [ref-one, ref-two>ref-four, ref-three]{.ref} → pp. 1, 3-6 and 12
-- Complete the README and the tests once this is done.
-- TODO: create LaTeX and ConTeXt commands for enumerations
-- of notes and pages, modelled on \tcrfpagerangeref.
-- Begin of initialization
-- Lua pattern describing the characters accepted in identifiers and references.
local IDENTIFIER_PATTERN = '[%w_.:-]+'
-- Format-dependent settings. RAW_ATTRIBUTE is assigned by
-- define_raw_attribute(); IS_LABEL_SET_BY_PANDOC and LABEL_TEMPLATE are
-- assigned by define_label_template(). NOTELABEL_TEMPLATE is declared here but
-- not assigned anywhere in the visible code.
local RAW_ATTRIBUTE, IS_LABEL_SET_BY_PANDOC
local LABEL_TEMPLATE, NOTELABEL_TEMPLATE
--- Choose the raw-output format that matches pandoc's target FORMAT.
-- The result is stored in RAW_ATTRIBUTE. For the 'native' format, the value is
-- read from the TESTED_FORMAT environment variable.
-- Raises an error when FORMAT is none of the supported formats.
local function define_raw_attribute()
  if FORMAT == 'native' then
    RAW_ATTRIBUTE = pandoc.system.environment().TESTED_FORMAT
    return
  end
  local raw_attribute_for = {
    docx = 'openxml',
    odt = 'opendocument',
    opendocument = 'opendocument',
    context = 'context',
    latex = 'latex',
  }
  local raw_attribute = raw_attribute_for[FORMAT]
  if raw_attribute == nil then
    error(FORMAT ..
      ' output not supported by text-crossrefs.lua.')
  end
  RAW_ATTRIBUTE = raw_attribute
end
--- Decide how labels are produced for the current RAW_ATTRIBUTE.
-- Either sets LABEL_TEMPLATE (raw code containing a {{label}} placeholder) or
-- sets IS_LABEL_SET_BY_PANDOC to true. For any other RAW_ATTRIBUTE value,
-- both are left untouched.
local function define_label_template()
  if RAW_ATTRIBUTE == 'latex' then
    LABEL_TEMPLATE = '\\label{{{label}}}'
    return
  end
  -- The version is only consulted for ConTeXt output.
  if RAW_ATTRIBUTE == 'context'
    and PANDOC_VERSION < pandoc.types.Version('2.14')
  then
    LABEL_TEMPLATE = '\\pagereference[{{label}}]'
    return
  end
  local label_comes_from_pandoc = RAW_ATTRIBUTE == 'context'
    or RAW_ATTRIBUTE == 'opendocument'
    or RAW_ATTRIBUTE == 'openxml'
  if label_comes_from_pandoc then
    IS_LABEL_SET_BY_PANDOC = true
  end
end
-- Run the initialization at load time. The order matters:
-- define_label_template() branches on the RAW_ATTRIBUTE value that
-- define_raw_attribute() has just set.
define_raw_attribute()
define_label_template()
-- End of initialization
-- Configuration
-- Default settings. Each key can be overridden by a metadata field named
-- 'tcrf-' followed by the key with underscores replaced by hyphens
-- (see set_configuration_item_from_metadata).
local config = {
  -- Kind of information printed when a reference span has no type attribute.
  default_info_type = 'page',
  -- Compared with the string 'false' when collecting the labels of a note.
  only_explicit_labels = 'false',

  -- Prefixes printed before page and note references.
  page_prefix = 'p. ',
  pages_prefix = 'p. ',
  note_prefix = 'n. ',
  notes_prefix = 'n. ',

  -- Combined page-and-note references ('pagefirst' or 'notefirst').
  pagenote_order = 'pagefirst',
  pagenote_separator = ', ',
  pagenote_at_end = '',

  -- Separators recognized inside the text of a reference span.
  references_range_separator = '>',
  references_enum_separator = ';',
  filelabel_ref_separator = '::',

  -- Separators printed in the output.
  range_separator = '-',
  multiple_delimiter = ', ',
  multiple_before_last = ' and '
}
--- Wrap the textual configuration values in OpenXML text runs.
-- Every configuration string that ends up in the raw OpenXML output must be
-- enclosed in <w:r><w:t>…</w:t></w:r>. The list therefore also covers
-- notes_prefix, multiple_delimiter and multiple_before_last, which
-- format_prefix and format_all_items_in_enumeration concatenate into the raw
-- code for enumerations.
-- Modifies config in place; meant to be called once, after the metadata have
-- been read.
local function format_config_to_openxml()
  -- Local on purpose: a bare assignment would create a global.
  local to_format = {
    'page_prefix',
    'pages_prefix',
    'note_prefix',
    'notes_prefix',
    'pagenote_separator',
    'pagenote_at_end',
    'range_separator',
    'multiple_delimiter',
    'multiple_before_last',
  }
  for i = 1, #to_format do
    local key = to_format[i]
    config[key] = '<w:r><w:t xml:space="preserve">' ..
      config[key] .. '</w:t></w:r>'
  end
end
--- Override one configuration item with the matching metadata value, if any.
-- The metadata key is 'tcrf-' followed by the item name with underscores
-- replaced by hyphens (page_prefix → tcrf-page-prefix).
-- @param item     key of the config table
-- @param metamap  the document metadata
local function set_configuration_item_from_metadata(item, metamap)
  -- Parentheses drop gsub's second return value (the substitution count).
  local metakey = 'tcrf-' .. (string.gsub(item, '_', '-'))
  local metavalue = metamap[metakey]
  if metavalue == nil then
    return
  end
  if type(metavalue) == 'boolean' then
    -- config values are compared as strings (e.g. only_explicit_labels).
    config[item] = tostring(metavalue)
  else
    -- stringify keeps the whole value, not just its first inline, and does
    -- not depend on the legacy `.c` accessor.
    config[item] = pandoc.utils.stringify(metavalue)
  end
end
--- Meta filter: read the user's settings from the document metadata.
-- Every key of config is looked up in the metadata; for OpenXML output the
-- resulting values are then wrapped in text runs.
-- @param metadata  the document metadata
local function configure(metadata)
  for item in pairs(config) do
    set_configuration_item_from_metadata(item, metadata)
  end
  if RAW_ATTRIBUTE ~= 'openxml' then
    return
  end
  format_config_to_openxml()
end
-- End of configuration
-- Extensions for the output document's format
--- Pandoc filter: prepend the TeX macros used by the references.
-- For ConTeXt and LaTeX output, a raw block defining \tcrfpage, \tcrfpages,
-- \tcrfrangesep and \tcrfpagerangeref (plus a redefinition of \footnote or
-- \label that records the page number) is inserted as the first block of the
-- document. For the other formats, the document is returned unchanged.
-- @param document  the Pandoc document
-- @return the same document
local function define_tex_macros(document)
  local builders = {}

  function builders.context()
    local footnote_redefinition =
      '\\let\\oldfootnote\\footnote\n' ..
      '\\define[2]\\footnote{\\oldfootnote[#2]{#1}%\n' ..
      '\\expandafter\\edef\\csname #2pagenumber\\endcsname{\\userpage}}\n'
    local predefined_strings =
      '\\define\\tcrfpage{' .. config.page_prefix .. '}\n' ..
      '\\define\\tcrfpages{' .. config.pages_prefix .. '}\n' ..
      '\\define\\tcrfrangesep{' .. config.range_separator .. '}\n'
    -- Only defined when the user has not already provided the macro.
    local range_ref =
      '\\ifdefined\\tcrfpagerangeref\\else\n' ..
      '\\define[2]\\tcrfpagerangeref{' ..
      '\\if' ..
      '\\csname #1pagenumber\\endcsname' ..
      '\\csname #2pagenumber\\endcsname\n' ..
      '\\tcrfpage\\at[#1]\n' ..
      '\\else\n' ..
      '\\tcrfpages\\at[#1]\\tcrfrangesep\\at[#2]\\fi}\n' ..
      '\\fi\n'
    return footnote_redefinition .. predefined_strings .. range_ref
  end

  function builders.latex()
    local predefined_strings =
      '\\newcommand*{\\tcrfpage}{' .. config.page_prefix .. '}\n' ..
      '\\newcommand*{\\tcrfpages}{' .. config.pages_prefix .. '}\n' ..
      '\\newcommand*{\\tcrfrangesep}{' .. config.range_separator .. '}\n'
    local label_redefinition =
      '\\let\\oldlabel\\label\n' ..
      '\\renewcommand*{\\label}[1]{\\oldlabel{#1}%\n' ..
      '\\expandafter\\xdef\\csname #1pagenumber\\endcsname{\\thepage}}\n'
    -- Only defined when the user has not already provided the macro.
    local range_ref =
      '\\ifdefined\\tcrfpagerangeref\\else\n' ..
      '\\newcommand*{\\tcrfpagerangeref}[2]{%\n' ..
      '\\if' ..
      '\\csname #1pagenumber\\endcsname' ..
      '\\csname #2pagenumber\\endcsname\n' ..
      '\\tcrfpage\\pageref{#1}\n' ..
      '\\else\n' ..
      '\\tcrfpages\\pageref{#1}\\tcrfrangesep\\pageref{#2}\\fi}\n' ..
      '\\fi\n'
    return predefined_strings .. label_redefinition .. range_ref
  end

  local build_macros = builders[RAW_ATTRIBUTE]
  if build_macros then
    -- Here RAW_ATTRIBUTE is either 'context' or 'latex'.
    local macros_block = pandoc.RawBlock(RAW_ATTRIBUTE, build_macros())
    table.insert(document.blocks, 1, macros_block)
  end
  return document
end
-- End of the extensions for the output document's format
-- Identifiers
-- Span identifier → label of the note that contains the span
-- (filled by map_span_to_label, read by format_note_reference).
local spans_to_note_labels = {}
-- Label shared by all the identified spans of the note being walked, and flag
-- telling whether the next identified span is the first one of that note.
local current_note_label
local is_first_span_in_note = true
-- Counter used to build the 'ftn<N>' labels for OpenDocument output;
-- incremented by map_spans_to_notelabels after each note.
local current_odt_note_index = 0
--- Record which note label an identified span belongs to.
-- OpenDocument: the label is 'ftn' followed by the current note index.
-- OpenXML and ConTeXt: the label is the identifier of the first identified
-- span met in the current note. Nothing is recorded for other formats.
-- @param span  a Span with a non-empty identifier
local function map_span_to_label(span)
  local id = span.identifier
  if RAW_ATTRIBUTE == 'opendocument' then
    spans_to_note_labels[id] = 'ftn' .. current_odt_note_index
    return
  end
  if RAW_ATTRIBUTE ~= 'openxml' and RAW_ATTRIBUTE ~= 'context' then
    return
  end
  if is_first_span_in_note then
    current_note_label = id
    is_first_span_in_note = false
  end
  spans_to_note_labels[id] = current_note_label
end
--- Recursively register every identified span found in a container.
-- @param container  an element with a `content` list
local function map_spans_to_labels(container)
  local children = container.content
  for index = 1, #children do
    local child = children[index]
    -- The two checks are independent so that spans nested in spans are
    -- registered as well.
    if child.t == 'Span' and child.identifier ~= '' then
      map_span_to_label(child)
    end
    if child.content then
      map_spans_to_labels(child)
    end
  end
end
--- Note filter: associate the identified spans of a note with its label.
-- Only acts for ConTeXt, OpenDocument and OpenXML output. The note index is
-- incremented after each processed note.
-- @param note  a Note element
local function map_spans_to_notelabels(note)
  local handled_formats = { context = true, opendocument = true, openxml = true }
  if not handled_formats[RAW_ATTRIBUTE] then
    return
  end
  is_first_span_in_note = true
  map_spans_to_labels(note)
  current_odt_note_index = current_odt_note_index + 1
end
--- Build the inline that marks the position of a label.
-- @param label  the identifier to define
-- @return an empty Str when pandoc itself emits the label; otherwise a
--         RawInline built from LABEL_TEMPLATE
local function make_label(label)
  if IS_LABEL_SET_BY_PANDOC then
    return pandoc.Str('')
  end
  -- A function replacement inserts the label verbatim: with a string
  -- replacement, gsub would interpret any '%' in the label as an escape.
  -- Only gsub's first result (the string) is kept.
  local label_rawcode = string.gsub(LABEL_TEMPLATE, '{{label}}',
    function() return label end)
  return pandoc.RawInline(RAW_ATTRIBUTE, label_rawcode)
end
--- Span filter: put a label in front of every span that has an identifier.
-- @param span  a Span element
-- @return the list { label, span }, or nothing when the span has no identifier
local function labelize_span(span)
  if span.identifier == '' then
    return
  end
  -- make_label takes the label only; the former second argument was ignored.
  return { make_label(span.identifier), span }
end
--- Tell whether an element carries a given class.
-- @param elem   an element with a `classes` list
-- @param class  the class name to look for
-- @return true or false
-- Raises an error when the element has no `classes` field.
local function has_class(elem, class)
  local classes = elem.classes
  if not classes then
    error('function has_class used on an element of type ' ..
      elem.t .. ' that cannot have classes.')
  end
  local found = false
  local index = 1
  local count = #classes
  while index <= count and not found do
    found = classes[index] == class
    index = index + 1
  end
  return found
end
-- Identifiers of the labelled spans found in the note being processed.
local current_note_labels = {}
-- Filter applied to the content of a note: it appends to current_note_labels
-- the identifier of every span that counts as a label. Any identified span
-- counts when only_explicit_labels is 'false'; otherwise the span must also
-- have the class 'label'.
local collect_note_labels = {
  Span = function(span)
    if span.identifier == '' then
      return
    end
    local implicit_labels_allowed = config.only_explicit_labels == 'false'
    if implicit_labels_allowed or has_class(span, 'label') then
      table.insert(current_note_labels, span.identifier)
    end
  end
}
--- Build the raw code placed before or after a labelled note.
-- OpenXML: a bookmark start ('begin') or bookmark end ('end') named after the
-- last label collected in the note. Both use the same label, so that the
-- w:id of the end element matches the w:id of the start element even when
-- the note contains several labels.
-- ConTeXt: at 'end', a braced group holding the first collected label
-- followed by '_note'.
-- Any other combination yields an empty raw inline.
-- @param pos  'begin' or 'end'
-- @return a RawInline in the RAW_ATTRIBUTE format
local function make_notelabel(pos)
  local raw_code = ''
  if RAW_ATTRIBUTE == 'openxml' then
    local label = current_note_labels[#current_note_labels]
    if pos == 'begin' then
      raw_code = '<w:bookmarkStart w:id="' .. label ..
        '_Note" w:name="' .. label .. '_Note"/>'
    elseif pos == 'end' then
      raw_code = '<w:bookmarkEnd w:id="' .. label .. '_Note"/>'
    end
  elseif RAW_ATTRIBUTE == 'context' and pos == 'end' then
    raw_code = '{' .. current_note_labels[1] .. '_note' .. '}'
  end
  return pandoc.RawInline(RAW_ATTRIBUTE, raw_code)
end
--- Surround a note with its opening and closing label code.
-- @param note  a Note element
-- @return the list { begin label, note, end label }
local function labelize_note(note)
  local labelled = {}
  labelled[1] = make_notelabel('begin')
  labelled[2] = note
  labelled[3] = make_notelabel('end')
  return labelled
end
--- Note filter: add label code around the notes that contain labels.
-- Declared local so that it does not leak into the global environment; it is
-- only referenced by the filter list returned at the end of the file.
-- @param note  a Note element
-- @return the list { begin label, note, end label } when the note contains
--         at least one label, nothing otherwise
local function set_notelabels(note)
  -- Start from an empty list for every note.
  current_note_labels = {}
  pandoc.walk_inline(note, collect_note_labels)
  if #current_note_labels > 0 then
    return labelize_note(note)
  end
end
-- End of identifiers-related code
-- References
--- Check that a reference only contains the characters allowed in identifiers.
-- The former pattern ('^[' .. IDENTIFIER_PATTERN .. ']') expanded to
-- '^[[%w_.:-]+]', which does not test for invalid characters at all.
-- The callers pass whole enumerations through this function (see
-- analyze_reference_span), so each item between enumeration separators is
-- validated on its own and the separator itself is accepted.
-- NOTE(review): %w is locale-dependent; in the C locale, identifiers
-- containing non-ASCII letters are rejected, as the error message states.
-- @param ref  the reference (or enumeration of references) to check
-- @return true when the reference is valid
-- Raises an error otherwise.
local function is_reference_valid(ref)
  local item_pattern = '[^%' .. config.references_enum_separator .. ']+'
  local valid_item_pattern = '^' .. IDENTIFIER_PATTERN .. '$'
  for item in string.gmatch(ref, item_pattern) do
    if not string.find(item, valid_item_pattern) then
      error('text-crossrefs.lua: Invalid character in reference: ' .. ref ..
        '\nIdentifier and reference names can only contain' ..
        ' alphanumerical characters, periods, colons, underscores' ..
        ' and hyphens.\n')
    end
  end
  return true
end
--- Tell whether a raw reference contains the file-label separator.
-- @param rawref  the text of a reference span
-- @return true or false
local function is_ref_external(rawref)
  -- Plain search: the separator is not interpreted as a pattern.
  local separator_index =
    string.find(rawref, config.filelabel_ref_separator, 1, true)
  return separator_index ~= nil
end
--- Tell whether a raw reference contains the range separator.
-- @param rawref  the text of a reference span
-- @return true or false
local function is_ref_range(rawref)
  -- Plain search: the separator is not interpreted as a pattern.
  local separator_index =
    string.find(rawref, config.references_range_separator, 1, true)
  return separator_index ~= nil
end
--- Compute the index of the last character of the first reference.
-- Declared local so that it does not leak into the global environment; it is
-- only used by get_first_reference.
-- @param range_separator_index  index of the range separator, or nil
-- @return the index just before the separator; nothing when there is no
--         separator, so that string.sub then runs to the end of the string
local function get_first_reference_end_index(range_separator_index)
  if range_separator_index then
    return range_separator_index - 1
  end
end
--- Extract the first (or only) reference from a raw reference.
-- The result starts after the first file-label separator, if any, and stops
-- before the first range separator, if any.
-- @param rawref  the text of a reference span
-- @return the reference, once is_reference_valid has accepted it
local function get_first_reference(rawref)
  local _, file_separator_end =
    string.find(rawref, config.filelabel_ref_separator, 1, true)
  local range_separator_start =
    string.find(rawref, config.references_range_separator, 1, true)
  local first_index = 1
  if file_separator_end then
    first_index = file_separator_end + 1
  end
  local ref = string.sub(rawref, first_index,
    get_first_reference_end_index(range_separator_start))
  if is_reference_valid(ref) then
    return ref
  end
end
--- Extract the second reference of a range.
-- When the raw reference contains two file-label separators, the result
-- starts after the second one; otherwise it starts after the range
-- separator.
-- The former code read config.file_ref_separator_index, which is never set,
-- and therefore failed on any reference containing a file-label separator.
-- @param rawref  the text of a reference span describing a range
-- @return the reference, once is_reference_valid has accepted it
-- Raises an error when the start of the second reference cannot be found.
local function get_second_reference(rawref)
  local file_separator = config.filelabel_ref_separator
  local second_ref_begin_index
  local _, first_file_separator_end =
    string.find(rawref, file_separator, 1, true)
  if first_file_separator_end then
    -- Look for a second file label after the first one.
    local _, second_file_separator_end =
      string.find(rawref, file_separator, first_file_separator_end + 1, true)
    if second_file_separator_end then
      second_ref_begin_index = second_file_separator_end + 1
    end
  end
  if not second_ref_begin_index then
    local _, range_separator_end =
      string.find(rawref, config.references_range_separator, 1, true)
    if not range_separator_end then
      error('text-crossrefs.lua: No second reference found in range: ' ..
        rawref)
    end
    second_ref_begin_index = range_separator_end + 1
  end
  local ref = string.sub(rawref, second_ref_begin_index)
  if is_reference_valid(ref) then
    return ref
  end
end
--- Tell whether a raw reference contains the enumeration separator.
-- The separator is used as a pattern, escaped with a leading '%'.
-- @param raw_reference  the text of a reference span
-- @return true or false
local function is_ref_enumeration(raw_reference)
  local separator_pattern = '%' .. config.references_enum_separator
  return string.find(raw_reference, separator_pattern) ~= nil
end
--- Parse the content of a reference span.
-- @param reference_span  a Span whose content is a single Str
-- @return a table with the fields is_external, is_range, is_enumeration,
--         first, and, when relevant, filelabel and second
-- Raises an error when the span content is not a single Str.
local function analyze_reference_span(reference_span)
  local content = reference_span.content
  if #content ~= 1 or content[1].t ~= 'Str' then
    error('The content of a span with class ref must be a plain string.')
  end
  -- Prefer the documented `text` field of Str and fall back to the legacy
  -- `c` accessor. Both variables are local so that no state leaks between
  -- calls through the global environment.
  local raw_reference = content[1].text or content[1].c
  local analyzed_reference = {}
  analyzed_reference.is_external = is_ref_external(raw_reference)
  analyzed_reference.is_range = is_ref_range(raw_reference)
  analyzed_reference.is_enumeration = is_ref_enumeration(raw_reference)
  if analyzed_reference.is_external then
    -- NOTE(review): get_extfilelabel is not defined in the visible part of
    -- this file; confirm that it exists before relying on external
    -- references.
    analyzed_reference.filelabel = get_extfilelabel(raw_reference)
  end
  analyzed_reference.first = get_first_reference(raw_reference)
  if analyzed_reference.is_range then
    analyzed_reference.second = get_second_reference(raw_reference)
  end
  return analyzed_reference
end
--- Build the XML field that prints the page number of a target.
-- OpenDocument: a text:bookmark-ref element. OpenXML: a PAGEREF field.
-- '000' is the placeholder text written into the field.
-- @param target  name of the bookmark to refer to
-- @return the raw XML, or nothing for the other formats
local function insert_page_target_in_xml(target)
  if RAW_ATTRIBUTE == 'openxml' then
    local field_begin =
      '<w:r><w:fldChar w:fldCharType="begin" w:dirty="true"/></w:r>'
    local instruction = '<w:r><w:instrText xml:space="preserve"> PAGEREF ' ..
      target .. ' \\h </w:instrText></w:r>'
    local field_separator = '<w:r><w:fldChar w:fldCharType="separate"/></w:r>'
    local placeholder = '<w:r><w:t>000</w:t></w:r>'
    local field_end = '<w:r><w:fldChar w:fldCharType="end"/></w:r>'
    return field_begin .. instruction .. field_separator ..
      placeholder .. field_end
  end
  if RAW_ATTRIBUTE == 'opendocument' then
    return '<text:bookmark-ref  text:reference-format="page" text:ref-name="' ..
      target .. '">000</text:bookmark-ref>'
  end
end
--- Return the prefix to print before a reference.
-- Single references use config[<info_type>_prefix]. Enumerations get no
-- prefix for ConTeXt and LaTeX output, and config[<info_type>s_prefix] for
-- OpenDocument and OpenXML output.
-- @param info_type       'page' or 'note'
-- @param is_enumeration  whether the reference is an enumeration
-- @return the prefix, or nothing for an enumeration in any other format
local function format_prefix(info_type, is_enumeration)
  if not is_enumeration then
    return config[info_type .. '_prefix']
  end
  local is_tex = RAW_ATTRIBUTE == 'context' or RAW_ATTRIBUTE == 'latex'
  if is_tex then
    return ''
  end
  local is_xml = RAW_ATTRIBUTE == 'opendocument' or RAW_ATTRIBUTE == 'openxml'
  if is_xml then
    return config[info_type .. 's_prefix']
  end
end
--- Build the raw code that prints the page number of a target.
-- @param target  the identifier referred to
-- @return the raw code, or nothing for an unsupported format
local function format_page_reference(target)
  if RAW_ATTRIBUTE == 'latex' then
    return '\\pageref{' .. target .. '}'
  end
  if RAW_ATTRIBUTE == 'context' then
    return '\\at[' .. target .. ']'
  end
  -- Both XML formats are handled by the same helper.
  if RAW_ATTRIBUTE == 'opendocument' or RAW_ATTRIBUTE == 'openxml' then
    return insert_page_target_in_xml(target)
  end
end
--- Build the raw code that prints a range of pages.
-- ConTeXt and LaTeX: a call to \tcrfpagerangeref. OpenDocument and OpenXML:
-- the two page fields joined by the range separator, preceded by the plural
-- page prefix when requested.
-- @param first        identifier of the start of the range
-- @param second       identifier of the end of the range
-- @param is_prefixed  whether the prefix must be printed
-- @return the raw code, or nothing for an unsupported format
local function format_pagerange_reference(first, second, is_prefixed)
  local is_tex = RAW_ATTRIBUTE == 'context' or RAW_ATTRIBUTE == 'latex'
  local is_xml = RAW_ATTRIBUTE == 'opendocument' or RAW_ATTRIBUTE == 'openxml'
  if is_tex then
    local bracketed_arg = ''
    if is_prefixed then
      bracketed_arg = 'prefixed'
    end
    -- TODO: implement the bracketed argument
    return '\\tcrfpagerangeref[' .. bracketed_arg .. ']{' .. first .. '}{' .. second .. '}'
  elseif is_xml then
    local prefix = ''
    if is_prefixed then
      prefix = config.pages_prefix
    end
    return prefix .. insert_page_target_in_xml(first) ..
      config.range_separator .. insert_page_target_in_xml(second)
  end
end
--- Build the raw code that prints the number of the note containing a target.
-- ConTeXt and OpenDocument refer to the note label recorded in
-- spans_to_note_labels; LaTeX and OpenXML use the target identifier itself.
-- '000' is the placeholder text written into the XML fields.
-- @param target  the identifier referred to
-- @return the raw code, or nothing for an unsupported format
local function format_note_reference(target)
  if RAW_ATTRIBUTE == 'latex' then
    return '\\ref{' .. target .. '}'
  end
  if RAW_ATTRIBUTE == 'context' then
    return '\\in[' .. spans_to_note_labels[target] .. '_note]'
  end
  if RAW_ATTRIBUTE == 'opendocument' then
    local opening = '<text:note-ref text:note-class="footnote"' ..
      ' text:reference-format="text" text:ref-name="'
    return opening .. spans_to_note_labels[target] .. '">000</text:note-ref>'
  end
  if RAW_ATTRIBUTE == 'openxml' then
    local field_begin =
      '<w:r><w:fldChar w:fldCharType="begin" w:dirty="true"/></w:r>'
    local instruction = '<w:r><w:instrText xml:space="preserve"> NOTEREF ' ..
      target .. '_Note \\h </w:instrText></w:r>'
    local field_separator = '<w:r><w:fldChar w:fldCharType="separate"/></w:r>'
    local placeholder = '<w:r><w:t>000</w:t></w:r>'
    local field_end = '<w:r><w:fldChar w:fldCharType="end"/></w:r>'
    return field_begin .. instruction .. field_separator ..
      placeholder .. field_end
  end
end
--- Build a combined reference to the page and the note of a target.
-- The order of the two parts follows config.pagenote_order; they are joined
-- by config.pagenote_separator and followed by config.pagenote_at_end.
-- @param target  the identifier referred to
-- @return the raw code
-- Raises an error when pagenote_order is neither 'pagefirst' nor 'notefirst'.
local function format_pagenote_reference(target)
  local order = config.pagenote_order
  if order ~= 'pagefirst' and order ~= 'notefirst' then
    error('tcrf-pagenote-order must be set either to pagefirst or notefirst.')
  end

  -- Prefix followed by the reference for one kind of information.
  local function prefixed_reference(info_type)
    local prefix = format_prefix(info_type, false)
    if info_type == 'page' then
      return prefix .. format_page_reference(target)
    end
    return prefix .. format_note_reference(target)
  end

  local leading, trailing = 'page', 'note'
  if order == 'notefirst' then
    leading, trailing = 'note', 'page'
  end
  return prefixed_reference(leading) .. config.pagenote_separator ..
    prefixed_reference(trailing) .. config.pagenote_at_end
end
--- Build the raw code for one analyzed reference.
-- @param target     the table returned by analyze_reference_span
-- @param info_type  'page', 'note' or 'pagenote'
-- @return the raw code for the reference
-- Raises an error for any other info_type.
local function format_reference(target, info_type)
  if info_type == 'page' then
    if target.is_range then
      -- Ranges inside enumerations are printed without their own prefix.
      return format_pagerange_reference(target.first, target.second,
        not target.is_enumeration)
    end
    return format_page_reference(target.first)
  end
  if info_type == 'note' then
    return format_note_reference(target.first)
  end
  if info_type == 'pagenote' then
    return format_pagenote_reference(target.first)
  end
  error('Invalid value for attribute type in span with class ref: ' ..
    info_type)
end
--- Return the text printed before the body of a reference.
-- Only non-range references of type 'page' or 'note' get a prefix here.
-- @param info_type       kind of information requested
-- @param is_enumeration  whether the reference is an enumeration
-- @param is_range        whether the reference is a range
-- @return the prefix, or an empty string
local function make_reference_head(info_type, is_enumeration, is_range)
  local takes_prefix = info_type == 'page' or info_type == 'note'
  if is_range or not takes_prefix then
    return ''
  end
  return format_prefix(info_type, is_enumeration)
end
--- Format every item of an enumeration of references.
-- The items are joined with config.multiple_delimiter, except the last two,
-- which are joined with config.multiple_before_last.
-- @param enum       the references, separated by the enumeration separator
-- @param info_type  kind of information requested for each item
-- @return the raw code for the whole enumeration
local function format_all_items_in_enumeration(enum, info_type)
  local item_pattern = '[^%' .. config.references_enum_separator .. ']+'
  local formatted_items = {}
  for ref in string.gmatch(enum, item_pattern) do
    -- Each item is wrapped in a span so that it can go through the same
    -- analysis as a standalone reference. The variables are local so that
    -- nothing leaks into the global environment.
    local ref_span = pandoc.Span(ref, {['type'] = info_type})
    local target_in_enum = analyze_reference_span(ref_span)
    formatted_items[#formatted_items + 1] =
      format_reference(target_in_enum, info_type)
  end
  local item_count = #formatted_items
  -- Collect the pieces and join them once instead of growing a string.
  local pieces = {}
  for i = 1, item_count do
    pieces[#pieces + 1] = formatted_items[i]
    if i < item_count - 1 then
      pieces[#pieces + 1] = config.multiple_delimiter
    elseif i == item_count - 1 then
      pieces[#pieces + 1] = config.multiple_before_last
    end
  end
  return table.concat(pieces)
end
--- Build the raw code for an enumeration of references.
-- @param target     the table returned by analyze_reference_span; its
--                   `first` field holds the enumeration text
-- @param info_type  kind of information requested
-- @return the raw code, or nothing for an unsupported format
local function format_enumeration(target, info_type)
  local is_tex = RAW_ATTRIBUTE == 'context' or RAW_ATTRIBUTE == 'latex'
  if is_tex then
    -- TODO
    -- NOTE(review): format_enumeration_smart is not defined in the visible
    -- part of this file; confirm that it exists before relying on
    -- enumerations in ConTeXt or LaTeX output.
    return format_enumeration_smart(target.first, info_type)
  end
  local is_xml = RAW_ATTRIBUTE == 'opendocument' or RAW_ATTRIBUTE == 'openxml'
  if is_xml then
    return format_all_items_in_enumeration(target.first, info_type)
  end
end
--- Build the main part of a reference (everything after the prefix).
-- @param target     the table returned by analyze_reference_span
-- @param info_type  kind of information requested
-- @return the raw code produced by format_enumeration for enumerations, and
--         by format_reference otherwise
local function make_reference_body(target, info_type)
  if target.is_enumeration then
    -- The parentheses keep exactly one return value.
    return (format_enumeration(target, info_type))
  end
  return (format_reference(target, info_type))
end
--- Span filter: turn a span with class 'ref' into a cross-reference.
-- The first element of the span content is replaced by a raw inline holding
-- the prefix and the body of the reference. Spans without the class 'ref'
-- and external references are left untouched.
-- @param span  a Span element
-- @return the modified span, or nothing when the span is left untouched
local function make_reference(span)
  if not has_class(span, 'ref') then
    return
  end
  local target = analyze_reference_span(span)
  if target.is_external then
    return
  end
  -- The `type` attribute of the span overrides the configured default.
  local info_type = span.attributes.type or config.default_info_type
  local head = make_reference_head(info_type, target.is_enumeration, target.is_range)
  local body = make_reference_body(target, info_type)
  span.content[1] = pandoc.RawInline(RAW_ATTRIBUTE, head .. body)
  return span
end
-- End of references-related code
-- List of filters returned to pandoc. Each table holds a single filter
-- function, and the order of the list matters:
-- the configuration is read first, because every later step reads `config`;
-- the TeX macros are then inserted at the top of the document;
-- the notes are labelled and their spans mapped to note labels before the
-- spans themselves are labelled and the references are built, because
-- format_note_reference reads the mapping filled by map_spans_to_notelabels.
return {
{ Meta = configure },
{ Pandoc = define_tex_macros },
{ Note = set_notelabels },
{ Note = map_spans_to_notelabels },
{ Span = labelize_span },
{ Span = make_reference }
}