Files
pandoc-lua-filters/text-crossrefs/text-crossrefs.lua
2025-12-21 20:40:26 +01:00

699 lines
23 KiB
Lua
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- text-crossrefs.lua
-- A Pandoc Lua filter that extends Pandoc's cross-referencing abilities
-- with references to any portion of text
-- by its page number, its note number (when applicable)
-- or an arbitrary reference type (with ConTeXt or LaTeX output).
-- Copyright 2024–2025 Bastien Dumont (bastien.dumont [at] posteo.net)
-- This file is under the MIT License: see LICENSE for more details
-- Shorthand for Pandoc's helper turning elements into plain text.
local stringify = pandoc.utils.stringify

-- Class that marks a span as a cross-reference.
local TEXT_CROSSREF_CLASS = 'tcrf'
-- Names of the span attributes read by this filter.
local REF_TYPE_ATTR = 'reftype'
local PREFIXED_ATTR = 'prefixref'
local PLACE_LABEL_ATTR = 'refanchor'

-- Configuration items whose metadata value is a list, not a scalar.
local IS_CONFIG_ARRAY = { additional_types = true }

-- Format name given to the raw inlines we emit;
-- assigned by define_raw_attribute().
local RAW_ATTRIBUTE = nil
-- Writes a warning on standard error, prefixed with the filter's name
-- and followed by a newline.
local function warning(message)
  local line = 'WARNING [text-crossrefs]: ' .. message .. '\n'
  io.stderr:write(line)
end
-- ConTeXt-specific tweak in order to add the label to the footnote
--[[
Placing the label in square brackets immediately after \footnote
in the regular way would require unpacking the content
of the Note and wrapping them with the RawInlines
'\footnote[note:' .. label .. ']{' and '}'.
However, Notes have the strange property of being Inlines
that contain Blocks, so this would result in Blocks being
brought into the content of the object that contains the Note,
which would be invalid.
That's why we place the label at the end of the \footnote
and redefine the macro so that it takes it into account.
]]--
-- Meta filter: with ConTeXt output, appends the definition of
-- \withfirstopt to the "header-includes" metadata field
-- (creating the field first if it is missing).
-- Returns the metadata, unchanged for any other output.
local function support_footnote_label_ConTeXt(metadata)
  if RAW_ATTRIBUTE ~= 'context' then
    return metadata
  end
  local HEADER_KEY = 'header-includes'
  if not metadata[HEADER_KEY] then
    local empty_block = pandoc.RawBlock('context', '')
    metadata[HEADER_KEY] = pandoc.MetaBlocks(empty_block)
  end
  local macro_definition = pandoc.RawBlock(
    'context', '\n\\def\\withfirstopt[#1]#2{#2[#1]}\n')
  metadata[HEADER_KEY]:insert(macro_definition)
  return metadata
end
-- Configuration
-- Sets RAW_ATTRIBUTE according to the output format.
-- With "native" output, the value is read
-- from the TESTED_FORMAT environment variable.
-- Raises an error for any other unsupported output format.
local function define_raw_attribute()
  if FORMAT == 'native' then
    RAW_ATTRIBUTE = pandoc.system.environment().TESTED_FORMAT
    return
  end
  local raw_attribute_by_format = {
    docx = 'openxml',
    odt = 'opendocument',
    opendocument = 'opendocument',
    context = 'context',
    latex = 'latex',
    typst = 'typst'
  }
  local raw_attribute = raw_attribute_by_format[FORMAT]
  if raw_attribute == nil then
    error(FORMAT .. ' output not supported by text-crossrefs.lua.')
  end
  RAW_ATTRIBUTE = raw_attribute
end
-- Template of the raw code that sets a label; '{{label}}' stands
-- for the identifier. Left nil when Pandoc sets the label itself.
local LABEL_TEMPLATE
-- True when no raw label has to be emitted by this filter
-- (see make_label).
local IS_LABEL_SET_BY_PANDOC

-- Chooses how labels are set, depending on the output format
-- and on the version of Pandoc:
-- ConTeXt before Pandoc 2.14 and LaTeX before Pandoc 3.1.7
-- get a raw label built from LABEL_TEMPLATE;
-- in every other case the flag IS_LABEL_SET_BY_PANDOC is raised.
-- Both variables are file-level locals so that they do not leak
-- into the global environment shared with other filters.
local function define_label_template()
  local version = pandoc.types.Version
  if RAW_ATTRIBUTE == 'context' and PANDOC_VERSION < version('2.14') then
    LABEL_TEMPLATE = '\\pagereference[{{label}}]'
  elseif RAW_ATTRIBUTE == 'latex' and PANDOC_VERSION < version('3.1.7') then
    LABEL_TEMPLATE = '\\label{{{label}}}'
  else
    IS_LABEL_SET_BY_PANDOC = true
  end
end
-- Default configuration. Each item can be overridden by the metadata
-- field "tcrf-<item>", with underscores replaced by hyphens
-- (see set_configuration_item_from_metadata).
local config = {
  -- Prefixes for one or several pages / notes.
  page_prefix = 'p. ',
  pages_prefix = 'pp. ',
  note_prefix = 'n. ',
  notes_prefix = 'nn. ',
  -- "pagenote" references: which part comes first ('page' or 'note'),
  -- what separates the two parts and what follows them.
  pagenote_first_type = 'page',
  pagenote_separator = ', ',
  pagenote_at_end = '',
  -- Whether an enumeration of "pagenote" references gets
  -- a single global prefix.
  pagenote_factorize_first_prefix_in_enum = false,
  -- Separators between the references of an enumeration in the output.
  multiple_delimiter = ', ',
  multiple_before_last = ' and ',
  -- Separator between the two labels of a range in the source span.
  references_range_separator = '>',
  -- Separator between the two page numbers of a range in the output.
  -- It must not be empty, otherwise the two numbers are fused
  -- (pages 12 to 15 would read "pp. 1215"); an en dash is used.
  -- NOTE(review): the dash was presumably lost in extraction -- confirm.
  range_separator = '–',
  -- Separator between the references of an enumeration in the source span.
  references_enum_separator = ',',
  -- When true, only spans with the class "label" are collected
  -- as labels inside notes.
  only_explicit_labels = false,
  default_reftype = 'page',
  default_prefixref = true,
  -- A reference containing this string is left untouched by format_enum.
  filelabel_ref_separator = '::',
  -- Delimiter of a range in the argument of \crossrefenum.
  range_delim_crossrefenum = ' to ',
  -- Whether crossrefenum is used with Typst output.
  typst_crossrefenum = false,
  -- Reference types accepted in addition to the built-in ones.
  additional_types = {}
}
-- Set of the reference types accepted in the "reftype" attribute.
-- configure() may add the types listed in config.additional_types.
local accepted_types = {}
for _, builtin_type in ipairs({ 'page', 'note', 'pagenote' }) do
  accepted_types[builtin_type] = true
end
-- Holder of the Typst variant in use: 'none' (output is not Typst),
-- 'crossrefenum' or 'ref'.
-- "get" raises an error until "set" has been called;
-- "set" computes the variant once and replaces "get"
-- with a function returning it.
local TYPST_VARIANT = {}

function TYPST_VARIANT.get(self)
  error('Attempt to get the Typst variant before it has been set.')
end

function TYPST_VARIANT.set(self)
  local variant
  if RAW_ATTRIBUTE ~= 'typst' then
    variant = 'none'
  elseif config.typst_crossrefenum then
    variant = 'crossrefenum'
  else
    variant = 'ref'
  end
  self.get = function(self) return variant end
end
-- Wraps the textual configuration items into an OpenXML run
-- so that they can be concatenated with the generated fields.
local function format_config_to_openxml()
  local run_opening = '<w:r><w:t xml:space="preserve">'
  local run_closing = '</w:t></w:r>'
  local textual_items = {
    'page_prefix', 'pages_prefix',
    'note_prefix', 'notes_prefix',
    'pagenote_separator', 'pagenote_at_end',
    'range_separator',
    'multiple_delimiter', 'multiple_before_last'
  }
  for _, item in ipairs(textual_items) do
    config[item] = run_opening .. config[item] .. run_closing
  end
end
-- Converts a scalar metadata value to a Lua string.
-- The value is either a plain string or a list of inlines.
-- Inlines that have a "text" field contribute it verbatim;
-- the other ones (e.g. spaces) are stringified.
local function metadata_value_to_string(value)
  if type(value) == 'string' then
    return value
  end
  local parts = {}
  for i, inline in ipairs(value) do
    parts[i] = inline.text or stringify(inline)
  end
  return table.concat(parts)
end

-- Overrides the configuration item "item" with the value
-- of the metadata field "tcrf-<item>" (underscores become hyphens),
-- if this field is set.
--   * Booleans are stored as is; "false" is honoured too.
--   * Items listed in IS_CONFIG_ARRAY expect a list of values,
--     each of them is appended to the existing array;
--     a single plain string is accepted as a one-item list.
--   * Any other value is converted to a string. Values made of
--     several inlines are kept in full instead of being cut
--     after the first one, and plain strings are accepted.
local function set_configuration_item_from_metadata(item, metamap)
  local metakey = 'tcrf-' .. string.gsub(item, '_', '-')
  local value = metamap[metakey]
  -- Compare with nil: "false" is a legitimate value.
  if value == nil then
    return
  end
  if IS_CONFIG_ARRAY[item] then
    if type(value) == 'string' then
      value = { value }
    end
    for _, list_item in ipairs(value) do
      table.insert(config[item], metadata_value_to_string(list_item))
    end
  elseif type(value) == 'boolean' then
    config[item] = value
  else
    config[item] = metadata_value_to_string(value)
  end
end
-- Meta filter that initialises the whole filter:
-- output format, label template, configuration read from the metadata,
-- Typst variant, OpenXML wrapping of the textual items
-- and, when crossrefenum is in use, the additional reference types.
local function configure(metadata)
  define_raw_attribute()
  define_label_template()

  for item in pairs(config) do
    set_configuration_item_from_metadata(item, metadata)
  end
  TYPST_VARIANT:set()

  if RAW_ATTRIBUTE == 'openxml' then
    format_config_to_openxml()
  end

  -- Additional types are only meaningful with crossrefenum.
  local uses_crossrefenum = RAW_ATTRIBUTE == 'context'
    or RAW_ATTRIBUTE == 'latex'
    or TYPST_VARIANT:get() == 'crossrefenum'
  if not uses_crossrefenum then
    return
  end
  for _, additional_type in ipairs(config.additional_types) do
    accepted_types[additional_type] = true
  end
end
-- End of configuration
-- Preprocessing of identifiers on notes
-- Necessary for those output formats where a note can be referred to
-- only via an identifier directly attached to it, not to its content
-- Lookup tables filled while the notes are traversed:
-- identifier of a span => label of the note containing it.
local spans_to_note_labels, text_to_note_labels = {}, {}
-- Counter incremented by map_spans_to_notelabels after each note;
-- used to build the "ftn<N>" labels with OpenDocument output.
local current_odt_note_index = 0
-- State for the note being traversed: whether no span has been met yet,
-- and the identifier of the first span met.
local is_first_span_in_note = true
local current_note_label = nil
-- Records in spans_to_note_labels the note label to use for "span".
-- OpenDocument: "ftn" followed by the index of the current note.
-- OpenXML, ConTeXt and Typst: the identifier of the first span
-- met in the current note.
-- Nothing is recorded for the other formats.
local function map_span_to_label(span)
  local id = span.identifier
  if RAW_ATTRIBUTE == 'opendocument' then
    spans_to_note_labels[id] = 'ftn' .. current_odt_note_index
    return
  end
  local label_is_first_span = RAW_ATTRIBUTE == 'openxml'
    or RAW_ATTRIBUTE == 'context'
    or RAW_ATTRIBUTE == 'typst'
  if not label_is_first_span then
    return
  end
  if is_first_span_in_note then
    current_note_label = id
    is_first_span_in_note = false
  end
  spans_to_note_labels[id] = current_note_label
end
-- Walks recursively through the content of "container"
-- and calls map_span_to_label on every Span that has
-- an "identifier" field.
local function map_spans_to_labels(container)
  for _, child in ipairs(container.content) do
    -- A span is both registered and explored:
    -- this is what supports spans inside spans.
    if child.t == 'Span' and child.identifier then
      map_span_to_label(child)
    end
    if child.content then
      map_spans_to_labels(child)
    end
  end
end
-- Note filter: with ConTeXt, OpenDocument, OpenXML and Typst output,
-- maps the spans of the note to the label of the note,
-- then increments the index of the current note.
local function map_spans_to_notelabels(note)
  local formats_with_note_labels = {
    context = true,
    opendocument = true,
    openxml = true,
    typst = true
  }
  if not formats_with_note_labels[RAW_ATTRIBUTE] then
    return
  end
  is_first_span_in_note = true
  map_spans_to_labels(note)
  current_odt_note_index = current_odt_note_index + 1
end
-- Span filter handling the "refanchor" attribute, which states
-- where the anchor of the span is to be placed:
--   * "beg": the identifier is moved to an empty span
--     inserted at the beginning of the content;
--   * "end": the identifier is moved to an empty span
--     appended to the content;
--   * "both": two empty spans identified by "<id>-beg" and "<id>-end"
--     are added at both ends; the span keeps its identifier.
-- Any other value triggers a warning and leaves the span as is.
-- A span without identifier is left as is too (with a warning):
-- there is no label to place, and anchors named "", "-beg" or "-end"
-- would be meaningless and duplicated across spans.
-- Always returns the span.
local function control_label_placement(span)
  local label_placement = span.attributes[PLACE_LABEL_ATTR]
  if not label_placement then
    return span
  end
  local id = span.identifier
  if id == '' then
    warning('Attribute ' .. PLACE_LABEL_ATTR ..
            ' ignored on a span without identifier.')
    return span
  end
  if label_placement == 'beg' then
    span.content:insert(1, pandoc.Span({}, { id = id }))
    span.identifier = ''
  elseif label_placement == 'end' then
    span.content:insert(pandoc.Span({}, { id = id }))
    span.identifier = ''
  elseif label_placement == 'both' then
    span.content:insert(1, pandoc.Span({}, { id = id .. '-beg' })) -- for DOCX/ODT
    span.content:insert(pandoc.Span({}, { id = id .. '-end' }))
  else
    warning('Invalid value ' .. label_placement ..
            ' on attribute ' .. PLACE_LABEL_ATTR .. ': ' ..
            'should be “beg”, “end” or “both”. ' ..
            'Falling back to Pandoc’s default.')
  end
  return span
end
-- Returns the inline that sets the label "label":
-- a raw inline built from LABEL_TEMPLATE, or an empty Str
-- when the label is already set by Pandoc.
local function make_label(label)
  -- pandoc.Null() cannot be used here because it is a Block element.
  if IS_LABEL_SET_BY_PANDOC then
    return pandoc.Str('')
  end
  -- The replacement is given as a function so that a "%"
  -- in the label is inserted verbatim: in a replacement string,
  -- it would be interpreted by gsub (and may raise an error).
  -- The parentheses discard the number of substitutions.
  local label_rawcode = (string.gsub(LABEL_TEMPLATE, '{{label}}',
                                     function() return label end))
  return pandoc.RawInline(RAW_ATTRIBUTE, label_rawcode)
end
-- Span filter: a span with a non-empty identifier is replaced
-- by the inline returned by make_label followed by the span itself.
-- Nothing is returned for the other spans.
local function labelize_span(span)
  local id = span.identifier
  if id == '' then
    return
  end
  return { make_label(id), span }
end
-- Returns a filter that appends to "labels_in_current_note"
-- the identifier of every span with a non-empty identifier.
-- When the option "only_explicit_labels" is set,
-- only the spans with the class "label" are taken into account.
local function collect_note_labels(labels_in_current_note)
  local function collect(span)
    local id = span.identifier
    if id == '' then
      return
    end
    if config.only_explicit_labels and not span.classes:includes('label') then
      return
    end
    labels_in_current_note[#labels_in_current_note + 1] = id
  end
  return { Span = collect }
end
-- Returns the raw inline to be put before (pos = 'begin')
-- or after (pos = 'end') a note in order to label it
-- with the first label found in the note:
--   * OpenXML: the start and the end of a bookmark;
--   * ConTeXt: \withfirstopt[note:<label>] before the note;
--   * Typst: <note:<label>> after the note.
-- In every other case, the raw inline is empty.
local function make_notelabel(pos, labels_in_current_note)
  -- About the strategy followed with ConTeXt,
  -- see above support_footnote_label_ConTeXt.
  local label = labels_in_current_note[1]

  -- Replaces '{{label}}' by the label. The replacement is given
  -- as a function so that a "%" in the label is inserted verbatim:
  -- in a replacement string, it would be interpreted by gsub.
  local function fill(template)
    return (string.gsub(template, '{{label}}', function() return label end))
  end

  local raw_code = ''
  if pos == 'begin' then
    if RAW_ATTRIBUTE == 'openxml' then
      raw_code = fill(
        '<w:bookmarkStart w:id="{{label}}_Note" w:name="{{label}}_Note"/>')
    elseif RAW_ATTRIBUTE == 'context' then
      raw_code = '\\withfirstopt[note:' .. label .. ']'
    end
  elseif pos == 'end' then
    if RAW_ATTRIBUTE == 'openxml' then
      raw_code = fill('<w:bookmarkEnd w:id="{{label}}_Note"/>')
    elseif RAW_ATTRIBUTE == 'typst' then
      raw_code = '<note:' .. label .. '>'
    end
  end
  return pandoc.RawInline(RAW_ATTRIBUTE, raw_code)
end
-- Surrounds the note with the raw inlines that label it.
local function labelize_note(note, labels_in_current_note)
  return {
    make_notelabel('begin', labels_in_current_note),
    note,
    make_notelabel('end', labels_in_current_note)
  }
end
-- Maps every label found in the current note to the label of the note,
-- i.e. "note:" followed by the first label found.
local function map_text_to_note_labels(labels_in_current_note)
  local first_label = labels_in_current_note[1]
  local note_label = 'note:' .. first_label
  for i = 1, #labels_in_current_note do
    text_to_note_labels[labels_in_current_note[i]] = note_label
  end
end
-- Note filter: collects the labels set inside the note and,
-- if there is at least one, registers them
-- and returns the note surrounded by its label.
-- Nothing is returned for a note without label.
local function set_notelabels(note)
  local labels = {}
  note:walk(collect_note_labels(labels))
  if #labels == 0 then
    return
  end
  map_text_to_note_labels(labels)
  return labelize_note(note, labels)
end
-- End of preprocessing of identifiers on notes
-- Gathering of data from the references span
-- Returns "s" without its leading and trailing spaces.
-- Exactly one value is returned: returning the result of gsub directly
-- would also hand over the number of substitutions, which would end up
-- as an extra argument wherever the call is the last one in a list.
local function trim_spaces(s)
  return (s:match('^ *(.-) *$'))
end
-- Builds a ref, i.e. a table with an "anchor" field
-- and an optional "end_of_range" field.
-- A ref whose "end_of_range" is not nil is a range.
local function new_ref(anchor, end_of_range)
  return { anchor = anchor, end_of_range = end_of_range }
end
-- Tells whether "raw_references" contains the separator
-- configured as "filelabel_ref_separator" (searched as plain text).
local function is_ref_external(raw_references)
  local separator_position = string.find(
    raw_references, config.filelabel_ref_separator, 1, true)
  return separator_position ~= nil
end
-- If "reference" contains the range separator
-- ("references_range_separator", searched as plain text),
-- returns the two boundaries of the range with their spaces trimmed.
-- Otherwise, returns "reference" itself and nil.
local function parse_possible_range(reference)
  local separator_first, separator_last = string.find(
    reference, config.references_range_separator, 1, true)
  if not separator_first then
    return reference, nil
  end
  local lower = trim_spaces(string.sub(reference, 1, separator_first - 1))
  local upper = trim_spaces(string.sub(reference, separator_last + 1))
  return lower, upper
end
-- Parses the reference that starts at index "beg_of_search"
-- in "raw_references".
-- Returns the corresponding ref and the index which the parsing
-- should be resumed at, or false if there is no reference left.
-- A reference extends up to the next enumeration separator
-- ("references_enum_separator", searched as plain text)
-- or up to the end of the string.
local function parse_next_reference(raw_references, beg_of_search)
  -- The last character is a valid starting point:
  -- a reference may consist of a single character.
  if beg_of_search > #raw_references then
    return false, nil
  end
  -- The delimiter can be composed of more than one character.
  local delim_beg, delim_end = string.find(raw_references,
                                           config.references_enum_separator,
                                           beg_of_search, true)
  local reference, next_ref_beg
  if delim_beg then
    reference = string.sub(raw_references, beg_of_search, delim_beg - 1)
    next_ref_beg = delim_end + 1
  else
    reference = string.sub(raw_references, beg_of_search)
    -- Past the end, so that the next call reports
    -- that nothing is left.
    next_ref_beg = #raw_references + 1
  end
  reference = trim_spaces(reference)
  -- Only spaces after the last delimiter: this is not a reference.
  if reference == '' and not delim_beg then
    return false, nil
  end
  return new_ref(parse_possible_range(reference)), next_ref_beg
end
-- "raw_references" is a string consisting of a list
-- of single references or ranges.
-- Returns the array of the corresponding refs (see "new_ref").
local function parse_references_enum(raw_references)
  local refs = {}
  local position = 1
  while true do
    local ref, next_position = parse_next_reference(raw_references, position)
    if not ref then
      break
    end
    refs[#refs + 1] = ref
    position = next_position
  end
  return refs
end
-- Warns that the attribute "attr_key" of a cross-reference span
-- has the invalid value "attr_value"; the span is identified
-- by its stringified content.
local function error_on_attr(attr_key, attr_value, span_content)
  local message = table.concat({
    'Invalid value "', attr_value,
    '" for attribute "', attr_key,
    '" in the span with class "', TEXT_CROSSREF_CLASS,
    '" whose content is "', stringify(span_content), '". ',
    'Falling back to default.'
  })
  warning(message)
end
-- Returns the type of the references in "span":
-- the value of its "reftype" attribute or, if it is absent
-- or not an accepted type, the default type.
-- A warning is emitted in the latter case.
local function get_ref_type(span)
  local requested_type = span.attributes[REF_TYPE_ATTR]
    or config.default_reftype
  if accepted_types[requested_type] then
    return requested_type
  end
  error_on_attr(REF_TYPE_ATTR, requested_type, span.content)
  return config.default_reftype
end
-- Tells whether the references in "span" must be prefixed,
-- according to its "prefixref" attribute: "yes" or "no".
-- Without this attribute, or if its value is invalid
-- (a warning is then emitted), the result is the value
-- of the option "default_prefixref".
local function if_prefixed(span)
  -- yes, true, no and false in the metadata header are parsed as booleans,
  -- but they are parsed as strings in the value of a span attribute.
  local prefixed_attr_value = span.attributes[PREFIXED_ATTR]
  if prefixed_attr_value == 'yes' then
    return true
  elseif prefixed_attr_value == 'no' then
    return false
  end
  if prefixed_attr_value then
    error_on_attr(PREFIXED_ATTR, prefixed_attr_value, span.content)
  end
  -- The default is used for invalid values too:
  -- it must be honoured even when it is false.
  return config.default_prefixref
end
-- End of gathering of data from the references span
-- Formatting references as raw inlines.
-- Returns the argument of crossrefenum that states the reference type,
-- or an empty string if the type is the default one.
-- The argument reads "form: <type>, " with Typst and "[<type>]" otherwise.
local function make_crossrefenum_first_arg(ref_type)
  if ref_type == config.default_reftype then
    return ''
  end
  if RAW_ATTRIBUTE == 'typst' then
    return string.format('form: %s, ', ref_type)
  end
  return string.format('[%s]', ref_type)
end
-- Returns the argument of crossrefenum that states
-- whether the references are prefixed, or an empty string
-- if "is_prefixed" equals the default setting.
-- The argument reads "prefixed: true|false, " with Typst
-- and "[withprefix]" or "[noprefix]" otherwise.
local function make_crossrefenum_second_arg(is_prefixed)
  if is_prefixed == config.default_prefixref then
    return ''
  end
  if RAW_ATTRIBUTE == 'typst' then
    local typst_boolean = is_prefixed and 'true' or 'false'
    return string.format('prefixed: %s, ', typst_boolean)
  end
  local tex_keyword = is_prefixed and 'withprefix' or 'noprefix'
  return string.format('[%s]', tex_keyword)
end
-- Returns the list of references to be passed to crossrefenum.
--   * With ConTeXt and Typst, references to notes ("note" and
--     "pagenote" types) are replaced by the label of the note
--     that contains them; this applies to both boundaries of a range.
--     A warning is emitted for labels unknown to text_to_note_labels.
--   * With Typst, labels are written "<label>", ranges
--     "(beg: <a>, end: <b>)" and a list of several references
--     is put between parentheses.
--   * Otherwise, the boundaries of a range are joined
--     with "range_delim_crossrefenum".
-- References are separated with ", ".
-- The output format is read from RAW_ATTRIBUTE, like in the other
-- crossrefenum helpers, so that the result is the same
-- when the filter is tested with the "native" output.
local function make_crossrefenum_references_list(refs, ref_type)
  local is_typst = RAW_ATTRIBUTE == 'typst'
  local targets_are_notes = (RAW_ATTRIBUTE == 'context' or is_typst)
    and (ref_type == 'note' or ref_type == 'pagenote')

  -- Returns the label to actually refer to for "label".
  local function resolve(label)
    if not targets_are_notes then
      return label
    end
    local note_label = text_to_note_labels[label]
    if not note_label then
      warning('Wrong reference to non-existent label "' .. label .. '".')
      return label
    end
    return note_label
  end

  local formatted_refs = {}
  for i, ref in ipairs(refs) do
    local formatted = resolve(ref.anchor)
    if is_typst then
      formatted = '<' .. formatted .. '>'
    end
    if ref.end_of_range then
      local range_end = resolve(ref.end_of_range)
      if is_typst then
        formatted = '(beg: ' .. formatted .. ', end: <' .. range_end .. '>)'
      else
        formatted = formatted .. config.range_delim_crossrefenum .. range_end
      end
    end
    formatted_refs[i] = formatted
  end

  local references_list = table.concat(formatted_refs, ', ')
  if is_typst and #refs > 1 then
    references_list = '(' .. references_list .. ')'
  end
  return references_list
end
-- Returns the call to crossrefenum for the given refs:
-- "\crossrefenum<type><prefix>{<references>}",
-- or "#crossrefenum(<type><prefix><references>)" with Typst.
local function make_crossrefenum(refs, ref_type, is_prefixed)
  local type_arg = make_crossrefenum_first_arg(ref_type)
  local prefix_arg = make_crossrefenum_second_arg(is_prefixed)
  local references_list = make_crossrefenum_references_list(refs, ref_type)
  if RAW_ATTRIBUTE == 'typst' then
    return string.format('#crossrefenum(%s%s%s)',
                         type_arg, prefix_arg, references_list)
  end
  return string.format('\\crossrefenum%s%s{%s}',
                       type_arg, prefix_arg, references_list)
end
-- Returns the prefix configured for "ref_type",
-- in its plural form ("<type>s_prefix") if "is_plural" is true,
-- in its singular form ("<type>_prefix") otherwise.
local function make_prefix_per_ref(ref_type, is_plural)
  local suffix = is_plural and 's_prefix' or '_prefix'
  return config[ref_type .. suffix]
end
-- Returns the raw code of a reference to the page of "target",
-- preceded by the singular page prefix if "is_prefixed" is true.
-- The code is a bookmark reference (OpenDocument),
-- a PAGEREF field (OpenXML) or a call to #ref (Typst);
-- with any other format, only the prefix is returned.
local function make_page_reference_per_ref(target, is_prefixed)
  local prefix = ''
  if is_prefixed then
    prefix = make_prefix_per_ref('page', false)
  end

  local field = nil
  if RAW_ATTRIBUTE == 'opendocument' then
    field = '<text:bookmark-ref ' ..
      ' text:reference-format="page" text:ref-name="' ..
      target .. '">000</text:bookmark-ref>'
  elseif RAW_ATTRIBUTE == 'openxml' then
    field = '<w:r><w:fldChar w:fldCharType="begin" w:dirty="true"/></w:r>' ..
      '<w:r><w:instrText xml:space="preserve"> PAGEREF ' ..
      target .. ' \\h </w:instrText></w:r>' ..
      '<w:r><w:fldChar w:fldCharType="separate"/></w:r>' ..
      '<w:r><w:t>000</w:t></w:r>' ..
      '<w:r><w:fldChar w:fldCharType="end"/></w:r>'
  elseif RAW_ATTRIBUTE == 'typst' then
    field = '#ref(form: "page", <' .. target .. '>)'
  end

  if field == nil then
    return prefix
  end
  return prefix .. field
end
-- Returns the raw code of a reference to a range of pages:
-- the plural page prefix (if "is_prefixed" is true),
-- then the references to the pages of "first" and "second"
-- separated by "range_separator".
local function make_pagerange_reference_per_ref(first, second, is_prefixed)
  local prefix = ''
  if is_prefixed then
    prefix = make_prefix_per_ref('page', true)
  end
  local lower_page = make_page_reference_per_ref(first, false)
  local upper_page = make_page_reference_per_ref(second, false)
  return prefix .. lower_page .. config.range_separator .. upper_page
end
-- Returns the raw code of a reference to the note containing "target",
-- preceded by the singular note prefix if "is_prefixed" is true.
-- The label of the note is looked up in spans_to_note_labels
-- (an empty string is used if it is not found).
-- The code is a note reference (OpenDocument), a NOTEREF field (OpenXML)
-- or a call to #ref (Typst); with any other format,
-- only the prefix is returned.
local function make_note_reference_per_ref(target, is_prefixed)
  local prefix = ''
  if is_prefixed then
    prefix = make_prefix_per_ref('note', false)
  end
  local note_label = spans_to_note_labels[target] or ''

  local field = nil
  if RAW_ATTRIBUTE == 'opendocument' then
    field = '<text:note-ref text:note-class="footnote"' ..
      ' text:reference-format="text" text:ref-name="' ..
      note_label .. '">000</text:note-ref>'
  elseif RAW_ATTRIBUTE == 'openxml' then
    field = '<w:r><w:fldChar w:fldCharType="begin" w:dirty="true"/></w:r>' ..
      '<w:r><w:instrText xml:space="preserve"> NOTEREF ' ..
      note_label .. '_Note' .. ' \\h </w:instrText></w:r>' ..
      '<w:r><w:fldChar w:fldCharType="separate"/></w:r>' ..
      '<w:r><w:t>000</w:t></w:r>' ..
      '<w:r><w:fldChar w:fldCharType="end"/></w:r>'
  elseif RAW_ATTRIBUTE == 'typst' then
    field = '#ref(form: "normal", <note:' .. note_label .. '>)'
  end

  if field == nil then
    return prefix
  end
  return prefix .. field
end
-- Returns the raw code of a reference to both the page
-- and the note of "target". The option "pagenote_first_type"
-- states which of them comes first; the second one is always prefixed,
-- the first one only if "is_prefixed" is true.
-- The two parts are separated by "pagenote_separator"
-- and followed by "pagenote_at_end".
-- Raises an error if "pagenote_first_type" is neither "page" nor "note".
local function make_pagenote_reference_per_ref(target, is_prefixed)
  local first_type = config.pagenote_first_type
  local prefix = ''
  if is_prefixed then
    prefix = make_prefix_per_ref(first_type, false)
  end

  local make_first_part, make_second_part
  if first_type == 'page' then
    make_first_part = make_page_reference_per_ref
    make_second_part = make_note_reference_per_ref
  elseif first_type == 'note' then
    make_first_part = make_note_reference_per_ref
    make_second_part = make_page_reference_per_ref
  else
    error('“tcrf-pagenote-first-type” must be set either to “page” or “note”.')
  end

  return prefix ..
    make_first_part(target, false) ..
    config.pagenote_separator .. make_second_part(target, true) ..
    config.pagenote_at_end
end
-- Returns the raw code of the reference "ref" according to "ref_type":
-- a page range, a page, a note or a page and a note.
-- Ranges are only taken into account for pages.
-- An empty string is returned for any other type.
local function make_reference_per_ref(ref, ref_type, is_prefixed)
  if ref_type == 'page' then
    if ref.end_of_range then
      return make_pagerange_reference_per_ref(
        ref.anchor, ref.end_of_range, is_prefixed)
    end
    return make_page_reference_per_ref(ref.anchor, is_prefixed)
  end
  if ref_type == 'note' then
    return make_note_reference_per_ref(ref.anchor, is_prefixed)
  end
  if ref_type == 'pagenote' then
    return make_pagenote_reference_per_ref(ref.anchor, is_prefixed)
  end
  return ''
end
-- Returns the plural prefix to put before an enumeration of references.
-- For "pagenote" references, the prefix is the one of the type
-- set as "pagenote_first_type".
local function make_global_prefix_several_refs(ref_type)
  if ref_type == 'pagenote' then
    return make_prefix_per_ref(config.pagenote_first_type, true)
  end
  return make_prefix_per_ref(ref_type, true)
end
-- Returns the raw code of all the references in "refs".
-- They are separated by "multiple_delimiter",
-- except the last two ones, separated by "multiple_before_last".
local function make_references_per_ref(refs, ref_type, is_prefixed)
  local count = #refs
  local references = ''
  for i = 1, count do
    references = references ..
      make_reference_per_ref(refs[i], ref_type, is_prefixed)
    if i == count - 1 then
      references = references .. config.multiple_before_last
    elseif i < count - 1 then
      references = references .. config.multiple_delimiter
    end
  end
  return references
end
-- Returns the raw code for "refs" when every reference
-- is formatted by this filter.
-- A prefixed enumeration gets a single global prefix
-- instead of a prefix per reference; for "pagenote" references,
-- this requires the option "pagenote_factorize_first_prefix_in_enum".
local function make_raw_content_per_ref(refs, ref_type, is_prefixed)
  local has_global_prefix = #refs > 1 and is_prefixed
    and (ref_type ~= 'pagenote'
         or config.pagenote_factorize_first_prefix_in_enum)
  if not has_global_prefix then
    return '' .. make_references_per_ref(refs, ref_type, is_prefixed)
  end
  local global_prefix = make_global_prefix_several_refs(ref_type)
  return global_prefix .. make_references_per_ref(refs, ref_type, false)
end
-- Returns the raw code for "refs": a call to crossrefenum
-- with ConTeXt, LaTeX and the "crossrefenum" Typst variant,
-- references formatted by this filter otherwise.
local function make_raw_content(refs, ref_type, is_prefixed)
  local uses_crossrefenum = RAW_ATTRIBUTE == 'context'
    or RAW_ATTRIBUTE == 'latex'
    or TYPST_VARIANT:get() == 'crossrefenum'
  if uses_crossrefenum then
    return make_crossrefenum(refs, ref_type, is_prefixed)
  end
  return make_raw_content_per_ref(refs, ref_type, is_prefixed)
end
-- Returns the raw inline containing the code for "refs".
local function format_references(refs, ref_type, is_prefixed)
  return pandoc.RawInline(
    RAW_ATTRIBUTE,
    make_raw_content(refs, ref_type, is_prefixed))
end
-- Span filter: replaces the content of a cross-reference span
-- (class "tcrf") with the raw inline formatting its references.
-- Spans containing "filelabel_ref_separator" are left untouched.
-- Vocabulary: a reference is the text naming a label
-- or a range of labels; a ref is the object built by "new_ref".
local function format_enum(span)
  if not span.classes:includes(TEXT_CROSSREF_CLASS) then
    return span
  end
  local raw_references = stringify(span.content)
  if is_ref_external(raw_references) then
    return span
  end
  local refs = parse_references_enum(raw_references)
  local ref_type = get_ref_type(span)
  local is_prefixed = if_prefixed(span)
  span.content = format_references(refs, ref_type, is_prefixed)
  return span
end
-- The filters are applied one after the other, in this order:
-- each of them traverses the whole document.
local filters = {}
filters[#filters + 1] = { Meta = configure }
filters[#filters + 1] = { Meta = support_footnote_label_ConTeXt }
filters[#filters + 1] = { Note = set_notelabels }
filters[#filters + 1] = { Note = map_spans_to_notelabels }
filters[#filters + 1] = { Span = control_label_placement }
filters[#filters + 1] = { Span = labelize_span }
filters[#filters + 1] = { Span = format_enum }
return filters