pandoc-lua-filters/margin-notes/margin-notes.lua

548 lines
18 KiB
Lua
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- margin-notes.lua
-- A Pandoc Lua filter to create templated marginal notes based on classes.
-- Copyright 2022 Bastien Dumont (bastien.dumont [at] posteo.net)
-- This file is under the MIT License: see LICENSE for more details
local find_in_string = string.find
local gsub = string.gsub
local get_substring = string.sub
local unpack_table = table.unpack
local insert_in_table = table.insert
local table_pop = table.remove
local pandoc_to_string = pandoc.utils.stringify
-- No char in the placeholder string should have to be escaped
-- when found in a URL or in a Lua pattern.
-- A placeholder is the name of a template variable enclosed between
-- PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL and PLACEHOLDER_LABEL .. PLACEHOLDER_END.
local PLACEHOLDER_LABEL = 'MRGNN_PLACEHOLDER'
local PLACEHOLDER_BEGIN = 'BEG_'
local PLACEHOLDER_END = '_END'
-- Lua pattern matching one placeholder; its only capture is the
-- (shortest possible) text found between the two labels.
local PLACEHOLDER_REGEX = PLACEHOLDER_BEGIN ..
PLACEHOLDER_LABEL .. '(.-)' ..
PLACEHOLDER_LABEL .. PLACEHOLDER_END
-- String indicating an undefined value in the output
local UNDEFINED = '??'
-- Text of the anchor that carries the note as a tooltip in DOCX output.
local DOCX_TOOLTIP_ANCHOR = '*'
-- Value given to the 'custom-style' attribute of that anchor.
local DOCX_TOOLTIP_ANCHOR_SYTLE = 'Tooltip anchor'
-- Character enclosing the name of a variable in a template (e.g. §content§).
local TEMPLATE_VARIABLE_MARKUP_CHAR = '§'
local mc = TEMPLATE_VARIABLE_MARKUP_CHAR
-- Lua pattern matching one template variable and capturing its name.
-- NOTE(review): both frontier sets also contain a backslash, presumably so
-- that a markup character preceded by a backslash is not read as a
-- delimiter -- confirm.
local TEMPLATE_VAR_REGEX =
'%f[\\' .. mc .. ']' .. mc .. '(.-)%f[\\' .. mc .. ']' .. mc
-- Default definitions of the macros used to typeset the margin notes,
-- indexed by output format. Each definition is written as a list of
-- lines that are joined with newlines (no trailing newline).
local DEFAULT_MACROS = {
  -- ConTeXt code adapted from
  -- https://wiki.contextgarden.net/Footnotes#Footnotes_in_the_margin
  context = table.concat({
    '\\define\\placeMrgnn{%',
    ' \\inoutermargin{\\vtop{%',
    ' \\placelocalnotes[mrgnn][before=,after=]%',
    ' }}%',
    '}',
    '',
    '\\definenote',
    ' [mrgnn]',
    ' [location=text,',
    ' bodyfont=x,',
    ' next=\\placeMrgnn]',
    '',
    '\\setupnotation',
    ' [mrgnn]',
    ' [number=no,',
    ' alternative=serried]',
  }, '\n'),
  latex = table.concat({
    '\\newcommand{\\mrgnn}[1]{%',
    ' \\marginpar{{\\footnotesize #1}}%',
    '}',
  }, '\n'),
}
-- The rest of the filter only tests for 'opendocument' and 'openxml':
-- map the short format names onto them and leave any other value as is.
local FORMAT_ALIASES = { odt = 'opendocument', docx = 'openxml' }
FORMAT = FORMAT_ALIASES[FORMAT] or FORMAT
-- Returns a new table holding the same values as the_table
-- at the indices 1 to #the_table. Nested tables are not copied.
local function copy_unidimensional_table(the_table)
  local copy = {}
  for i = 1, #the_table do
    copy[i] = the_table[i]
  end
  return copy
end
-- Appends value to list. A table is not appended itself:
-- a copy made by copy_unidimensional_table is appended instead,
-- so that later changes to the original do not affect the list.
-- Value is of any type except thread and userdata.
local function add_copy_to_list(value, list)
  if type(value) ~= 'table' then
    insert_in_table(list, value)
    return
  end
  insert_in_table(list, copy_unidimensional_table(value))
end
-- Converts any value to a boolean following Lua's truthiness rules:
-- false for nil and false, true for everything else.
local function to_bool(var)
  if var then
    return true
  end
  return false
end
-- Returns the plain-text rendering of a list of Inlines.
-- The list is wrapped in a Para before being stringified.
local function inlines_to_string(list)
  local paragraph = pandoc.Para(list)
  return pandoc_to_string(paragraph)
end
-- Rendering configuration indexed by class name.
-- The entries are created by add_rendering from the document metadata;
-- define_rendering_functions later adds a 'render' table to each of them.
local config = {}
-- Registers in config the templates defined for one class.
--   config: table indexed by class name, modified in place.
--   class: name of the class (string) the rendering applies to.
--   body_text, note_text: templates for the body and the note.
--   docx_body_text, docx_note_text: same for DOCX output.
--   odt_body_text, odt_note_text: same for ODT output.
--   csname: name of the macro wrapping the note (used for ConTeXt and LaTeX).
-- All the values but class are plain strings (not the empty string!) or nil.
-- Raises an error if class is nil.
-- Declared local like every other function of this file, so that it does
-- not leak into the global environment.
local function add_rendering(config, class,
                             body_text, note_text,
                             docx_body_text, docx_note_text,
                             odt_body_text, odt_note_text,
                             csname)
  if class == nil then
    -- Without this check, the assignment below would fail with the
    -- much less helpful message "table index is nil".
    error('A rendering defined in "mrgnn-define-renderings" ' ..
          'has no usable "class" value.')
  end
  config[class] = {
    body_text = body_text,
    note_text = note_text,
    docx_body_text = docx_body_text,
    docx_note_text = docx_note_text,
    odt_body_text = odt_body_text,
    odt_note_text = odt_note_text,
    csname = csname
  }
end
-- Returns the 'text' field of the first item of meta_obj,
-- or nil if meta_obj is nil (or false) or has no first item.
local function meta_to_str(meta_obj)
  local first_item = meta_obj and meta_obj[1]
  if first_item then
    return first_item.text
  end
  return nil
end
-- Reads the renderings listed under the 'mrgnn-define-renderings'
-- metadata key and registers each of them in config via add_rendering.
-- When an ODT-specific template is missing, the generic one is used.
-- Returns meta unchanged.
local function get_renderings_config(meta)
  local renderings = meta['mrgnn-define-renderings']
  if renderings then
    for i = 1, #renderings do
      -- Must be local: it was previously leaked as a global variable.
      local rendering = renderings[i]
      add_rendering(config,
                    meta_to_str(rendering.class),
                    meta_to_str(rendering['body-text']),
                    meta_to_str(rendering['note-text']),
                    meta_to_str(rendering['docx-body-text']),
                    meta_to_str(rendering['docx-note-text']),
                    meta_to_str(rendering['odt-body-text']
                                  or rendering['body-text']),
                    meta_to_str(rendering['odt-note-text']
                                  or rendering['note-text']),
                    meta_to_str(rendering['csname'])
      )
    end
  end
  return meta
end
-- Parses markdown_str with pandoc.read and returns the content
-- of the first block of the resulting document.
-- Returns an empty table if markdown_str is the empty string, if the
-- document has no block at all (e.g. whitespace-only input) or if its
-- first block has no content. The second and third cases previously
-- raised an error or returned nil.
local function to_pandoc_inlines(markdown_str)
  if markdown_str == '' then
    return {}
  end
  local first_block = pandoc.read(markdown_str).blocks[1]
  return (first_block and first_block.content) or {}
end
-- Converts a template (a string, or nil for an empty template)
-- to the result of to_pandoc_inlines.
-- Beforehand, every template variable is rewritten as a placeholder.
-- This substitution is necessary in order to differentiate
-- the '§' characters that are part of the variable markup
-- from the literal ones.
local function template_to_pandoc_fragment(template)
  local placeholder_for_variable =
    PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
    '%1' ..
    PLACEHOLDER_LABEL .. PLACEHOLDER_END
  local source = template or ''
  -- Only the first result of gsub (the new string) is kept.
  local source_with_placeholders =
    gsub(source, TEMPLATE_VAR_REGEX, placeholder_for_variable)
  return to_pandoc_inlines(source_with_placeholders)
end
-- Tells whether elem is a value that can be traversed further,
-- i.e. a table or a userdata.
local function has_children(elem)
  local elem_type = type(elem)
  return elem_type == 'table' or elem_type == 'userdata'
end
-- Returns true if str contains at least one placeholder, false otherwise.
-- Although it would speed up the process,
-- we don't exclude any string based on its key (e.g. "tag")
-- because users could use the same names for attributes.
local function contains_placeholder(str)
  local placeholder_pattern =
    PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
    '.-' ..
    PLACEHOLDER_LABEL .. PLACEHOLDER_END
  -- The first result is the start index of the match, or nil.
  local match_start = find_in_string(str, placeholder_pattern)
  return match_start ~= nil
end
-- Appends step at the end of path.
local function register_step_in_path(path, step)
  path[#path + 1] = step
end
-- Removes the last step of path (does nothing on an empty path).
local function unregister_last_step(path)
  path[#path] = nil
end
-- Recursively collects the paths to the strings of current_table
-- that contain a placeholder.
--   current_path: the steps (keys or indices) followed from the root down
--     to current_table. It is extended and restored during the traversal,
--     so it is left as it was found when the function returns.
--   placeholders_paths: table that receives a copy of the path
--     leading to each string containing a placeholder.
--   current_table: the value being traversed.
-- When two placeholders are to be found in the same string,
-- only one path is recorded.
local function find_paths_to_placeholders(current_path, placeholders_paths, current_table)
  for key, child in pairs(current_table) do
    if has_children(child) then
      -- Go down one level.
      register_step_in_path(current_path, key)
      find_paths_to_placeholders(current_path, placeholders_paths, child)
      unregister_last_step(current_path)
    elseif type(child) == 'string' and contains_placeholder(child) then
      -- Record a snapshot of the path, including the key of the string.
      register_step_in_path(current_path, key)
      add_copy_to_list(current_path, placeholders_paths)
      unregister_last_step(current_path)
    end
  end
end
-- Returns the table of the paths leading to the strings of list
-- (a List of Inlines) that contain a placeholder.
-- A path is a table holding the successive keys and indices to follow
-- from list down to such a string (see find_paths_to_placeholders).
-- If two placeholders are to be found in the same string,
-- then only one path is returned.
--[[
  Example:
    template = {
      pandoc.SmallCaps(pandoc.Str(
        PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
        'content' ..
        PLACEHOLDER_LABEL .. PLACEHOLDER_END)),
      pandoc.Str(':'), pandoc.Space(),
      pandoc.Emph(pandoc.Str(
        PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
        'attr.def' .. '.' ..
        PLACEHOLDER_LABEL .. PLACEHOLDER_END))
    }
    return value = {
      { 1, 'content', 1, 'text' },
      { 4, 'content', 1, 'text' }
    }
]]--
local function get_paths_to_placeholders(list)
  local collected_paths = {}
  -- The traversal starts with an empty path at the root of list.
  find_paths_to_placeholders({}, collected_paths, list)
  return collected_paths
end
-- Tells whether a placeholder must be replaced with a list of Inlines.
-- If parent_object_type is Str, this means that the object
-- containing the placeholder is an element in a list of Inlines.
-- In this case, the replacement should be a list of Inlines
-- destined to replace this element.
-- Otherwise (e.g. Link or Image), the replacement should be
-- a plain string.
local function is_replacement_a_list(parent_object_type)
  if parent_object_type == 'Str' then
    return true
  end
  return false
end
-- Returns the value a placeholder stands for.
--   placeholder: 'content', or 'attr.' followed by an attribute name.
--   instance_content: the List of Inlines used for 'content'.
--   instance_attr: key/value table in which the attributes are looked up.
--   replacement_is_list: whether a list of Inlines is expected.
-- Returns a list of Inlines if replacement_is_list is true,
-- a plain string otherwise.
-- Markdown is interpreted in the first case only.
-- For 'content' with replacement_is_list, instance_content itself
-- (not a copy) is returned.
-- An attribute that is not defined is replaced with UNDEFINED
-- (in bold when a list is returned).
-- Raises an error for any other placeholder.
local function get_replacement(placeholder,
                               instance_content, instance_attr,
                               replacement_is_list)
  -- Must be local: it was previously leaked as a global variable.
  local replacement
  if placeholder == 'content' then
    if replacement_is_list then
      replacement = instance_content
    else
      replacement = inlines_to_string(instance_content)
    end
  elseif find_in_string(placeholder, '^attr%.') then
    local key = get_substring(placeholder, #'attr.' + 1)
    if replacement_is_list then
      local replacement_markdown = instance_attr[key]
        or '**' .. UNDEFINED .. '**'
      replacement = to_pandoc_inlines(replacement_markdown)
    else
      replacement = instance_attr[key] or UNDEFINED
    end
  else
    error('Invalid content "' .. placeholder .. '" in the value of a ' ..
          '"body-text" or "note-text" metadata variable. ' ..
          'It must either be "content" or begin with "attr".')
  end
  return replacement
end
-- Returns the part of s located before index i_beg
-- and the part located after index i_end.
-- false is returned instead of a string when the corresponding
-- part does not exist, i.e. when i_beg is not greater than 1
-- or when i_end is not less than the length of s.
local function get_strings_around_substring(s, i_beg, i_end)
  local before = i_beg > 1 and get_substring(s, 1, i_beg - 1)
  local after = i_end < #s and get_substring(s, i_end + 1)
  return before, after
end
-- Adds the text surrounding a placeholder to its replacement.
--   replacement: a plain string or a list of Inlines.
--   string_before_placeholder, string_after_placeholder: the text found
--     before and after the placeholder, or a false value if there is none.
-- If replacement is a string, returns the concatenation of the three.
-- Otherwise, returns a new list: the surrounding text is merged
-- with the first or last element if it is a Str, and added as a new Str
-- otherwise (this includes the case of an empty list, which previously
-- raised an error).
-- The list received and its elements are never modified, because
-- get_replacement may hand over the content of the span being processed.
local function insert_strings_around_placeholder(replacement,
                                                 string_before_placeholder,
                                                 string_after_placeholder)
  if type(replacement) ~= 'table' then
    return (string_before_placeholder or '') ..
      replacement ..
      (string_after_placeholder or '')
  end
  local completed = {}
  for i = 1, #replacement do
    completed[i] = replacement[i]
  end
  if string_before_placeholder then
    local first = completed[1]
    if first and first.t == 'Str' then
      completed[1] = pandoc.Str(string_before_placeholder .. first.text)
    else
      insert_in_table(completed, 1, pandoc.Str(string_before_placeholder))
    end
  end
  if string_after_placeholder then
    local last = completed[#completed]
    if last and last.t == 'Str' then
      completed[#completed] = pandoc.Str(last.text .. string_after_placeholder)
    else
      insert_in_table(completed, pandoc.Str(string_after_placeholder))
    end
  end
  return completed
end
-- Substitutes replacement for the placeholder located between
-- the indices i_placeholder_beg and i_placeholder_end
-- of the string pandoc_elems[i_object][placeholder_key].
-- The text found around the placeholder is kept.
-- If replacement is a table, its elements take the place of
-- pandoc_elems[i_object] in pandoc_elems; otherwise the string
-- is replaced in the object itself.
-- Does not return anything.
local function insert_replacement_in_elems(replacement,
                                           pandoc_elems, i_object, placeholder_key,
                                           i_placeholder_beg, i_placeholder_end)
  local inserts_a_list = type(replacement) == 'table'
  local host_object = pandoc_elems[i_object]
  local text_before, text_after =
    get_strings_around_substring(host_object[placeholder_key],
                                 i_placeholder_beg, i_placeholder_end)
  local completed =
    insert_strings_around_placeholder(replacement, text_before, text_after)
  if not inserts_a_list then
    host_object[placeholder_key] = completed
    return
  end
  -- Insert right after the host object, last element first,
  -- so that the elements end up in their original order...
  for i = #completed, 1, -1 do
    insert_in_table(pandoc_elems, i_object + 1, completed[i])
  end
  -- ... then drop the host object.
  table_pop(pandoc_elems, i_object)
end
-- Returns the list of the placeholders found in str_with_placeholders,
-- in the order in which they occur. Each item is a table with the fields:
--   value: the text captured by PLACEHOLDER_REGEX;
--   beg, end: the indices of the first and last characters
--     of the placeholder in the string.
local function find_placeholders_in_string(str_with_placeholders)
  local found = {}
  local search_from = 1
  while true do
    local first, last, name =
      find_in_string(str_with_placeholders, PLACEHOLDER_REGEX, search_from)
    if not name then
      break
    end
    found[#found + 1] = { value = name, beg = first, ['end'] = last }
    -- The next search resumes at the last character of this match.
    search_from = last
  end
  return found
end
-- Replaces all the placeholders found in the string
-- pandoc_elems[i_object][placeholder_key]
-- with the corresponding values from instance_content (a List of Inlines)
-- and instance_attr (a table of key/value pairs, where the values may
-- contain markdown formatting).
-- Does not return anything: modifies pandoc_elems instead.
local function replace_placeholders_in_value(pandoc_elems,
                                             i_object, placeholder_key,
                                             instance_content, instance_attr)
  local host_object = pandoc_elems[i_object]
  local wants_list = is_replacement_a_list(host_object.t)
  local found = find_placeholders_in_string(host_object[placeholder_key])
  -- Start with the last placeholder, so that the indices
  -- of the preceding ones are still valid after each replacement.
  local i = #found
  while i >= 1 do
    local placeholder = found[i]
    local replacement = get_replacement(placeholder.value,
                                        instance_content, instance_attr,
                                        wants_list)
    insert_replacement_in_elems(replacement,
                                pandoc_elems, i_object, placeholder_key,
                                placeholder.beg, placeholder['end'])
    i = i - 1
  end
end
-- Replaces the placeholders pointed at by the paths in
-- paths_to_placeholders with the data in instance_content
-- and instance_attr, as required by the placeholder strings.
-- The last item of a path is the key of the string containing
-- the placeholder, the item before is the index of the object holding
-- that string, and the preceding items lead to the table
-- in which this object is to be found.
-- Returns inlines_with_placeholders itself, modified in place.
local function replace_placeholders(
    -- table of Inlines, generally containing Str objects
    -- whose text contains a placeholder
    inlines_with_placeholders,
    -- table of paths (see find_paths_to_placeholders for a definition)
    paths_to_placeholders,
    -- List of inlines
    instance_content,
    -- key-value table of attributes. The values may contain markdown.
    instance_attr)
  -- The paths are processed from the last one to the first one.
  for i_path = #paths_to_placeholders, 1, -1 do
    local path = paths_to_placeholders[i_path]
    -- Position in the path of the index of the object holding the string.
    local i_object_step = math.max(#path - 1, 1)
    local scope = inlines_with_placeholders
    for i_step = 1, i_object_step - 1 do
      scope = scope[path[i_step]]
    end
    replace_placeholders_in_value(scope,
                                  path[i_object_step], path[i_object_step + 1],
                                  instance_content, instance_attr)
  end
  return inlines_with_placeholders
end
-- Returns a function that takes data (the content and the attributes
-- of an instance) and inserts it into the given template.
-- Returns nothing if template is nil or false.
--
-- The Inlines of the template and the paths to its placeholders
-- are created and memoized the first time the returned function is called.
-- The memoized Inlines cannot be used directly, for they would be
-- changed by replace_placeholders: that's why a copy is made
-- via the walk function at every call.
-- The memoized paths can be used directly by replace_placeholders
-- because they are only traversed.
local function template_to_function(template)
  if not template then
    return
  end
  local cached_inlines
  local cached_paths
  return function(instance_content, instance_attr)
    if not cached_inlines then
      cached_inlines = template_to_pandoc_fragment(template)
    end
    local working_copy = cached_inlines:walk({})
    if not cached_paths then
      cached_paths = get_paths_to_placeholders(cached_inlines)
    end
    return replace_placeholders(working_copy, cached_paths,
                                instance_content, instance_attr)
  end
end
-- Adds to every entry of config a 'render' table whose 'body' and 'note'
-- fields are the values returned by template_to_function for
-- the templates of the current output format:
-- the 'odt_' ones for opendocument, the 'docx_' ones for openxml,
-- the unprefixed ones for any other format.
-- The meta argument is not used.
local function define_rendering_functions(meta)
  local prefix_by_format = { opendocument = 'odt_', openxml = 'docx_' }
  local format_prefix = prefix_by_format[FORMAT] or ''
  for class_name, class_config in pairs(config) do
    local render = {}
    for _, part in ipairs({ 'body', 'note' }) do
      local template = class_config[format_prefix .. part .. '_text']
      render[part] = template_to_function(template)
    end
    config[class_name].render = render
  end
end
-- For ConTeXt and LaTeX output, sets the 'header-includes' metadata
-- variable to a two-item list: its previous value (or an empty raw block
-- if it had none) followed by the default macro definitions.
-- Does nothing for the other formats.
local function set_macro_definition(meta)
  if FORMAT ~= 'context' and FORMAT ~= 'latex' then
    return
  end
  local previous_includes =
    meta['header-includes'] or pandoc.RawBlock(FORMAT, '')
  local macro_definitions = pandoc.RawBlock(FORMAT, DEFAULT_MACROS[FORMAT])
  meta['header-includes'] = { previous_includes, macro_definitions }
end
-- Metadata handler of the filter: reads the renderings defined
-- in the metadata, prepares the rendering functions, then adds
-- the macro definitions. Returns meta.
local function Meta(meta)
  local steps = {
    get_renderings_config,
    define_rendering_functions,
    set_macro_definition
  }
  for i = 1, #steps do
    steps[i](meta)
  end
  return meta
end
-- Number of notes created so far for DOCX output;
-- used to give a distinct name to the bookmark of each note.
local i_invocation = 0

-- Returns text with the characters &, <, > and " replaced with
-- the corresponding XML entities, so that it can be used
-- as the value of a double-quoted XML attribute.
local function escape_xml_attribute(text)
  -- Only the first result of gsub (the new string) is kept.
  local escaped = gsub(text, '[&<>"]', {
    ['&'] = '&amp;',
    ['<'] = '&lt;',
    ['>'] = '&gt;',
    ['"'] = '&quot;'
  })
  return escaped
end

-- Wraps the content of a note in the raw code
-- that makes it a note in the current output format.
--   content: a List of Inlines (output of replace_placeholders).
--   class_name: the class of the note, used to look up a custom
--     macro name ('csname') in config.
-- Returns a list of Inlines:
--   * ConTeXt and LaTeX: content (modified in place) enclosed in a call
--     to the macro named by csname, 'mrgnn' by default;
--   * openxml: a new list made of a bookmark and of a hyperlink to it,
--     whose tooltip is the plain text of the note;
--   * opendocument: content (modified in place) enclosed in an annotation;
--   * any other format: content unchanged.
local function wrap_in_raw_note_code(content, class_name)
  local margin_note = content
  if FORMAT == 'context' or FORMAT == 'latex' then
    local csname = config[class_name].csname or 'mrgnn'
    insert_in_table(margin_note, 1,
                    pandoc.RawInline(FORMAT, '\\' .. csname .. '{'))
    insert_in_table(margin_note, pandoc.RawInline(FORMAT, '}'))
  elseif FORMAT == 'openxml' then
    i_invocation = i_invocation + 1
    local bookmark_id = 'mrgnn_' .. i_invocation
    -- The text of the note ends up in an XML attribute: it must be
    -- escaped, otherwise a quote, an ampersand or an angle bracket
    -- in the note would produce invalid XML.
    local tooltip = escape_xml_attribute(pandoc_to_string(margin_note))
    margin_note = {
      pandoc.RawInline(
        FORMAT,
        '<w:bookmarkStart w:id="' .. bookmark_id ..
        '" w:name="' .. bookmark_id .. '"/>' ..
        '<w:bookmarkEnd w:id="' .. bookmark_id .. '"/>' ..
        '<w:hyperlink w:anchor="' .. bookmark_id .. '" ' ..
        'w:tooltip="' .. tooltip .. '">'),
      pandoc.Span(
        pandoc.Str(DOCX_TOOLTIP_ANCHOR),
        { ['custom-style'] = DOCX_TOOLTIP_ANCHOR_SYTLE }),
      pandoc.RawInline(
        FORMAT,
        '</w:hyperlink>')
    }
  elseif FORMAT == 'opendocument' then
    insert_in_table(margin_note, 1, pandoc.RawInline(FORMAT,
      '<office:annotation><text:p>'))
    insert_in_table(margin_note, pandoc.RawInline(FORMAT,
      '</text:p></office:annotation>'))
  end
  return margin_note
end
-- Span handler of the filter.
-- If span has one of the classes defined in config, replaces its content
-- with the rendered body (empty if the class has no body template)
-- and returns a list made of the span followed by the Inlines
-- of the margin note (none if the class has no note template).
-- Only the first matching class found is taken into account.
-- Returns nothing if the span has none of the classes.
local function render_margin_notes(span)
  for class_name in pairs(config) do
    if span.classes:includes(class_name) then
      local renderers = config[class_name].render
      local render_note = renderers.note
      local render_body = renderers.body
      local result = { span }
      -- The note is rendered first, from the original content of the span.
      if render_note then
        local rendered_note = render_note(span.content, span.attributes)
        local margin_note = wrap_in_raw_note_code(rendered_note, class_name)
        for i = 1, #margin_note do
          result[i + 1] = margin_note[i]
        end
      end
      local body = {}
      if render_body then
        body = render_body(span.content, span.attributes)
      end
      span.content = body
      return result
    end
  end
end
-- The filter is returned as a list of two filters: the first one only
-- handles the metadata, the second one only handles the spans.
-- render_margin_notes reads the config table filled by Meta, so the
-- metadata have to be processed before the spans; the order of this list
-- presumably guarantees it (pandoc is expected to apply the filters
-- of a list one after the other) -- confirm against the pandoc docs.
return {
{ Meta = Meta },
{ Span = render_margin_notes }
}