-- pandoc-lua-filters/margin-notes/margin-notes.lua
-- margin-notes.lua
-- A Pandoc Lua filter to create templated marginal notes based on classes.
-- Copyright 2022 Bastien Dumont (bastien.dumont [at] posteo.net)
-- This file is under the MIT License: see LICENSE for more details
-- Local aliases for the standard-library and pandoc functions used below.
local find_in_string = string.find
local gsub = string.gsub
local get_substring = string.sub
local unpack_table = table.unpack
local insert_in_table = table.insert
local table_pop = table.remove
local pandoc_to_string = pandoc.utils.stringify
-- No char in the placeholder string should have to be escaped
-- when found in a URL or in a Lua pattern.
local PLACEHOLDER_LABEL = 'MRGNN_PLACEHOLDER'
local PLACEHOLDER_BEGIN = 'BEG_'
local PLACEHOLDER_END = '_END'
-- Lua pattern matching one placeholder; the capture is the text enclosed
-- between the two labels (the template variable name).
local PLACEHOLDER_REGEX = PLACEHOLDER_BEGIN ..
  PLACEHOLDER_LABEL .. '(.-)' ..
  PLACEHOLDER_LABEL .. PLACEHOLDER_END
-- String indicating an undefined value in the output
local UNDEFINED = '??'
-- Default value for "position" ("before" or "after").
local POS_DEFAULT = 'after'
-- Text of the hyperlink that carries the tooltip in docx output.
local DOCX_TOOLTIP_ANCHOR = '*'
-- Value of the "custom-style" attribute set on the tooltip anchor.
-- (The identifier's spelling is kept as is: it is referenced elsewhere.)
local DOCX_TOOLTIP_ANCHOR_SYTLE = 'Tooltip anchor'
-- Character delimiting a variable in a template, e.g. §content§.
local TEMPLATE_VARIABLE_MARKUP_CHAR = '§'
local mc = TEMPLATE_VARIABLE_MARKUP_CHAR
-- Lua pattern matching a delimited template variable and capturing its name.
local TEMPLATE_VAR_REGEX =
  '%f[\\' .. mc .. ']' .. mc .. '(.-)%f[\\' .. mc .. ']' .. mc
-- Default macro definitions, keyed by output format name.
-- Each value is the source code of the macro(s) as a single string,
-- assembled here from its lines.
local DEFAULT_MACROS = {
  -- ConTeXt code adapted from
  -- https://wiki.contextgarden.net/Footnotes#Footnotes_in_the_margin
  context = table.concat({
    '\\define\\placeMrgnn{%',
    ' \\inoutermargin{\\vtop{%',
    ' \\placelocalnotes[mrgnn][before=,after=]%',
    ' }}%',
    '}',
    '',
    '\\definenote',
    ' [mrgnn]',
    ' [location=text,',
    ' bodyfont=x,',
    ' next=\\placeMrgnn]',
    '',
    '\\setupnotation',
    ' [mrgnn]',
    ' [number=no,',
    ' alternative=serried]',
  }, '\n'),
  latex = table.concat({
    '\\newcommand{\\mrgnn}[1]{%',
    ' \\marginpar{{\\footnotesize #1}}%',
    '}',
  }, '\n'),
}
-- Rename the 'odt' and 'docx' output formats to 'opendocument' and
-- 'openxml', which are the names the rest of this file compares FORMAT
-- against and passes to pandoc.RawInline. Other values are left unchanged.
local FORMAT_ALIASES = { odt = 'opendocument', docx = 'openxml' }
FORMAT = FORMAT_ALIASES[FORMAT] or FORMAT
-- Returns a new table holding the elements 1..#the_table of the_table.
-- The copy is shallow: nested tables are shared with the original.
local function copy_unidimensional_table(the_table)
  local copy = {}
  for i = 1, #the_table do
    copy[i] = the_table[i]
  end
  return copy
end
-- Appends value to list. A table is appended as a shallow copy (see
-- copy_unidimensional_table), so that later changes to the original table
-- do not affect the stored one; any other value is appended as is.
-- Value is of any type except thread and userdata.
local function add_copy_to_list(value, list)
  local is_table = type(value) == 'table'
  insert_in_table(list, is_table and copy_unidimensional_table(value) or value)
end
-- Converts any value to a real boolean: false for nil and false,
-- true for everything else.
local function to_bool(var)
  if var then
    return true
  end
  return false
end
-- Returns the plain-text rendering of a list of Inlines, obtained by
-- wrapping the list in a Para and passing it to pandoc.utils.stringify.
local function inlines_to_string(list)
  local paragraph = pandoc.Para(list)
  return pandoc_to_string(paragraph)
end
-- Rendering definitions, keyed by class name. The entries are created by
-- add_rendering (called from get_renderings_config) and later receive a
-- "render" field in define_rendering_functions.
local config = {}
-- Registers in config the rendering definition of a class of margin notes.
-- config: table in which the definition is stored, under the key class.
-- class: name of the class.
-- The other parameters are stored as is in fields of the same name.
-- The values are plain strings (not the empty string!) or nil.
-- Declared local so that the filter does not create a global function.
local function add_rendering(config, class,
                             body_text, note_text,
                             docx_body_text, docx_note_text,
                             odt_body_text, odt_note_text,
                             csname, position)
  config[class] = {
    body_text = body_text,
    note_text = note_text,
    docx_body_text = docx_body_text,
    docx_note_text = docx_note_text,
    odt_body_text = odt_body_text,
    odt_note_text = odt_note_text,
    csname = csname,
    position = position
  }
end
-- Returns the "text" field of the first item of meta_obj,
-- or nil if meta_obj is nil/false or has no first item.
-- NOTE(review): only the first item is read; any further item of
-- meta_obj is ignored.
local function meta_to_str(meta_obj)
  if not meta_obj then
    return nil
  end
  local first_item = meta_obj[1]
  if not first_item then
    return nil
  end
  return first_item.text
end
-- Reads the rendering definitions listed under the metadata key
-- "mrgnn-define-renderings" and registers each of them in config
-- via add_rendering. Returns meta unchanged.
-- For each definition:
--   * the odt templates fall back on the generic "body-text" and
--     "note-text" when no odt-specific template is given;
--   * "position" defaults to POS_DEFAULT.
local function get_renderings_config(meta)
  local renderings = meta['mrgnn-define-renderings']
  if renderings ~= nil then
    for i = 1, #renderings do
      -- Kept local: the variable must not leak into the global environment.
      local rendering = renderings[i]
      add_rendering(config,
        meta_to_str(rendering.class),
        meta_to_str(rendering['body-text']),
        meta_to_str(rendering['note-text']),
        meta_to_str(rendering['docx-body-text']),
        meta_to_str(rendering['docx-note-text']),
        meta_to_str(rendering['odt-body-text']
          or rendering['body-text']),
        meta_to_str(rendering['odt-note-text']
          or rendering['note-text']),
        meta_to_str(rendering['csname']),
        meta_to_str(rendering['position']) or POS_DEFAULT
      )
    end
  end
  return meta
end
-- Parses markdown_str with pandoc.read and returns the content of the
-- first block of the resulting document.
-- Returns an empty table when markdown_str is the empty string, when the
-- parsed document has no block (e.g. the string contains only whitespace)
-- or when its first block has no "content" field.
local function to_pandoc_inlines(markdown_str)
  local inlines = {}
  if markdown_str ~= '' then
    local first_block = pandoc.read(markdown_str).blocks[1]
    -- Without this check, a document without blocks would raise
    -- "attempt to index a nil value".
    if first_block and first_block.content then
      inlines = first_block.content
    end
  end
  return inlines
end
-- Converts a template string to a list of Inlines in which every template
-- variable is marked by a placeholder string. A nil template is treated
-- as the empty string.
local function template_to_pandoc_fragment(template)
  -- The variable markup is first replaced by the placeholder markup
  -- in order to differentiate the '§' characters that are part of
  -- the markup from the literal ones.
  local placeholder_markup =
    PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
    '%1' ..
    PLACEHOLDER_LABEL .. PLACEHOLDER_END
  local markdown = gsub(template or '', TEMPLATE_VAR_REGEX, placeholder_markup)
  return to_pandoc_inlines(markdown)
end
-- Tells whether elem is a value that can be traversed further,
-- i.e. a table or a userdata.
local function has_children(elem)
  local kind = type(elem)
  return kind == 'table' or kind == 'userdata'
end
-- Returns true if str contains at least one placeholder, false otherwise.
-- Although it would speed up the process,
-- we don't exclude any string based on its key (e.g. "tag")
-- because users could use the same names for attributes.
local function contains_placeholder(str)
  local placeholder_pattern =
    PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
    '.-'..
    PLACEHOLDER_LABEL .. PLACEHOLDER_END
  return find_in_string(str, placeholder_pattern) ~= nil
end
-- Appends step at the end of path.
local function register_step_in_path(path, step)
  path[#path + 1] = step
end
-- Removes the last step of path. Does not return it.
local function unregister_last_step(path)
  path[#path] = nil
end
local function find_paths_to_placeholders(current_path, placeholders_paths, current_table)
  --[[
    Recursively traverses current_table and appends to placeholders_paths
    a copy of the path to every string that contains a placeholder.
    A path is an unidimensional table of the successive indexes and keys
    that lead from the table passed to the outermost call to that string.
    current_path is the path to current_table; it is extended while
    descending and restored before returning.
    When two placeholders are to be found in the same string,
    only one path is recorded.
  ]]--
  for key, child in pairs(current_table) do
    local is_placeholder_string =
      type(child) == 'string' and contains_placeholder(child)
    if is_placeholder_string or has_children(child) then
      register_step_in_path(current_path, key)
      if is_placeholder_string then
        add_copy_to_list(current_path, placeholders_paths)
      else
        find_paths_to_placeholders(current_path, placeholders_paths, child)
      end
      unregister_last_step(current_path)
    end
  end
end
local function get_paths_to_placeholders(list)
  --[[
    list is a List of Inlines.
    Returns a table of the paths to the strings of list that contain
    a placeholder (see find_paths_to_placeholders for the definition
    of a path).
    If two placeholders are to be found in the same string,
    then only one path is returned.
    Example: for a template made of
      a SmallCaps containing a Str with the "content" placeholder,
      a Str ':', a Space,
      an Emph containing a Str with the "attr.def" placeholder,
    the return value is
      {
        { 1, 'content', 1, 'text' },
        { 4, 'content', 1, 'text' }
      }
  ]]--
  local paths = {}
  find_paths_to_placeholders({}, paths, list)
  return paths
end
local function is_replacement_a_list(parent_object_type)
  --[[
    Tells whether a placeholder must be replaced with a list of Inlines
    (true) or with a plain string (false).
    The answer is true only if parent_object_type is 'Str': the object
    containing the placeholder is then an element in a list of Inlines,
    and the replacement is a list of Inlines destined to replace it.
    Otherwise (e.g. Link or Image), the replacement should be
    a plain string.
  ]]--
  if parent_object_type == 'Str' then
    return true
  end
  return false
end
local function get_replacement(placeholder,
                               instance_content, instance_attr,
                               replacement_is_list)
  --[[
    Returns the value to be substituted for placeholder:
    a list of Inlines if replacement_is_list is true,
    a plain string otherwise.
    placeholder is either 'content' (replaced with instance_content)
    or 'attr.KEY' (replaced with instance_attr[KEY], or with
    the UNDEFINED marker if there is no such attribute).
    Markdown is interpreted in the first case only.
    Raises an error for any other placeholder.
  ]]--
  -- Must be local: otherwise every call would write a global variable.
  local replacement
  if placeholder == 'content' then
    if replacement_is_list then
      -- The returned list is modified in place by the caller's helpers
      -- (strings are merged into its first and last items). A deep copy
      -- is returned so that instance_content stays intact when a template
      -- contains several "content" placeholders.
      replacement = instance_content:walk({})
    else
      replacement = inlines_to_string(instance_content)
    end
  elseif find_in_string(placeholder, '^attr%.') then
    local key = get_substring(placeholder, #'attr.' + 1)
    if replacement_is_list then
      local replacement_markdown = instance_attr[key]
        or '**' .. UNDEFINED .. '**'
      replacement = to_pandoc_inlines(replacement_markdown)
    else
      replacement = instance_attr[key] or UNDEFINED
    end
  else
    error('Invalid content "' .. placeholder .. '" in the value of a ' ..
          '"body-text" or "note-text" metadata variable. ' ..
          'It must either be "content" or begin with "attr".')
  end
  return replacement
end
-- Returns the parts of s located before index i_beg and after index i_end.
-- Each returned value is false (not the empty string) when there is
-- nothing on that side.
local function get_strings_around_substring(s, i_beg, i_end)
  local prefix = i_beg > 1 and get_substring(s, 1, i_beg - 1)
  local suffix = i_end < #s and get_substring(s, i_end + 1)
  return prefix, suffix
end
-- Adds the strings that surrounded a placeholder around its replacement.
-- replacement: a table (list of Inlines) or a plain string.
-- string_before_placeholder, string_after_placeholder: a string,
--   or a false value when there is nothing on that side.
-- If replacement is a table, it is modified in place and returned:
--   each string is merged into the adjacent item if this item is a Str,
--   and inserted as a new Str otherwise (including when the list is empty).
-- If replacement is a string, the concatenation is returned.
local function insert_strings_around_placeholder(replacement,
                                                 string_before_placeholder,
                                                 string_after_placeholder)
  if type(replacement) ~= 'table' then
    return (string_before_placeholder or '') ..
      replacement ..
      (string_after_placeholder or '')
  end
  if string_before_placeholder then
    -- first is nil when the replacement is empty
    -- (e.g. an attribute whose value is the empty string).
    local first = replacement[1]
    if first and first.t == 'Str' then
      first.text = string_before_placeholder .. first.text
    else
      insert_in_table(replacement, 1, pandoc.Str(string_before_placeholder))
    end
  end
  if string_after_placeholder then
    local last = replacement[#replacement]
    if last and last.t == 'Str' then
      last.text = last.text .. string_after_placeholder
    else
      insert_in_table(replacement, pandoc.Str(string_after_placeholder))
    end
  end
  return replacement
end
-- Substitutes replacement for the placeholder located between the indexes
-- i_placeholder_beg and i_placeholder_end of the string
-- pandoc_elems[i_object][placeholder_key]. The text surrounding the
-- placeholder in that string is preserved.
-- If replacement is a table, its items (completed with the surrounding
-- text) take the place of pandoc_elems[i_object] in pandoc_elems.
-- Otherwise, the completed string is stored back in
-- pandoc_elems[i_object][placeholder_key].
-- Does not return anything.
local function insert_replacement_in_elems(replacement,
                                           pandoc_elems, i_object, placeholder_key,
                                           i_placeholder_beg, i_placeholder_end)
  local is_list = type(replacement) == 'table'
  local holder = pandoc_elems[i_object]
  local text_before, text_after =
    get_strings_around_substring(holder[placeholder_key],
                                 i_placeholder_beg, i_placeholder_end)
  local completed = insert_strings_around_placeholder(replacement,
                                                      text_before, text_after)
  if not is_list then
    holder[placeholder_key] = completed
    return
  end
  -- Remove the object holding the placeholder,
  -- then put the new items at its position, in order.
  table_pop(pandoc_elems, i_object)
  for i = 1, #completed do
    insert_in_table(pandoc_elems, i_object + i - 1, completed[i])
  end
end
-- Returns the list of the placeholders found in str_with_placeholders,
-- in order of appearance. Each item is a table with the fields:
--   value: the text captured by PLACEHOLDER_REGEX (the variable name);
--   beg, ['end']: the indexes of the first and last char of the match.
local function find_placeholders_in_string(str_with_placeholders)
  local found = {}
  local search_from = 1
  while true do
    local i_beg, i_end, name =
      find_in_string(str_with_placeholders, PLACEHOLDER_REGEX, search_from)
    if not name then
      break
    end
    found[#found + 1] = {
      value = name,
      beg = i_beg,
      ['end'] = i_end
    }
    -- The next search starts at the last char of the current match.
    search_from = i_end
  end
  return found
end
local function replace_placeholders_in_value(pandoc_elems,
                                             i_object, placeholder_key,
                                             instance_content, instance_attr)
  --[[
    Does not return anything: modifies instead pandoc_elems
    by replacing every placeholder found in the string
    pandoc_elems[i_object][placeholder_key]
    with the corresponding value from instance_content (a List of Inlines)
    or instance_attr (a table of key/value pairs, where the values may
    contain markdown formatting).
    The kind of replacement (list of Inlines or plain string) depends on
    the type of pandoc_elems[i_object] (see is_replacement_a_list).
  ]]--
  local holder = pandoc_elems[i_object]
  local text = holder[placeholder_key]
  local wants_list = is_replacement_a_list(holder.t)
  local occurrences = find_placeholders_in_string(text)
  -- The placeholders are processed from the last one to the first one.
  for i = #occurrences, 1, -1 do
    local occurrence = occurrences[i]
    local new_value = get_replacement(occurrence.value,
                                      instance_content, instance_attr,
                                      wants_list)
    insert_replacement_in_elems(new_value,
                                pandoc_elems, i_object, placeholder_key,
                                occurrence.beg, occurrence['end'])
  end
end
local function replace_placeholders(
    -- table of Inlines, generally containing Str objects
    -- whose text contains a placeholder
    inlines_with_placeholders,
    -- table of paths (see find_paths_to_placeholders for a definition)
    paths_to_placeholders,
    -- List of inlines
    instance_content,
    -- key-value table of attributes. The values may contain markdown.
    instance_attr)
  --[[
    Replaces the placeholders found at the end of each path of
    paths_to_placeholders with the data from instance_content and
    instance_attr, as required by the placeholder strings
    (see replace_placeholders_in_value).
    The paths are processed from the last one to the first one.
    inlines_with_placeholders is modified in place; the same table
    is returned.
  ]]--
  for i_path = #paths_to_placeholders, 1, -1 do
    local path = paths_to_placeholders[i_path]
    -- In a path, the last step is the key of the string holding the
    -- placeholder and the last but one is the index of the object that
    -- owns this string. The preceding steps lead to the container
    -- of this object.
    local i_owner = #path - 1
    if i_owner < 1 then
      i_owner = 1
    end
    local container = inlines_with_placeholders
    for i_step = 1, i_owner - 1 do
      container = container[path[i_step]]
    end
    replace_placeholders_in_value(container, path[i_owner], path[i_owner + 1],
                                  instance_content, instance_attr)
  end
  return inlines_with_placeholders
end
local function template_to_function(template)
  --[[
    Returns a function that takes data (the content and the attributes
    of an instance) and inserts it into the given template.
    Returns nothing if template is nil or false.
  ]]--
  --[[
    inlines_with_placeholders and paths_to_placeholders
    are created and memoized the first time the returned
    function is called.
    inlines_with_placeholders cannot be used directly
    for it is a reference to a table that would be changed
    by replace_placeholders. That's why we create a deep
    copy of it via the walk function at every call.
    paths_to_placeholders can be used directly by
    replace_placeholders because it is only traversed.
  ]]--
  if not template then
    return
  end
  local paths_to_placeholders
  local inlines_with_placeholders
  return function(instance_content, instance_attr)
    if not inlines_with_placeholders then
      inlines_with_placeholders = template_to_pandoc_fragment(template)
    end
    local inlines_copy = inlines_with_placeholders:walk({})
    if not paths_to_placeholders then
      paths_to_placeholders =
        get_paths_to_placeholders(inlines_with_placeholders)
    end
    return replace_placeholders(
      inlines_copy, paths_to_placeholders,
      instance_content, instance_attr)
  end
end
-- Adds to every entry of config a "render" table whose fields "body" and
-- "note" are the functions built by template_to_function from the
-- templates of the entry. The format-specific templates ("odt_*" or
-- "docx_*") are used when FORMAT is 'opendocument' or 'openxml', the
-- generic ones otherwise.
-- The parameter meta is not used.
local function define_rendering_functions(meta)
  local prefixes = { opendocument = 'odt_', openxml = 'docx_' }
  local format_prefix = prefixes[FORMAT] or ''
  for _, class_config in pairs(config) do
    local render = {}
    for _, part in ipairs({'body', 'note'}) do
      local template = class_config[format_prefix .. part .. '_text']
      render[part] = template_to_function(template)
    end
    class_config.render = render
  end
end
-- For the formats 'context' and 'latex', sets meta['header-includes'] to a
-- list of two items: the previous value of the field (or an empty RawBlock
-- if there was none) and a RawBlock containing the default macros for
-- the format. Does nothing for the other formats.
local function set_macro_definition(meta)
  if FORMAT ~= 'context' and FORMAT ~= 'latex' then
    return
  end
  local previous_includes = meta['header-includes']
    or pandoc.RawBlock(FORMAT, '')
  local macros = pandoc.RawBlock(FORMAT, DEFAULT_MACROS[FORMAT])
  meta['header-includes'] = { previous_includes, macros }
end
-- Filter function for the metadata: reads the rendering definitions,
-- builds the rendering functions, then adds the macro definitions to
-- the header. Returns meta.
local function Meta(meta)
  local steps = {
    get_renderings_config,
    define_rendering_functions,
    set_macro_definition,
  }
  for _, step in ipairs(steps) do
    step(meta)
  end
  return meta
end
-- Number of docx tooltips created so far; used to build unique bookmark ids.
local i_invocation = 0

-- Characters that must not appear literally in a double-quoted
-- XML attribute value, with their replacements.
local XML_ATTRIBUTE_ESCAPES = {
  ['&'] = '&amp;',
  ['<'] = '&lt;',
  ['>'] = '&gt;',
  ['"'] = '&quot;'
}

-- Returns text with the characters & < > " replaced by XML entities.
local function escape_xml_attribute(text)
  -- The parentheses discard the second value returned by gsub.
  return (gsub(text, '[&<>"]', XML_ATTRIBUTE_ESCAPES))
end

-- Wraps the content of a margin note in the raw code required
-- by the output format.
-- content: List of Inlines (output of replace_placeholders).
-- class_name: name of the class of the note; its "csname" setting
--   (default: 'mrgnn') is the macro used for 'context' and 'latex'.
-- Returns a list of Inlines:
--   * context, latex: content itself, enclosed in place in '\csname{' and '}';
--   * openxml: a new list made of a bookmark and a hyperlink to it whose
--     tooltip is the plain text of the note;
--   * opendocument: content itself, enclosed in place in an annotation;
--   * other formats: content unchanged.
local function wrap_in_raw_note_code(content, class_name)
  local margin_note = content
  if FORMAT == 'context' or FORMAT == 'latex' then
    local csname = config[class_name].csname or 'mrgnn'
    insert_in_table(margin_note, 1,
                    pandoc.RawInline(FORMAT, '\\' .. csname .. '{'))
    insert_in_table(margin_note, pandoc.RawInline(FORMAT, '}'))
  elseif FORMAT == 'openxml' then
    i_invocation = i_invocation + 1
    local bookmark_id = 'mrgnn_' .. i_invocation
    -- The text of the note ends up inside an XML attribute: it is escaped
    -- so that a quote, an ampersand or an angle bracket in the note
    -- cannot produce malformed XML.
    local tooltip = escape_xml_attribute(pandoc_to_string(margin_note))
    margin_note = {
      pandoc.RawInline(
        FORMAT,
        '<w:bookmarkStart w:id="' .. bookmark_id ..
        '" w:name="' .. bookmark_id .. '"/>' ..
        '<w:bookmarkEnd w:id="' .. bookmark_id .. '"/>' ..
        '<w:hyperlink w:anchor="' .. bookmark_id .. '" ' ..
        'w:tooltip="' .. tooltip .. '">'),
      pandoc.Span(
        pandoc.Str(DOCX_TOOLTIP_ANCHOR),
        { ['custom-style'] = DOCX_TOOLTIP_ANCHOR_SYTLE }),
      pandoc.RawInline(
        FORMAT,
        '</w:hyperlink>')
    }
  elseif FORMAT == 'opendocument' then
    insert_in_table(margin_note, 1, pandoc.RawInline(FORMAT,
      '<office:annotation><text:p>'))
    insert_in_table(margin_note, pandoc.RawInline(FORMAT,
      '</text:p></office:annotation>'))
  end
  return margin_note
end
-- Filter function for Span elements.
-- If span has one of the classes defined in config, replaces its content
-- with the rendered body and returns a list made of the span and of the
-- rendered margin note, the note coming before or after the span
-- according to the "position" setting of the class.
-- Returns nothing if span has none of these classes.
-- Raises an error if "position" is neither "before" nor "after".
-- Only one class is taken into account. As the order in which pairs
-- iterates is not specified, which one it is is not defined when
-- the span has several of the defined classes.
local function render_margin_notes(span)
  for class_name, class_config in pairs(config) do
    if span.classes:includes(class_name) then
      local render_note = class_config.render.note
      local render_body = class_config.render.body
      local note_position = class_config.position
      local margin_note = {}
      local body = {}
      if render_note then
        margin_note = wrap_in_raw_note_code(
          render_note(span.content, span.attributes), class_name)
      end
      if render_body then
        body = render_body(span.content, span.attributes)
      end
      span.content = body
      local span_with_note = {}
      if note_position == 'before' then
        span_with_note = margin_note
        insert_in_table(span_with_note, span)
      elseif note_position == 'after' then
        span_with_note = { span, unpack_table(margin_note) }
      else
        -- Every part of the message must be joined with '..': a string
        -- literal directly following class_name would be parsed as
        -- a call to class_name.
        error('Invalid value "' .. note_position .. '" ' ..
              'for "position" in the definition of ' ..
              'the margin-note class "' .. class_name .. '". ' ..
              'Expected "before" or "after".')
      end
      return span_with_note
    end
  end
end
-- The filter is returned as a list of two filters: the first one handles
-- the metadata (which fills config), the second one renders the spans
-- using config.
-- NOTE(review): this relies on pandoc applying the filters of a returned
-- list in order, the first one over the whole document before the second.
return {
{ Meta = Meta },
{ Span = render_margin_notes }
}