565 lines
19 KiB
Lua
565 lines
19 KiB
Lua
-- margin-notes.lua
|
||
-- A Pandoc Lua filter to create templated marginal notes based on classes.
|
||
-- Copyright 2022 Bastien Dumont (bastien.dumont [at] posteo.net)
|
||
-- This file is under the MIT License: see LICENSE for more details
|
||
|
||
local find_in_string = string.find
|
||
local gsub = string.gsub
|
||
local get_substring = string.sub
|
||
local unpack_table = table.unpack
|
||
local insert_in_table = table.insert
|
||
local table_pop = table.remove
|
||
local pandoc_to_string = pandoc.utils.stringify
|
||
|
||
-- The placeholder string must only contain characters that never need
-- escaping, whether they end up in a URL or in a Lua pattern.
local PLACEHOLDER_LABEL = 'MRGNN_PLACEHOLDER'
local PLACEHOLDER_BEGIN = 'BEG_'
local PLACEHOLDER_END = '_END'
-- Lua pattern matching one placeholder; the capture is the variable name.
local PLACEHOLDER_REGEX = table.concat({
  PLACEHOLDER_BEGIN, PLACEHOLDER_LABEL,
  '(.-)',
  PLACEHOLDER_LABEL, PLACEHOLDER_END
})

-- String standing for an undefined value in the output.
local UNDEFINED = '??'

-- Default value for "position" ("before" or "after").
local POS_DEFAULT = 'after'

-- Text and custom style of the anchor that carries the tooltip in DOCX.
local DOCX_TOOLTIP_ANCHOR = '*'
local DOCX_TOOLTIP_ANCHOR_SYTLE = 'Tooltip anchor'

-- Character delimiting a variable in a template, e.g. §content§.
local TEMPLATE_VARIABLE_MARKUP_CHAR = '§'
local mc = TEMPLATE_VARIABLE_MARKUP_CHAR
-- Lua pattern matching one template variable. The frontier patterns
-- reject a markup character that directly follows a backslash or
-- another markup character.
local TEMPLATE_VAR_REGEX = table.concat({
  '%f[\\', mc, ']', mc,
  '(.-)',
  '%f[\\', mc, ']', mc
})
|
||
|
||
-- Default macro definitions, keyed by output format. Each value is the
-- raw code that set_macro_definition adds to the document header.
local DEFAULT_MACROS = {
  -- ConTeXt code adapted from
  -- https://wiki.contextgarden.net/Footnotes#Footnotes_in_the_margin
  context = table.concat({
    '\\define\\placeMrgnn{%',
    ' \\inoutermargin{\\vtop{%',
    ' \\placelocalnotes[mrgnn][before=,after=]%',
    ' }}%',
    '}',
    '',
    '\\definenote',
    ' [mrgnn]',
    ' [location=text,',
    ' bodyfont=x,',
    ' next=\\placeMrgnn]',
    '',
    '\\setupnotation',
    ' [mrgnn]',
    ' [number=no,',
    ' alternative=serried]'
  }, '\n'),
  latex = table.concat({
    '\\newcommand{\\mrgnn}[1]{%',
    ' \\marginpar{{\\footnotesize #1}}%',
    '}'
  }, '\n')
}
|
||
|
||
|
||
-- Replace the 'odt' and 'docx' format names with the names this filter
-- uses for raw content ('opendocument' and 'openxml'); any other value
-- of FORMAT is left as it is.
do
  local raw_format_names = { odt = 'opendocument', docx = 'openxml' }
  FORMAT = raw_format_names[FORMAT] or FORMAT
end
|
||
|
||
-- Returns a new table holding the elements found at the indices
-- 1 to #the_table of the_table. Nested tables are not copied.
local function copy_unidimensional_table(the_table)
  local duplicate = {}
  for position = 1, #the_table do
    duplicate[position] = the_table[position]
  end
  return duplicate
end
|
||
|
||
-- Appends value to list. A table is copied first (with
-- copy_unidimensional_table), so that later changes to the original
-- table do not affect the stored entry; any other value is appended
-- as it is.
-- value is of any type except thread and userdata.
local function add_copy_to_list(value, list)
  if type(value) ~= 'table' then
    insert_in_table(list, value)
    return
  end
  insert_in_table(list, copy_unidimensional_table(value))
end
|
||
|
||
-- Converts any value to a boolean: false for nil and false,
-- true for everything else.
local function to_bool(var)
  if var then
    return true
  end
  return false
end
|
||
|
||
-- Returns the plain-text rendering of list (a List of Inlines),
-- obtained by wrapping it in a paragraph and stringifying the result.
local function inlines_to_string(list)
  local paragraph = pandoc.Para(list)
  return pandoc_to_string(paragraph)
end
|
||
|
||
-- Rendering definitions, indexed by class name.
local config = {}

-- Stores the rendering definition of a class in config[class],
-- replacing any previous definition for the same class.
-- The values are plain strings (not the empty string!) or nil.
-- Declared local so that the function does not leak
-- into the global environment.
local function add_rendering(config, class,
                             body_text, note_text,
                             docx_body_text, docx_note_text,
                             odt_body_text, odt_note_text,
                             csname, position)
  config[class] = {
    body_text = body_text,
    note_text = note_text,
    docx_body_text = docx_body_text,
    docx_note_text = docx_note_text,
    odt_body_text = odt_body_text,
    odt_note_text = odt_note_text,
    csname = csname,
    position = position
  }
end
|
||
|
||
-- Returns the text field of the first element of meta_obj,
-- or nil if meta_obj is nil or has no first element.
-- Only the first element is taken into account.
local function meta_to_str(meta_obj)
  if not meta_obj then
    return nil
  end
  local first_item = meta_obj[1]
  if not first_item then
    return nil
  end
  return first_item.text
end
|
||
|
||
-- Reads the list found under the "mrgnn-define-renderings" metadata key
-- and registers one rendering in config per item of this list.
-- The ODT templates fall back to the generic "body-text" and "note-text"
-- ones; the DOCX templates have no fallback. "position" defaults to
-- POS_DEFAULT.
-- Returns meta unchanged.
local function get_renderings_config(meta)
  local definitions = meta['mrgnn-define-renderings']
  if definitions then
    for i = 1, #definitions do
      -- Local on purpose: a bare assignment here would create a global.
      local definition = definitions[i]
      add_rendering(config,
        meta_to_str(definition.class),
        meta_to_str(definition['body-text']),
        meta_to_str(definition['note-text']),
        meta_to_str(definition['docx-body-text']),
        meta_to_str(definition['docx-note-text']),
        meta_to_str(definition['odt-body-text']
                    or definition['body-text']),
        meta_to_str(definition['odt-note-text']
                    or definition['note-text']),
        meta_to_str(definition['csname']),
        meta_to_str(definition['position']) or POS_DEFAULT
      )
    end
  end
  return meta
end
|
||
|
||
-- Parses markdown_str with pandoc.read and returns the content of the
-- first block of the resulting document.
-- Returns an empty list of Inlines when markdown_str is the empty string,
-- when the document has no block (e.g. a whitespace-only string) or when
-- the first block has no content field. The empty list is built with
-- pandoc.Inlines, so that it offers the same methods (e.g. walk)
-- as a parsed one.
-- NOTE(review): the content of the first block is assumed to be
-- a list of Inlines (Para or Plain) — confirm for other block types.
local function to_pandoc_inlines(markdown_str)
  if markdown_str == '' then
    return pandoc.Inlines({})
  end
  local first_block = pandoc.read(markdown_str).blocks[1]
  if first_block and first_block.content then
    return first_block.content
  end
  return pandoc.Inlines({})
end
|
||
|
||
-- Turns every §variable§ of template into a placeholder string,
-- then parses the result as markdown (see to_pandoc_inlines).
-- A nil template is handled as the empty string.
-- The substitution is necessary in order to differentiate
-- the '§' characters that are part of the variable markup
-- from the literal ones.
local function template_to_pandoc_fragment(template)
  local placeholder_markup =
    PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
    '%1' ..
    PLACEHOLDER_LABEL .. PLACEHOLDER_END
  local source = template or ''
  -- Only the first value returned by gsub (the new string) is kept.
  local marked_up = gsub(source, TEMPLATE_VAR_REGEX, placeholder_markup)
  return to_pandoc_inlines(marked_up)
end
|
||
|
||
-- Tells whether elem is a value that can be traversed,
-- i.e. a table or a userdata.
local function has_children(elem)
  local kind = type(elem)
  return kind == 'table' or kind == 'userdata'
end
|
||
|
||
-- Tells whether str contains at least one placeholder.
-- Although it would speed up the process,
-- we don't exclude any string based on its key (e.g. "tag")
-- because users could use the same names for attributes.
local function contains_placeholder(str)
  local pattern =
    PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
    '.-' ..
    PLACEHOLDER_LABEL .. PLACEHOLDER_END
  local match_start = find_in_string(str, pattern)
  return match_start ~= nil
end
|
||
|
||
-- Appends step (an index or a key) at the end of path.
local function register_step_in_path(path, step)
  path[#path + 1] = step
end
|
||
|
||
-- Removes the last step of path.
local function unregister_last_step(path)
  path[#path] = nil
end
|
||
|
||
--[[
  Traverses current_table recursively and appends to placeholders_paths
  a copy of the path to every string containing a placeholder.
  current_path represents the path to the value being examined,
  i.e. an unidimensional table of the successive indices and keys
  that lead from the initial table to this value. It is modified
  during the traversal but is left unchanged when the function returns.
  When two placeholders are to be found in the same string,
  only one path is recorded.
  Does not return anything.
]]--
local function find_paths_to_placeholders(current_path, placeholders_paths, current_table)
  for index, elem in pairs(current_table) do
    local elem_type = type(elem)
    if elem_type == 'table' or elem_type == 'userdata' then
      register_step_in_path(current_path, index)
      find_paths_to_placeholders(current_path, placeholders_paths, elem)
      unregister_last_step(current_path)
    elseif elem_type == 'string' and contains_placeholder(elem) then
      register_step_in_path(current_path, index)
      -- A copy is stored because current_path keeps changing.
      add_copy_to_list(current_path, placeholders_paths)
      unregister_last_step(current_path)
    end
  end
end
|
||
|
||
--[[
  list is a List of Inlines.
  Returns a table of paths (see find_paths_to_placeholders),
  one for each string of list that contains a placeholder.
  If two placeholders are to be found in the same string,
  then only one path is returned.

  Example:
    template = {
      pandoc.SmallCaps(pandoc.Str(
        PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
        'content' ..
        PLACEHOLDER_LABEL .. PLACEHOLDER_END)),
      pandoc.Str(':'), pandoc.Space(),
      pandoc.Emph(pandoc.Str(
        PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
        'attr.def' .. '.' ..
        PLACEHOLDER_LABEL .. PLACEHOLDER_END))
    }
    return value = {
      { 1, 'content', 1, 'text' },
      { 4, 'content', 1, 'text' }
    }
]]--
local function get_paths_to_placeholders(list)
  local found_paths = {}
  -- The traversal starts with an empty path at the root of list.
  find_paths_to_placeholders({}, found_paths, list)
  return found_paths
end
|
||
|
||
--[[
  Tells whether a placeholder must be replaced by a list of Inlines.
  If parent_object_type is Str, this means that the object
  containing the placeholder is an element in a list of Inlines.
  In this case, the replacement should be a list of Inlines
  destined to replace this element.
  Otherwise (e.g. Link or Image), the replacement should be
  a plain string.
]]--
local function is_replacement_a_list(parent_object_type)
  if parent_object_type == 'Str' then
    return true
  end
  return false
end
|
||
|
||
--[[
  Returns the value to be substituted for placeholder:
  – "content" stands for instance_content;
  – "attr.KEY" stands for instance_attr[KEY], or for a marker of
    undefined value (see UNDEFINED) if this attribute is not set.
  Returns a list of Inlines if replacement_is_list is true,
  a plain string otherwise.
  Markdown is interpreted in the first case only.
  For "content" with replacement_is_list, instance_content itself
  is returned, not a copy.
  Raises an error for any other placeholder.
  No global variable is used to hold the result.
]]--
local function get_replacement(placeholder,
                               instance_content, instance_attr,
                               replacement_is_list)
  if placeholder == 'content' then
    if replacement_is_list then
      return instance_content
    end
    return inlines_to_string(instance_content)
  end
  if find_in_string(placeholder, '^attr%.') then
    local key = get_substring(placeholder, #'attr.' + 1)
    local attr_value = instance_attr[key]
    if replacement_is_list then
      return to_pandoc_inlines(attr_value or '**' .. UNDEFINED .. '**')
    end
    return attr_value or UNDEFINED
  end
  error('Invalid content "' .. placeholder .. '" in the value of a ' ..
        '"body-text" or "note-text" metadata variable. ' ..
        'It must either be "content" or begin with "attr.".')
end
|
||
|
||
-- Returns the part of s located before index i_beg and the part
-- located after index i_end. Each value is false when there is
-- no character on the corresponding side.
local function get_strings_around_substring(s, i_beg, i_end)
  local before = i_beg > 1 and get_substring(s, 1, i_beg - 1)
  local after = i_end < #s and get_substring(s, i_end + 1)
  return before, after
end
|
||
|
||
--[[
  Adds the text that surrounded a placeholder to its replacement.
  replacement is either a plain string or a list of Inlines.
  string_before_placeholder and string_after_placeholder are strings,
  or a false value when there is nothing to add on this side.
  – If replacement is a string, returns the concatenation
    of the three strings.
  – If replacement is a list, it is modified in place and returned:
    the surrounding text is merged into its first or last element
    when this element is a Str, and added as a new Str otherwise.
    An empty list is supported (it happens when an attribute is set
    to the empty string): the surrounding text then makes up the
    whole result.
]]--
local function insert_strings_around_placeholder(replacement,
                                                 string_before_placeholder,
                                                 string_after_placeholder)
  if type(replacement) ~= 'table' then
    return (string_before_placeholder or '') ..
      replacement ..
      (string_after_placeholder or '')
  end
  if string_before_placeholder then
    local first = replacement[1]
    -- first is nil when the list is empty.
    if first and first.t == 'Str' then
      first.text = string_before_placeholder .. first.text
    else
      insert_in_table(replacement, 1, pandoc.Str(string_before_placeholder))
    end
  end
  if string_after_placeholder then
    local last = replacement[#replacement]
    -- last is nil when the list is still empty at this point.
    if last and last.t == 'Str' then
      last.text = last.text .. string_after_placeholder
    else
      insert_in_table(replacement, pandoc.Str(string_after_placeholder))
    end
  end
  return replacement
end
|
||
|
||
--[[
  Substitutes replacement for the placeholder located between the
  indices i_placeholder_beg and i_placeholder_end of the string
  pandoc_elems[i_object][placeholder_key].
  The text found around the placeholder in this string is kept.
  – If replacement is a list, its elements take the place
    of pandoc_elems[i_object] in pandoc_elems.
  – Otherwise, the string is overwritten with the new one.
  Does not return anything: pandoc_elems is modified in place.
]]--
local function insert_replacement_in_elems(replacement,
                                           pandoc_elems, i_object, placeholder_key,
                                           i_placeholder_beg, i_placeholder_end)
  -- Must be determined before replacement is completed below.
  local splice_as_list = type(replacement) == 'table'
  local host_string = pandoc_elems[i_object][placeholder_key]
  local prefix, suffix = get_strings_around_substring(
    host_string, i_placeholder_beg, i_placeholder_end)
  local completed = insert_strings_around_placeholder(
    replacement, prefix, suffix)
  if not splice_as_list then
    pandoc_elems[i_object][placeholder_key] = completed
    return
  end
  -- The new elements are inserted right after the old one,
  -- last element first, so that they end up in the right order.
  for i = #completed, 1, -1 do
    insert_in_table(pandoc_elems, i_object + 1, completed[i])
  end
  table_pop(pandoc_elems, i_object)
end
|
||
|
||
-- Returns a list describing the placeholders of str_with_placeholders
-- in their order of appearance. Each item has three fields: value
-- (the name of the variable), beg and end (the indices of the first
-- and last characters of the placeholder in the string).
local function find_placeholders_in_string(str_with_placeholders)
  local placeholders_data = {}
  local search_from = 1
  while true do
    local i_beg, i_end, placeholder =
      find_in_string(str_with_placeholders, PLACEHOLDER_REGEX, search_from)
    if not placeholder then
      break
    end
    placeholders_data[#placeholders_data + 1] = {
      value = placeholder,
      beg = i_beg,
      ['end'] = i_end
    }
    -- The next search starts on the last character of this match.
    search_from = i_end
  end
  return placeholders_data
end
|
||
|
||
--[[
  Does not return anything: modifies instead pandoc_elems
  by replacing the placeholders found in the string
  pandoc_elems[i_object][placeholder_key]
  with the corresponding values from instance_content (a List of Inlines)
  and instance_attr (a table of key/value pairs, where the values may
  contain markdown formatting).
  The placeholders are processed from the last one to the first one,
  so that the indices of those still to be replaced remain valid.
]]--
local function replace_placeholders_in_value(pandoc_elems,
                                             i_object, placeholder_key,
                                             instance_content, instance_attr)
  local host = pandoc_elems[i_object]
  local wants_list = is_replacement_a_list(host.t)
  local found = find_placeholders_in_string(host[placeholder_key])
  for i = #found, 1, -1 do
    local data = found[i]
    local replacement = get_replacement(
      data.value, instance_content, instance_attr, wants_list)
    insert_replacement_in_elems(
      replacement,
      pandoc_elems, i_object, placeholder_key,
      data.beg, data['end'])
  end
end
|
||
|
||
--[[
  Replaces the placeholders of inlines_with_placeholders pointed at
  by the paths in paths_to_placeholders with the data in
  instance_content and instance_attr as required by the placeholder
  strings.
  The text surrounding a placeholder in its string is kept.
  The paths are processed from the last one to the first one.
  inlines_with_placeholders is modified in place and returned.
]]--
local function replace_placeholders(
    -- table of Inlines, generally containing Str objects
    -- whose text contains a placeholder
    inlines_with_placeholders,
    -- table of paths (see find_paths_to_placeholders for a definition)
    paths_to_placeholders,
    -- List of inlines
    instance_content,
    -- key-value table of attributes. The values may contain markdown.
    instance_attr)
  for i_path = #paths_to_placeholders, 1, -1 do
    local path = paths_to_placeholders[i_path]
    -- Follow the path down to the table containing the object
    -- that holds the string; the last two steps are the index of
    -- this object and the key of the string.
    local scope = inlines_with_placeholders
    local cursor = 1
    while cursor < #path - 1 do
      scope = scope[path[cursor]]
      cursor = cursor + 1
    end
    replace_placeholders_in_value(scope, path[cursor], path[cursor + 1],
                                  instance_content, instance_attr)
  end
  return inlines_with_placeholders
end
|
||
|
||
--[[
  Returns a function that takes data (instance_content and
  instance_attr) and inserts it into the given template.
  Returns nothing if template is nil or false.

  – The parsed template and the paths to its placeholders
    are created and memoized the first time the returned
    function is called.
  – The parsed template cannot be used directly
    for it is a reference to a table that would be changed
    by replace_placeholders. That's why a copy of it is made
    via the walk function at every call.
  – The paths can be used directly by replace_placeholders
    because they are only traversed.
]]--
local function template_to_function(template)
  if not template then
    return
  end
  local cached_inlines
  local cached_paths
  return function(instance_content, instance_attr)
    if not cached_inlines then
      cached_inlines = template_to_pandoc_fragment(template)
    end
    local working_copy = cached_inlines:walk({})
    if not cached_paths then
      cached_paths = get_paths_to_placeholders(cached_inlines)
    end
    return replace_placeholders(
      working_copy, cached_paths,
      instance_content, instance_attr)
  end
end
|
||
|
||
-- Adds to every class of config a render table with two entries,
-- body and note, built by template_to_function from the templates
-- of the class. The format-specific templates (odt_* or docx_*) are
-- used for the 'opendocument' and 'openxml' formats, the generic ones
-- otherwise. An entry is nil when the template is not defined.
-- The meta parameter is not used.
local function define_rendering_functions(meta)
  local prefix_by_format = { opendocument = 'odt_', openxml = 'docx_' }
  local format_prefix = prefix_by_format[FORMAT] or ''
  for _, class_config in pairs(config) do
    local render = {}
    class_config.render = render
    for _, part in ipairs({'body', 'note'}) do
      local template = class_config[format_prefix .. part .. '_text']
      render[part] = template_to_function(template)
    end
  end
end
|
||
|
||
-- For the 'context' and 'latex' formats, sets the "header-includes"
-- metadata to a two-item list: its previous value (or an empty raw
-- block if there was none) followed by the default macro definitions
-- for the format. Does nothing for the other formats.
local function set_macro_definition(meta)
  if FORMAT ~= 'context' and FORMAT ~= 'latex' then
    return
  end
  local previous_includes =
    meta['header-includes'] or pandoc.RawBlock(FORMAT, '')
  local macros = pandoc.RawBlock(FORMAT, DEFAULT_MACROS[FORMAT])
  meta['header-includes'] = { previous_includes, macros }
end
|
||
|
||
-- Filter function for the metadata: reads the rendering definitions,
-- builds the rendering functions, then adds the macro definitions
-- to the header. Returns meta.
local function Meta(meta)
  local setup_steps = {
    get_renderings_config,
    define_rendering_functions,
    set_macro_definition
  }
  for _, run_step in ipairs(setup_steps) do
    run_step(meta)
  end
  return meta
end
|
||
|
||
-- Number of DOCX tooltips created so far; used to build unique bookmark ids.
local i_invocation = 0

-- Escapes the characters of text that are not allowed as such
-- in a double-quoted XML attribute value.
local function escape_xml_attribute(text)
  local entities = {
    ['&'] = '&amp;',
    ['<'] = '&lt;',
    ['>'] = '&gt;',
    ['"'] = '&quot;'
  }
  -- The parentheses drop the second value returned by gsub.
  return (gsub(text, '[&<>"]', entities))
end

--[[
  Wraps the rendered note in the raw code required by the output format.
  content is a List of Inlines (output of replace_placeholders).
  – context, latex: content is modified in place; it is surrounded by
    a call to the macro named by the csname of the class ('mrgnn'
    by default).
  – openxml: a new list is returned. It contains a hyperlink to a bookmark
    created at the same place, whose tooltip is the plain text
    of content (with the XML special characters escaped)
    and whose text is DOCX_TOOLTIP_ANCHOR.
  – opendocument: content is modified in place; it is surrounded
    by the tags of an annotation.
  – other formats: content is returned unchanged.
  Returns the resulting list.
]]--
local function wrap_in_raw_note_code(content, class_name)
  local margin_note = content
  if FORMAT == 'context' or FORMAT == 'latex' then
    local csname = config[class_name].csname or 'mrgnn'
    insert_in_table(margin_note, 1,
                    pandoc.RawInline(FORMAT, '\\' .. csname .. '{'))
    insert_in_table(margin_note, pandoc.RawInline(FORMAT, '}'))
  elseif FORMAT == 'openxml' then
    i_invocation = i_invocation + 1
    local bookmark_id = 'mrgnn_' .. i_invocation
    -- The note text ends up in an attribute value: without escaping,
    -- a quote or an ampersand in it would make the XML invalid.
    local tooltip = escape_xml_attribute(pandoc_to_string(margin_note))
    margin_note = {
      pandoc.RawInline(
        FORMAT,
        '<w:bookmarkStart w:id="' .. bookmark_id ..
        '" w:name="' .. bookmark_id .. '"/>' ..
        '<w:bookmarkEnd w:id="' .. bookmark_id .. '"/>' ..
        '<w:hyperlink w:anchor="' .. bookmark_id .. '" ' ..
        'w:tooltip="' .. tooltip .. '">'),
      pandoc.Span(
        pandoc.Str(DOCX_TOOLTIP_ANCHOR),
        { ['custom-style'] = DOCX_TOOLTIP_ANCHOR_SYTLE }),
      pandoc.RawInline(
        FORMAT,
        '</w:hyperlink>')
    }
  elseif FORMAT == 'opendocument' then
    insert_in_table(margin_note, 1, pandoc.RawInline(FORMAT,
      '<office:annotation><text:p>'))
    insert_in_table(margin_note, pandoc.RawInline(FORMAT,
      '</text:p></office:annotation>'))
  end
  return margin_note
end
|
||
|
||
--[[
  Filter function for the Span elements.
  If span has a class for which a rendering is defined in config,
  replaces its content with the rendered body and returns a list
  made of the span and of the rendered note, the note being placed
  before or after the span according to the position set for the class.
  Only one class is taken into account; if the span has several
  matching classes, which one is used is unspecified (the order
  of traversal of config is not defined).
  Returns nothing if the span has no matching class.
  Raises an error if the position is neither "before" nor "after".
]]--
local function render_margin_notes(span)
  for class_name, class_config in pairs(config) do
    if span.classes:includes(class_name) then
      local note_position = class_config.position
      if note_position ~= 'before' and note_position ~= 'after' then
        error('Invalid value "' .. tostring(note_position) .. '" ' ..
              'for "position" in the definition of ' ..
              'the margin-note class "' .. class_name .. '". ' ..
              'Expected "before" or "after".')
      end
      local render_note = class_config.render.note
      local render_body = class_config.render.body
      local margin_note = {}
      local body = {}
      if render_note then
        margin_note = wrap_in_raw_note_code(
          render_note(span.content, span.attributes), class_name)
      end
      if render_body then
        body = render_body(span.content, span.attributes)
      end
      span.content = body
      if note_position == 'before' then
        -- The span is appended to the list that holds the note.
        insert_in_table(margin_note, span)
        return margin_note
      end
      return { span, unpack_table(margin_note) }
    end
  end
end
|
||
|
||
-- The filter is made of two parts listed in this order: the first one
-- handles the metadata (configuration), the second one the Span elements.
local read_configuration = { Meta = Meta }
local render_spans = { Span = render_margin_notes }
return { read_configuration, render_spans }
|