-- margin-notes.lua
-- A Pandoc Lua filter to create templated marginal notes based on classes.
-- Copyright 2022 Bastien Dumont (bastien.dumont [at] posteo.net)
-- This file is under the MIT License: see LICENSE for more details

-- Local aliases for the library functions used throughout the file.
local find_in_string = string.find
local gsub = string.gsub
local get_substring = string.sub
local unpack_table = table.unpack
local insert_in_table = table.insert
local table_pop = table.remove
local pandoc_to_string = pandoc.utils.stringify

-- No char in the placeholder string should have to be escaped
-- when found in a URL or in a Lua pattern.
local PLACEHOLDER_LABEL = 'MRGNN_PLACEHOLDER'
local PLACEHOLDER_BEGIN = 'BEG_'
local PLACEHOLDER_END = '_END'
-- Lua pattern matching one placeholder; the capture is the placeholder name.
local PLACEHOLDER_REGEX = PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
  '(.-)' .. PLACEHOLDER_LABEL .. PLACEHOLDER_END

-- String indicating an undefined value in the output
local UNDEFINED = '??'

-- Default value for "position" ("before" or "after").
local POS_DEFAULT = 'after'

local DOCX_TOOLTIP_ANCHOR = '*'
-- The spelling of this identifier is kept as is because
-- it is referenced under this name further down in the file.
local DOCX_TOOLTIP_ANCHOR_SYTLE = 'Tooltip anchor'

-- Character delimiting a variable in a template (e.g. §content§).
local TEMPLATE_VARIABLE_MARKUP_CHAR = '§'
local mc = TEMPLATE_VARIABLE_MARKUP_CHAR
-- Lua pattern matching a template variable; the capture is its name.
local TEMPLATE_VAR_REGEX = '%f[\\' .. mc .. ']' .. mc ..
  '(.-)%f[\\' .. mc .. ']' .. mc

-- Raw code added to the document header, indexed by output format.
local DEFAULT_MACROS = {
  -- ConTeXt code adapted from
  -- https://wiki.contextgarden.net/Footnotes#Footnotes_in_the_margin
  context = '\\define\\placeMrgnn{%\n' ..
    ' \\inoutermargin{\\vtop{%\n' ..
    ' \\placelocalnotes[mrgnn][before=,after=]%\n' ..
    ' }}%\n' ..
    '}\n' ..
    '\n' ..
    '\\definenote\n' ..
    ' [mrgnn]\n' ..
    ' [location=text,\n' ..
    ' bodyfont=x,\n' ..
    ' next=\\placeMrgnn]\n' ..
    '\n' ..
    '\\setupnotation\n' ..
    ' [mrgnn]\n' ..
    ' [number=no,\n' ..
    ' alternative=serried]',
  latex = '\\newcommand{\\mrgnn}[1]{%\n' ..
    ' \\marginpar{{\\footnotesize #1}}%\n' ..
    '}'
}

-- Normalize the two format aliases so that the rest of the file
-- only has to test for 'opendocument' and 'openxml'.
if FORMAT == 'odt' then
  FORMAT = 'opendocument'
elseif FORMAT == 'docx' then
  FORMAT = 'openxml'
end

-- Returns a new table holding the sequence part of the_table.
-- Nested tables are not copied (the copy is shallow).
local function copy_unidimensional_table(the_table)
  return { unpack_table(the_table) }
end

-- Appends value to list. A table value is appended as a shallow copy,
-- so later changes to the original table do not affect the stored one.
local function add_copy_to_list(value, list)
  -- Value is of any type except thread and userdata.
  local to_be_added
  if type(value) == 'table' then
    to_be_added = copy_unidimensional_table(value)
  else
    to_be_added = value
  end
  insert_in_table(list, to_be_added)
end

-- Coerces any value to a real boolean (only nil and false give false).
local function to_bool(var)
  return not not var
end

-- Returns the plain-text rendering of a list of Inlines,
-- obtained by stringifying a Para wrapping them.
local function inlines_to_string(list)
  return pandoc_to_string(pandoc.Para(list))
end

-- Rendering definitions indexed by class name (see add_rendering).
local config = {}

-- Stores under config[class] the rendering definition of a class.
-- NOTE(review): this function is declared global in the original file;
-- it is kept global here so that the names visible to other code
-- do not change. Confirm whether it is meant to be local.
function add_rendering(config, class,
                       body_text, note_text,
                       docx_body_text, docx_note_text,
                       odt_body_text, odt_note_text,
                       csname, position)
  -- The values are plain strings (not the empty string!) or nil.
  config[class] = {
    body_text = body_text,
    note_text = note_text,
    docx_body_text = docx_body_text,
    docx_note_text = docx_note_text,
    odt_body_text = odt_body_text,
    odt_note_text = odt_note_text,
    csname = csname,
    position = position
  }
end

-- Returns the "text" field of the first element of meta_obj,
-- or nil if meta_obj is nil/false or has no first element.
local function meta_to_str(meta_obj)
  if meta_obj and meta_obj[1] then
    return meta_obj[1].text
  else
    return nil
  end
end

-- Reads the "mrgnn-define-renderings" metadata field and registers
-- every entry it contains in config. Returns meta unchanged.
-- For the ODT fields, the generic "body-text" and "note-text" are used
-- as fallbacks; "position" falls back to POS_DEFAULT.
local function get_renderings_config(meta)
  local renderings = meta['mrgnn-define-renderings']
  if renderings then
    for i = 1, #renderings do
      -- Declared local: a bare assignment would create a global.
      local this_value = renderings[i]
      add_rendering(config,
        meta_to_str(this_value.class),
        meta_to_str(this_value['body-text']),
        meta_to_str(this_value['note-text']),
        meta_to_str(this_value['docx-body-text']),
        meta_to_str(this_value['docx-note-text']),
        meta_to_str(this_value['odt-body-text'] or this_value['body-text']),
        meta_to_str(this_value['odt-note-text'] or this_value['note-text']),
        meta_to_str(this_value['csname']),
        meta_to_str(this_value['position']) or POS_DEFAULT
      )
    end
  end
  return meta
end

-- Parses markdown_str with pandoc.read and returns the content of
-- the first block of the resulting document.
-- Returns an empty table if markdown_str is the empty string, if the
-- parsed document has no block (e.g. the string only contains blanks)
-- or if its first block has no "content" field.
local function to_pandoc_inlines(markdown_str)
  local inlines = {}
  if markdown_str ~= '' then
    local whole_doc = pandoc.read(markdown_str)
    local first_block = whole_doc.blocks[1]
    -- Guard against an empty document: indexing a missing block
    -- would raise an error.
    if first_block and first_block.content then
      inlines = first_block.content
    end
  end
  return inlines
end
-- Converts a template string to Pandoc inlines (via to_pandoc_inlines)
-- after rewriting each template variable as a placeholder string.
-- A nil template is treated as the empty string.
local function template_to_pandoc_fragment(template)
  -- The substitution is necessary in order to differentiate
  -- the '§' characters that are part of placeholder markup
  -- and the literal ones.
  template = gsub((template or ''),
    TEMPLATE_VAR_REGEX,
    PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL .. '%1' ..
    PLACEHOLDER_LABEL .. PLACEHOLDER_END)
  return to_pandoc_inlines(template)
end

-- True if elem is a value that can be traversed with pairs
-- by find_paths_to_placeholders (a table or a userdata).
local function has_children(elem)
  return type(elem) == 'table' or type(elem) == 'userdata'
end

-- True if str contains at least one placeholder.
local function contains_placeholder(str)
  -- Although it would speed up the process,
  -- we don't exclude any string based on its key (e.g. "tag")
  -- because users could use the same names for attributes.
  return to_bool(find_in_string(str,
    PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL .. '.-' ..
    PLACEHOLDER_LABEL .. PLACEHOLDER_END))
end

-- Appends step to path.
local function register_step_in_path(path, step)
  insert_in_table(path, step)
end

-- Removes the last step of path.
local function unregister_last_step(path)
  table_pop(path)
end

local function find_paths_to_placeholders(current_path,
                                          placeholders_paths,
                                          current_table)
  --[[
    Recursively traverses current_table and appends to
    placeholders_paths a copy of the path to every string
    that contains a placeholder.
    current_path represents a path to a placeholder,
    i.e. an unidimensional table of the successive keys
    (indexes or field names) that lead from the root table
    to the string containing the placeholder.
    It is extended before each descent and restored afterwards.
    When two placeholders are to be found in the same string,
    only one path is returned.
  ]]--
  for index, elem in pairs(current_table) do
    if has_children(elem) then
      register_step_in_path(current_path, index)
      find_paths_to_placeholders(current_path, placeholders_paths, elem)
      unregister_last_step(current_path)
    elseif type(elem) == 'string' then
      if contains_placeholder(elem) then
        register_step_in_path(current_path, index)
        -- A copy is stored because current_path keeps changing.
        add_copy_to_list(current_path, placeholders_paths)
        unregister_last_step(current_path)
      end
    end
  end
end

local function get_paths_to_placeholders(list)
  --[[
    list is a List of Inlines
    Returns a table of paths (see find_paths_to_placeholders).
    If two placeholders are to be found in the same string,
    then only one path is returned.
    Example:
      template = {
        pandoc.SmallCaps(pandoc.Str(
          PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
          'content' ..
          PLACEHOLDER_LABEL .. PLACEHOLDER_END)),
        pandoc.Str(':'),
        pandoc.Space(),
        pandoc.Emph(pandoc.Str(
          PLACEHOLDER_BEGIN .. PLACEHOLDER_LABEL ..
          'attr.def' ..
          PLACEHOLDER_LABEL .. PLACEHOLDER_END))
      }
      return value = {
        { 1, 'content', 1, 'text' },
        { 4, 'content', 1, 'text' }
      }
  ]]--
  local placeholders_paths = {}
  local current_path = {}
  find_paths_to_placeholders(current_path, placeholders_paths, list)
  return placeholders_paths
end

local function is_replacement_a_list(parent_object_type)
  --[[
    If parent_object_type is Str, this means that the object
    containing the placeholder is an element in a list of Inlines.
    In this case, the replacement should be a list of Inlines
    destined to replace this element.
    Otherwise (e.g. Link or Image), the replacement
    should be a plain string.
  ]]--
  return parent_object_type == 'Str'
end

-- Returns a copy of inlines that can be modified without affecting
-- the original. The copy is made with the walk method, which is the
-- way this file already deep-copies templates (see template_to_function).
-- A value without a walk method is returned as is.
local function copy_inlines(inlines)
  if type(inlines) == 'table' and type(inlines.walk) == 'function' then
    return inlines:walk({})
  end
  return inlines
end

local function get_replacement(placeholder,
                               instance_content, instance_attr,
                               replacement_is_list)
  --[[
    Returns a list of Inlines if replacement_is_list is true,
    a plain string otherwise.
    Markdown is interpreted in the first case only.
    placeholder must be "content" or start with "attr.";
    any other value raises an error.
    An attribute missing from instance_attr is rendered as UNDEFINED.
  ]]--
  -- Declared local: a bare assignment would create a global.
  local replacement
  if placeholder == 'content' then
    if replacement_is_list then
      -- The list is copied because it is modified in place afterwards
      -- (see insert_strings_around_placeholder). Without the copy,
      -- a template using the content twice would corrupt its output.
      replacement = copy_inlines(instance_content)
    else
      replacement = inlines_to_string(instance_content)
    end
  elseif find_in_string(placeholder, '^attr%.') then
    local key = get_substring(placeholder, #'attr.' + 1)
    if replacement_is_list then
      local replacement_markdown = instance_attr[key]
        or '**' .. UNDEFINED .. '**'
      replacement = to_pandoc_inlines(replacement_markdown)
    else
      replacement = instance_attr[key] or UNDEFINED
    end
  else
    error('Invalid content "' .. placeholder ..
      '" in the value of a ' ..
      '"body-text" or "note-text" metadata variable. ' ..
      'It must either be "content" or begin with "attr".')
  end
  return replacement
end

-- Returns the parts of s located before index i_beg and after
-- index i_end. Each part is false if it would be empty.
local function get_strings_around_substring(s, i_beg, i_end)
  local before = false
  local after = false
  if i_beg > 1 then
    before = get_substring(s, 1, i_beg - 1)
  end
  if i_end < #s then
    after = get_substring(s, i_end + 1)
  end
  return before, after
end

-- Adds the text surrounding a placeholder to its replacement.
-- If replacement is a table (list of Inlines), it is modified in place:
-- the surrounding text is merged into its first/last element when this
-- element is a Str, and added as a new Str otherwise (including when
-- the list is empty). If replacement is a string, the result is
-- the concatenation of the three strings.
-- The strings before and after may be false or nil (no text).
-- Returns the resulting list or string.
local function insert_strings_around_placeholder(replacement,
                                                 string_before_placeholder,
                                                 string_after_placeholder)
  if type(replacement) ~= 'table' then
    return (string_before_placeholder or '') ..
      replacement ..
      (string_after_placeholder or '')
  end
  if string_before_placeholder then
    local first = replacement[1]
    -- "first" is nil when the replacement is an empty list.
    if first and first.t == 'Str' then
      first.text = string_before_placeholder .. first.text
    else
      insert_in_table(replacement, 1,
        pandoc.Str(string_before_placeholder))
    end
  end
  if string_after_placeholder then
    local last = replacement[#replacement]
    if last and last.t == 'Str' then
      last.text = last.text .. string_after_placeholder
    else
      insert_in_table(replacement,
        pandoc.Str(string_after_placeholder))
    end
  end
  return replacement
end

-- Substitutes replacement for the placeholder located between
-- i_placeholder_beg and i_placeholder_end in the string
-- pandoc_elems[i_object][placeholder_key].
-- If replacement is a list, its elements (with the surrounding text)
-- take the place of pandoc_elems[i_object]. Otherwise the string
-- field itself is overwritten. Does not return anything.
local function insert_replacement_in_elems(replacement, pandoc_elems,
                                           i_object, placeholder_key,
                                           i_placeholder_beg,
                                           i_placeholder_end)
  local replacement_is_list = type(replacement) == 'table'
  local string_with_placeholder = pandoc_elems[i_object][placeholder_key]
  local string_before_placeholder, string_after_placeholder =
    get_strings_around_substring(string_with_placeholder,
      i_placeholder_beg, i_placeholder_end)
  replacement = insert_strings_around_placeholder(replacement,
    string_before_placeholder, string_after_placeholder)
  if replacement_is_list then
    -- Inserted in reverse order just after the object, so that
    -- the elements end up in their original order.
    for i = #replacement, 1, -1 do
      insert_in_table(pandoc_elems, i_object + 1, replacement[i])
    end
    table_pop(pandoc_elems, i_object)
  else
    pandoc_elems[i_object][placeholder_key] = replacement
  end
end

-- Returns a list describing every placeholder found in
-- str_with_placeholders, in order of appearance. Each item is a table
-- with the fields "value" (placeholder name), "beg" and "end"
-- (indexes of the first and last char of the placeholder).
local function find_placeholders_in_string(str_with_placeholders)
  local placeholders_data = {}
  local i_data = 1
  local i_beg, i_end, placeholder =
    find_in_string(str_with_placeholders, PLACEHOLDER_REGEX)
  while placeholder do
    placeholders_data[i_data] = {
      value = placeholder,
      beg = i_beg,
      ['end'] = i_end
    }
    i_beg, i_end, placeholder =
      find_in_string(str_with_placeholders, PLACEHOLDER_REGEX, i_end)
    i_data = i_data + 1
  end
  return placeholders_data
end

local function replace_placeholders_in_value(pandoc_elems, i_object,
                                             placeholder_key,
                                             instance_content,
                                             instance_attr)
  --[[
    Does not return anything: modifies instead pandoc_elems
    by replacing the placeholders in
    pandoc_elems[i_object][placeholder_key]
    with the corresponding values from
    instance_content (a List of Inlines) and
    instance_attr (a table of key/value pairs,
    where the values may contain markdown formatting).
    The placeholders are processed from the last to the first one,
    so that the indexes of those not yet processed remain valid.
  ]]--
  local str_with_placeholders = pandoc_elems[i_object][placeholder_key]
  local replacement_is_list =
    is_replacement_a_list(pandoc_elems[i_object].t)
  local placeholders_data =
    find_placeholders_in_string(str_with_placeholders)
  for i = #placeholders_data, 1, -1 do
    local placeholder_data = placeholders_data[i]
    local replacement = get_replacement(placeholder_data.value,
      instance_content, instance_attr, replacement_is_list)
    insert_replacement_in_elems(replacement, pandoc_elems,
      i_object, placeholder_key,
      placeholder_data.beg, placeholder_data['end'])
  end
end

local function replace_placeholders(
    -- table of Inlines, generally containing Str objects
    -- whose text contains a placeholder
    inlines_with_placeholders,
    -- table of paths (see find_paths_to_placeholders for a definition)
    paths_to_placeholders,
    -- List of inlines
    instance_content,
    -- key-value table of attributes. The values may contain markdown.
    instance_attr)
  --[[
    Replaces the placeholders in inlines_with_placeholders
    pointed at by the paths in paths_to_placeholders
    with the data in instance_content and instance_attr
    as required by the placeholder strings.
    If the placeholder string does not make up the whole text
    of a Str, the remaining chars are kept around the replacement.
    inlines_with_placeholders is modified in place and returned.
    The paths are processed from the last to the first one.
  ]]--
  for i_path = #paths_to_placeholders, 1, -1 do
    local path = paths_to_placeholders[i_path]
    local current_scope = inlines_with_placeholders
    local i_step = 1
    local step = path[i_step]
    -- Descend until "step" is the last but one item of the path,
    -- i.e. the key of the object holding the placeholder string
    -- in current_scope.
    while i_step < #path - 1 do
      current_scope = current_scope[step]
      i_step = i_step + 1
      step = path[i_step]
    end
    -- The last item is the name of the field holding the string.
    local last_step = path[i_step + 1]
    replace_placeholders_in_value(current_scope, step, last_step,
      instance_content, instance_attr)
  end
  return inlines_with_placeholders
end

local function template_to_function(template)
  --[[
    Returns a function that takes data (a content and a table
    of attributes) and inserts it into the given template.
    Returns nothing if template is nil or false.
  ]]--
  --[[
    – inlines_with_placeholders and paths_to_placeholders
      are created and memoized the first time
      the returned function is called.
    – inlines_with_placeholders cannot be used directly
      for it is a reference to a table that would be changed
      by replace_placeholders. That's why we create a deep copy
      of it via the walk function at every call.
    – paths_to_placeholders can be used directly
      by replace_placeholders because it is only traversed.
  ]]--
  if template then
    local paths_to_placeholders
    local inlines_with_placeholders
    return function(instance_content, instance_attr)
      if not inlines_with_placeholders then
        inlines_with_placeholders = template_to_pandoc_fragment(template)
      end
      local inlines_copy = inlines_with_placeholders:walk({})
      if not paths_to_placeholders then
        paths_to_placeholders =
          get_paths_to_placeholders(inlines_with_placeholders)
      end
      return replace_placeholders(
        inlines_copy, paths_to_placeholders,
        instance_content, instance_attr)
    end
  end
end

-- Adds to every class definition in config a "render" table with
-- the fields "body" and "note", each set to the function created by
-- template_to_function from the template matching the output format
-- ("odt_" or "docx_" prefixed fields for these formats, the generic
-- ones otherwise). The meta parameter is not used.
local function define_rendering_functions(meta)
  local format_prefix = ''
  if FORMAT == 'opendocument' then
    format_prefix = 'odt_'
  elseif FORMAT == 'openxml' then
    format_prefix = 'docx_'
  end
  for class_name, class_config in pairs(config) do
    config[class_name].render = {}
    for _, part in ipairs({'body', 'note'}) do
      config[class_name].render[part] =
        template_to_function(class_config[format_prefix .. part .. '_text'])
    end
  end
end

-- For the ConTeXt and LaTeX formats, sets meta['header-includes']
-- to a list made of its previous value (or an empty raw block)
-- followed by the default macros of the format.
local function set_macro_definition(meta)
  if FORMAT == 'context' or FORMAT == 'latex' then
    meta['header-includes'] = {
      (meta['header-includes'] or pandoc.RawBlock(FORMAT, '')),
      pandoc.RawBlock(FORMAT, DEFAULT_MACROS[FORMAT])
    }
  end
end

-- Meta filter: reads the configuration, builds the rendering
-- functions and adds the macro definitions. Returns meta.
local function Meta(meta)
  get_renderings_config(meta)
  define_rendering_functions(meta)
  set_macro_definition(meta)
  return meta
end

-- Number of notes generated so far for the openxml format.
local i_invocation = 0

-- Wraps a rendered note in the raw code required by the output format.
-- For ConTeXt, LaTeX and OpenDocument, content is modified in place
-- (raw inlines are added at both ends) and returned.
-- For openxml, a new list is returned.
-- For any other format, content is returned unchanged.
local function wrap_in_raw_note_code(content, class_name)
  -- content is a List of Inlines (output of replace_placeholders)
  local margin_note = content
  if FORMAT == 'context' or FORMAT == 'latex' then
    local csname = config[class_name].csname or 'mrgnn'
    insert_in_table(margin_note, 1,
      pandoc.RawInline(FORMAT, '\\' .. csname .. '{'))
    insert_in_table(margin_note,
      pandoc.RawInline(FORMAT, '}'))
  elseif FORMAT == 'openxml' then
    -- NOTE(review): the raw strings of this branch and of the
    -- opendocument one are empty here, bookmark_id is not used and
    -- content does not appear in the result. Presumably some markup
    -- has been lost; to be checked against the reference version
    -- of the filter.
    i_invocation = i_invocation + 1
    local bookmark_id = 'mrgnn_' .. i_invocation
    margin_note = {
      pandoc.RawInline(
        FORMAT,
        '' ..
        '' ..
        ''),
      pandoc.Span(
        pandoc.Str(DOCX_TOOLTIP_ANCHOR),
        { ['custom-style'] = DOCX_TOOLTIP_ANCHOR_SYTLE }),
      pandoc.RawInline(
        FORMAT,
        '')
    }
  elseif FORMAT == 'opendocument' then
    insert_in_table(margin_note, 1,
      pandoc.RawInline(FORMAT, ''))
    insert_in_table(margin_note,
      pandoc.RawInline(FORMAT, ''))
  end
  return margin_note
end

-- Span filter. If the span has one of the classes defined in config,
-- its content is replaced with the rendered body (empty if there is
-- no body template) and the rendered note (if any) is placed before
-- or after it. Returns the resulting list of Inlines.
-- Only the first matching class found while traversing config is used.
-- Returns nothing (span left as is) if no class matches.
-- Raises an error if the position of the class is neither
-- "before" nor "after".
local function render_margin_notes(span)
  for class_name, class_config in pairs(config) do
    if span.classes:includes(class_name) then
      local render_note = class_config.render.note
      local render_body = class_config.render.body
      local note_position = class_config.position
      local margin_note = {}
      local body = {}
      if render_note then
        margin_note = wrap_in_raw_note_code(
          render_note(span.content, span.attributes),
          class_name)
      end
      if render_body then
        body = render_body(span.content, span.attributes)
      end
      span.content = body
      local span_with_note
      if note_position == 'before' then
        span_with_note = margin_note
        insert_in_table(span_with_note, span)
      elseif note_position == 'after' then
        span_with_note = { span, unpack_table(margin_note) }
      else
        -- The concatenation operator after class_name is required:
        -- without it, class_name would be called as a function.
        error('Invalid value "' .. note_position .. '" ' ..
          'for "position" in the definition of ' ..
          'the margin-note class "' .. class_name .. '". ' ..
          'Expected "before" or "after".')
      end
      return span_with_note
    end
  end
end

return {
  { Meta = Meta },
  { Span = render_margin_notes }
}