docs/mdx_spantables.py

"""
SpanTables Extension for Python-Markdown
========================================
This is a slightly modified version of the tables extension that comes with
python-markdown.
To span cells across multiple columns make sure the cells end with multiple
consecutive vertical bars (a cell that contains only `<` is likewise merged
into the cell to its left). To span cells across rows fill the cell on the
last row with at least one caret (`^`) at the start or end of its content and
no other characters than spaces or carets.
For example:
    | head1           | head2 |
    |-----------------|-------|
    | span two cols          ||
    | span two rows   |       |
    |^                |       |
See <https://pythonhosted.org/Markdown/extensions/tables.html>
for documentation of the original extension.
Original code Copyright 2009 [Waylan Limberg](http://achinghead.com)
SpanTables changes Copyright 2016 [Maurice van der Pot](griffon26@kfk4ever.com)
License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
"""

from __future__ import unicode_literals
from markdown.extensions.tables import TableProcessor
from markdown.extensions import Extension
import xml.etree.ElementTree as etree


class SpanTableProcessor(TableProcessor):
    """ Process Tables. """
    def test(self, parent, block):
        rows = block.split('\n')
        return (len(rows) > 1 and '|' in rows[0] and
                '|' in rows[1] and '-' in rows[1] and
                rows[1].strip()[0] in ['|', ':', '-'])

    def is_end_of_rowspan(self, td):
        return ((td is not None) and
                (td.text.startswith('^') or td.text.endswith('^')) and
                (td.text.strip('^ ') == ''))

    def apply_rowspans(self, tbody):
        table_cells = {}

        rows = tbody.findall('tr')
        max_cols = 0
        max_rows = len(rows)
        for y, tr in enumerate(rows):

            cols = tr.findall('td')

            x = 0
            for td in cols:

                colspan_str = td.get('colspan')
                colspan = int(colspan_str) if colspan_str else 1

                # Insert the td together with its parent
                table_cells[(x, y)] = (tr, td)

                x += colspan

            max_cols = max(max_cols, x)

        for x in range(max_cols):
            possible_cells_in_rowspan = 0
            current_colspan = None

            for y in range(max_rows):
                _, td = table_cells.get((x, y), (None, None))

                if td is None:
                    possible_cells_in_rowspan = 0

                else:
                    colspan = td.get('colspan')
                    if colspan != current_colspan:
                        current_colspan = colspan
                        possible_cells_in_rowspan = 0

                    if not td.text:
                        possible_cells_in_rowspan += 1

                    elif self.is_end_of_rowspan(td):
                        td.text = ''
                        possible_cells_in_rowspan += 1
                        first_cell_of_rowspan_y = y - (possible_cells_in_rowspan - 1)
                        for del_y in range(y, first_cell_of_rowspan_y, -1):
                            tr, td = table_cells.get((x, del_y))
                            tr.remove(td)
                        _, first_cell = table_cells.get((x, first_cell_of_rowspan_y))
                        first_cell.set('rowspan', str(possible_cells_in_rowspan))

                        possible_cells_in_rowspan = 0

                    else:
                        possible_cells_in_rowspan = 1

    def run(self, parent, blocks):
        """ Parse a table block and build table. """
        block = blocks.pop(0).split('\n')
        header = block[0].strip()
        seperator = block[1].strip()
        rows = [] if len(block) < 3 else block[2:]
        # Get format type (bordered by pipes or not)
        border = False
        if header.startswith('|'):
            border = True
        # Get alignment of columns
        align = []
        for c in self._split_row(seperator, border):
            if c.startswith(':') and c.endswith(':'):
                align.append('center')
            elif c.startswith(':'):
                align.append('left')
            elif c.endswith(':'):
                align.append('right')
            else:
                align.append(None)
        # Build table
        table = etree.SubElement(parent, 'table')
        thead = etree.SubElement(table, 'thead')
        self._build_row(header, thead, align, border)
        tbody = etree.SubElement(table, 'tbody')
        for row in rows:
            self._build_row(row.strip(), tbody, align, border)

        self.apply_rowspans(tbody)

    def _build_row(self, row, parent, align, border):
        """ Given a row of text, build table cells. """
        tr = etree.SubElement(parent, 'tr')
        tag = 'td'
        if parent.tag == 'thead':
            tag = 'th'
        cells = self._split_row(row, border)
        c = None
        # We use align here rather than cells to ensure every row
        # contains the same number of columns.
        for i, a in enumerate(align):

            # After this None indicates that the cell before it should span
            # this column and '' indicates an cell without content
            try:
                text = cells[i]
                if text == '':
                    text = None
            except IndexError:  # pragma: no cover
                text = ''

            # No text after split indicates colspan
            if text is None or text.strip() == "<":
                if c is not None:
                    colspan_str = c.get('colspan')
                    colspan = int(colspan_str) if colspan_str else 1
                    c.set('colspan', str(colspan + 1))
                else:
                    # if this is the first cell, then fall back to creating an empty cell
                    text = ''
            else:
                c = etree.SubElement(tr, tag)
                c.text = text.strip()

            if a:
                c.set('align', a)

    def _split_row(self, row, border):
        """Split a row of text into a list of cells.

        When ``border`` is true, one leading and one trailing pipe (if
        present) are dropped before the row is handed to ``self._split``.
        """
        if not border:
            return self._split(row)

        # Drop the opening border first, then test what is left for a
        # closing border, so a lone "|" loses only one character.
        inner = row[1:] if row.startswith('|') else row
        if inner.endswith('|'):
            inner = inner[:-1]
        return self._split(inner)


class TableExtension(Extension):
    """ Markdown extension that installs the span-aware table processor. """

    def extendMarkdown(self, md):
        """ Register a SpanTableProcessor with the block parser of ``md``.

        The processor is registered under the name 'spantable' with
        priority 1000 and is constructed with an empty config dict.
        """
        processor = SpanTableProcessor(md.parser, {})
        registry = md.parser.blockprocessors
        registry.register(processor, 'spantable', 1000)


def makeExtension(*args, **kwargs):
    """ Build a TableExtension, forwarding all arguments unchanged. """
    extension = TableExtension(*args, **kwargs)
    return extension