#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# ~license~
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
import re, os.path
from collections import UserDict
from appy import utils
from appy.pod import *
from appy.utils import css
from appy.utils.string import randomName
from appy.pod.odf_parser import OdfEnvironment, OdfParser
# States of the styles parser, numbered consecutively from 0:
# - READING          : default state
# - PARSING_STYLE    : parsing a style definition
# - PARSING_M_STYLES : parsing section "master-styles"
# - PARSING_P_LAYOUT : parsing a page layout
READING, PARSING_STYLE, PARSING_M_STYLES, PARSING_P_LAYOUT = range(4)
# Dict types (presumably the types accepted for a styles mapping: see the
# wording of error message MAP_KO)
dictTypes = (dict, UserDict)
# Shortcut for the newline character, used when building multi-line strings
bn = '\n'
# Error-related constants - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Messages about the structure of the styles mapping. When present, "%s"
# placeholders receive the key at stake and, for VAL_TYP_KO, the name of the
# expected type.
MAP_KO = 'The styles mapping must be a dictionary or a UserDict instance.'
KEY_STR_KO = "The styles mapping dictionary's keys must be strings."
VAL_STR_KO = 'The styles mapping value for key "%s" must be a string.'
VAL_EMPTY = 'In your styles mapping, you inserted an empty key and/or value.'
VAL_TYP_KO = 'For key "%s", the value must be of type "%s".'
# Placeholders: the element name, then the list of unstylable elements
UNSTYL_TAG = (
    'You can\'t associate a style to element "%s". Unstylable '
    'elements are: %s'
)
# Placeholder: the name of the style that was not found
STYLE_N_F = (
    'OpenDocument style "%s" was not found in your template. Note '
    'that the styles names ("Heading 1", "Standard"...) that appear '
    'when opening your template with OpenOffice, for example, are a '
    'super-set of the styles that are really recorded into your '
    'document. Indeed, only styles that are in use within your '
    'template are actually recorded into the document. You may '
    'consult the list of available styles programmatically by '
    'calling your pod renderer\'s "getStyles" method.'
)
# Placeholders for the 2 next messages: the XHTML element, then the style
P_ODT_TXT = (
    'For XHTML element "%s", you must associate a paragraph-wide '
    'OpenDocument style. "%s" is a "text" style (that applies to '
    'only a chunk of text within a paragraph).'
)
TXT_ODT_P = (
    'For XHTML element "%s", you must associate an OpenDocument '
    '"text" style (that applies to only a chunk of text within a '
    'paragraph). "%s" is a paragraph-wide style.'
)
# Placeholder: presumably the name of the POD template - TODO confirm
IMPLIC_PS = (
    '%s: no explicit page style is defined in it, but attribute '
    '"managePageStyles" has been set with an integer value. In that '
    'case, you must explicitly define the default page style on the '
    'first page of your POD template.'
)
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Properties:
    '''Common abstract ancestor of the classes defining table and list
       properties.'''

    # Names of the HTML elements whose styles are defined by Properties
    # instances instead of Style instances.
    elems = ('table', 'ol', 'ul')
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class TableProperties(Properties):
    '''In a styles mapping, the value @key "table" must be an instance of this
       class.'''

    # Default table margins, in cm: top, right, bottom and left
    defaultMargins = (0.3, 0.0, 0.3, 0.0)
    # Short prefix associated with every possible column modifier
    columnModifiersPrefixes = {'optimize': 'OCW', 'distribute': 'DC'}
    # Predefined width of 100%
    width100 = css.Value('width', '100%')

    def __init__(self, pageWidth=None, px2cm=css.px2cm, cellPx2cm=10.0,
      wideAbove=495, minColumnWidth=0.07, columnModifier=None,
      minCellPadding=0.1, cellContentStyle='podCellContent',
      headerContentStyle='podHeaderCellContent', margins=defaultMargins,
      unbreakable=False, unbreakableRows=False, border=None,
      prevails=False):
        '''Stores every parameter on p_self, as an attribute of the same
           name. The meaning of each one is documented hereafter.'''
        # pod computes, in cm, the width of the master page for a pod template.
        # Table widths expressed as percentages will be based on it. But if your
        # XHTML table(s) lie(s) within a section that has a specific page style
        # with another width, specify it here (as a float value, in cm).
        self.pageWidth = pageWidth
        # Table widths expressed as pixels will use a "pixels to cm" ratio as
        # defined in css.px2cm. If this is wrong for you, specify another ratio
        # here. The width in cm will be computed as:
        #
        #                 (table width in pixels) / px2cm
        #
        self.px2cm = px2cm
        # Table cell paddings may use another px / cm ratio. Indeed,
        # cellspacing="1" is converted to 0.02cm with the standard ratio, which
        # is low.
        self.cellPx2cm = cellPx2cm
        # Every table with no specified width will be "wide" (=100% width).
        # If a table width is specified in px and is above the value defined
        # here, it will be forced to 100%.
        self.wideAbove = wideAbove
        # pod ensures that every column will at least get a minimum width
        # (expressed as a percentage: a float value between 0.0 and 1.0). You
        # can change this minimum here.
        self.minColumnWidth = minColumnWidth
        # If a column modifier is specified, any parameter related to table and
        # column widths is ignored: we will let LibreOffice (LO) compute itself
        # the table and column widths via its algorithm
        # "SetOptimalColumnWidths" if p_columnModifier is "optimize" or
        # "DistributeColumns" if p_columnModifier is "distribute".
        # This requires LO to run in server mode and the
        # appy.pod.renderer.Renderer being launched with parameters
        #                  optimalColumnWidths="OCW_.*"
        #                  and distributeColumns="DC_.*"
        self.columnModifier = columnModifier
        # When cell padding is defined (CSS table property "border-spacing" or
        # HTML table attribute "cellspacing"), a minimum value can be defined
        # here, as a float value (cm). If no padding is defined, the default one
        # from pod default style "podCell" is used and is 0.1cm.
        self.minCellPadding = minCellPadding
        # The styles to use for cell and cell header content. The default values
        # correspond to styles defined in styles.xmlt.
        self.cellContentStyle = cellContentStyle
        self.headerContentStyle = headerContentStyle
        # The table margins, as a tuple of 4 float values (cm):
        # top, right, bottom and left margins.
        self.margins = margins
        # May the table be spread on several pages ?
        self.unbreakable = unbreakable
        # May a single table row be spread on several pages ?
        self.unbreakableRows = unbreakableRows
        # Table-wide border properties can be defined, for example:
        #                    '0.018cm solid #000000'
        # If defined, it will override the potential CSS value defined on tables
        self.border = border
        # If CSS attributes and corresponding TableProperties attributes are
        # both encountered, who prevails ? If p_prevails is True,
        # TableProperties attributes prevail.
        self.prevails = prevails

    def getWidth(self, attrs, original=False):
        '''Return the table width as a css.Value object. p_attrs is a css.Styles
           object containing parsed table attributes. If p_original is True,
           the width found in p_attrs is returned as is: p_self.wideAbove is not
           taken into account.'''
        # Widths being missing, "0" or expressed as strings are simply ignored:
        # the table then gets a width of 100%.
        if not hasattr(attrs, 'width') or attrs.width.value == 0 or \
           isinstance(attrs.width.value, str):
            return self.width100
        r = attrs.width
        if original: return r
        # A width in pixels being above the threshold is forced to 100%
        if self.wideAbove is not None and r.unit == 'px' and \
           r.value > self.wideAbove:
            return self.width100
        return r

    def getCellPadding(self, value):
        '''CSS "border-spacing" is defined in p_value. This method gets the
           final value, taking into account self.minCellPadding.'''
        unit = value.unit
        # We must get p_value in cm
        if unit == 'cm':
            val = value.value
        elif unit == 'px':
            # Cell paddings use their own px / cm ratio
            val = float(value.value) / self.cellPx2cm
        else:
            # We do not support this
            val = self.minCellPadding
        # Return the max between the p_value and the minimum value
        return max(val, self.minCellPadding)

    def getMargins(self, attrs, getWidth=False):
        '''Returns ODF properties allowing to define margins as specified by
           self.margins or by CSS p_attrs. If no margin is defined, r_ is an
           empty string.'''
        # If p_getWidth is True, the result is a tuple, whose second element is
        # the width (as a float representing cm) taken by potential left and/or
        # right margins.
        r = ''
        width = 0.0
        # The position of every direction is also the index of the
        # corresponding value within p_self.margins.
        for i, direction in enumerate(css.Styles.directions):
            # Get the value from CSS attributes
            cssValue = getattr(attrs, f'margin{direction}', None)
            if cssValue: cssValue = cssValue.cm(formatted=False)
            # Get the value as defined on p_self
            tbValue = self.margins[i]
            # Choose the prevailing value. The non prevailing one is used as
            # fallback when the prevailing one is None.
            if not self.prevails:
                value = cssValue if cssValue is not None else tbValue
            else:
                value = tbValue if tbValue is not None else cssValue
            if value is None: continue
            # Determine the name of the corresponding ODF property
            name = f'fo:margin-{direction}'
            r = f'{r} {name}="{value:.2f}cm"'
            # Only the directions listed in css.Styles.directionsW contribute
            # to the width taken by margins.
            if direction in css.Styles.directionsW:
                width += value
        return (r, width) if getWidth else r

    @classmethod
    def initStylesMapping(class_, stylesMapping, ocw, dc):
        '''If our special regular expressions are in use in parameters
           p_ocw ("optimalColumnWidths") or p_dc ("distributeColumns"), we
           must provide specific style mapping entries allowing to map CSS
           attribute "table-layout" and its values to column modifiers
           "optimize" or "distribute".'''
        # p_stylesMapping is updated in place; nothing is returned
        if (ocw == class_.ocwRex) or (dc == class_.dcRex):
            stylesMapping['table[table-layout=auto]'] = class_.ocw
            stylesMapping['table[table-layout=fixed]'] = class_.dc
            stylesMapping['table[table-layout=none]'] = class_.default

    @classmethod
    def init(class_):
        '''Sets, on this p_class_, some static attributes related to column
           width optimization.'''
        # The regular expressions to give to converter.py for it to recognize
        # tables whose column widths must be optimized or evenly distributed.
        class_.ocwRex = 'OCW_.*'
        class_.dcRex = 'DC_.*'
        # The default TableProperties instance
        class_.default = TableProperties()
        # TableProperties instances with OCW/DC enabled
        class_.ocw = TableProperties(columnModifier='optimize')
        class_.dc = TableProperties(columnModifier='distribute')

# Define the static attributes as soon as the class exists
TableProperties.init()
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class ListProperties(Properties):
    '''Base abstract class for defining properties of a XHTML list'''

    # Method "getLevelAttributes", called by m_dumpStyle, is not defined here:
    # it must be provided by concrete sub-classes.

    def __init__(self, levels, formats, delta, firstDelta, space, paraStyle):
        '''Stores every parameter on p_self, as an attribute of the same
           name.'''
        # The number of indentation levels supported
        self.levels = levels
        # The list of format characters for bullets or numbers
        self.formats = formats
        # The number of inches to increment at each level (as a float)
        self.delta = delta
        # The first delta can be different than any other
        self.firstDelta = firstDelta
        # The space, in inches (as a float), between the bullet/number and the
        # text.
        self.space = space
        # A specific style to apply to the inner paragraphs
        self.paraStyle = paraStyle
        # The number of levels can be > or < to the number of formats. In those
        # cases, formats will be applied partially or cyclically to levels.

    def dumpStyle(self, name):
        '''Returns the OpenDocument style definition corresponding to this
           instance.'''
        # One chunk is produced per level (from 0 to self.levels-1); chunks are
        # then joined with newline characters.
        #
        # NOTE(review): the literal parts of the f-strings below look
        # incomplete. Parameter p_name and local variables "space", "sb" and
        # "attr" are computed but are not referenced by any visible
        # placeholder. Confirm against the reference version of this file
        # before modifying this method.
        r = []
        fmt = utils.formatNumber
        # The cumulated indentation, increased at every level
        spaceBefore = 0
        space = fmt(self.space, sep='.', removeTrailingZeros=True)
        for i in range(self.levels):
            # Determine if "delta" or "firstDelta" must be used
            if i == 0 and self.firstDelta is not None:
                delta = self.firstDelta
            else:
                delta = self.delta
            spaceBefore += delta
            sb = fmt(spaceBefore, sep='.', removeTrailingZeros=True)
            # Level-specific attributes, as computed by the concrete sub-class
            attr = self.getLevelAttributes(i)
            props = self.getTextProperties(i)
            level = f' {bn} ' \
                    f'{props}{bn} ' \
                    f''
            r.append(level)
        return f'{bn}{bn.join(r)}{bn}' \
               f''

    def getTextProperties(self, i):
        '''Allows to define text properties at level p_i'''
        # This base implementation defines no text property at all
        return ''
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class BulletedProperties(ListProperties):
    '''Properties of a bulleted list: in a styles mapping, the value @key "ul"
       must be an instance of this class.'''

    # The type of list defined by this class
    type = 'bullet'
    # The bullet chars used by default
    defaultFormats = ('•', '◦', '▪')
    # Name of the text style tied to this type of list
    textStyle = 'podBulletStyle'

    def __init__(self, levels=4, formats=defaultFormats,
                 delta=0.32, firstDelta=0.10, space=0.30, paraStyle=None):
        '''Initialises the base list properties, with defaults being specific
           to bulleted lists.'''
        ListProperties.__init__(
            self, levels=levels, formats=formats, delta=delta,
            firstDelta=firstDelta, space=space, paraStyle=paraStyle)

    def getLevelAttributes(self, i):
        '''Returns, as a string, the bullet-specific attribute for level
           p_i.'''
        # The bullet char for this level is retrieved from p_self.formats
        bullet = utils.getElementAt(self.formats, i)
        return f'text:bullet-char="{bullet}"'
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class NumberedProperties(ListProperties):
    '''Properties of a numbered list: in a styles mapping, the value @key "ol"
       must be an instance of this class.'''

    # The type of list defined by this class
    type = 'number'
    # The number formats and suffixes used by default
    defaultFormats = ('1',)
    defaultSuffixes = ('.',)
    # Name of the text style tied to this type of list
    textStyle = 'podNumberStyle'

    def __init__(self, levels=4, formats=defaultFormats,
                 suffixes=defaultSuffixes, delta=0.32, firstDelta=0.08,
                 space=0.32, paraStyle=None):
        '''Initialises the base list properties, with defaults being specific
           to numbered lists, and stores the list of p_suffixes.'''
        # The list of suffixes
        self.suffixes = suffixes
        ListProperties.__init__(
            self, levels=levels, formats=formats, delta=delta,
            firstDelta=firstDelta, space=space, paraStyle=paraStyle)

    def getLevelAttributes(self, i):
        '''Returns, as a string, the number-specific attributes for level
           p_i.'''
        # The suffix and the number format for this level are retrieved from
        # p_self.suffixes and p_self.formats, respectively.
        suffix = utils.getElementAt(self.suffixes, i)
        fmt = utils.getElementAt(self.formats, i)
        return f'style:num-suffix="{suffix}" style:num-format="{fmt}"'
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Style:
'''Represents an ODF style. Either parsed from an ODF file or used for
dumping a style into an ODF file.'''
numberRex = re.compile(r'(\d+)(.*)')
def __init__(self, name, family, defaults=None, outlineLevel=None):
self.name = name
self.family = family # May be 'paragraph', etc.
self.displayName = name
self.styleClass = None # May be 'text', 'list', etc.
self.fontSize = None
self.fontSizeUnit = None # May be pt, %, ...
        # Where the style lies within the styles and substyles hierarchy
self.outlineLevel = outlineLevel
# A possible delta to apply to the outline level. Do not confuse this
# notion of delta, that applies when the outline level needs to be
# dumped as attribute to a "text:h" paragraph whose style is p_self,
# with the notion of delta being in use for a styles mapping (key "h*"),
# allowing to fine-tune the mapping between outline levels implicitly
# expressed on XHTML tags ("h1", "h2", etc) and ODF styles's outline
# levels.
self.outlineDelta = 0
# Namespace for the ODF "style-name" attribute corresponding to this
# style
self.styleNameNs = 'table' if family == 'table-cell' else 'text'
# Default ODF attributes for this style
self.defaults = defaults
# For some unknown reason, ODF parent-child links don't work
self.inheritWorks = family != 'table-cell'
def setFontSize(self, fontSize):
rexRes = self.numberRex.search(fontSize)
self.fontSize = int(rexRes.group(1))
self.fontSizeUnit = rexRes.group(2)
def __repr__(self):
'''p_self's short string representation'''
r = f'