#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# ~license~
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
import re, os.path
from collections import UserDict

from appy import utils
from appy.pod import *
from appy.utils import css
from appy.utils.string import randomName
from appy.pod.odf_parser import OdfEnvironment, OdfParser

# States the styles parser can be in
READING = 0           # Default state
PARSING_STYLE = 1     # Parsing a style definition
PARSING_M_STYLES = 2  # Parsing section "master-styles"
PARSING_P_LAYOUT = 3  # Parsing a page layout

# Types accepted wherever a dict is expected
dictTypes = (dict, UserDict)

# Newline character
bn = '\n'

# Error-related constants - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Messages containing "%s" are templates for the "%" string operator
MAP_KO = 'The styles mapping must be a dictionary or a UserDict instance.'
KEY_STR_KO = "The styles mapping dictionary's keys must be strings."
VAL_STR_KO = 'The styles mapping value for key "%s" must be a string.'
VAL_EMPTY = 'In your styles mapping, you inserted an empty key and/or value.'
VAL_TYP_KO = 'For key "%s", the value must be of type "%s".'
UNSTYL_TAG = (
    'You can\'t associate a style to element "%s". Unstylable '
    'elements are: %s'
)
STYLE_N_F = (
    'OpenDocument style "%s" was not found in your template. Note '
    'that the styles names ("Heading 1", "Standard"...) that appear '
    'when opening your template with OpenOffice, for example, are a '
    'super-set of the styles that are really recorded into your '
    'document. Indeed, only styles that are in use within your '
    'template are actually recorded into the document. You may '
    'consult the list of available styles programmatically by '
    'calling your pod renderer\'s "getStyles" method.'
)
P_ODT_TXT = (
    'For XHTML element "%s", you must associate a paragraph-wide '
    'OpenDocument style. "%s" is a "text" style (that applies to '
    'only a chunk of text within a paragraph).'
)
TXT_ODT_P = (
    'For XHTML element "%s", you must associate an OpenDocument '
    '"text" style (that applies to only a chunk of text within a '
    'paragraph). "%s" is a paragraph-wide style.'
)
IMPLIC_PS = (
    '%s: no explicit page style is defined in it, but attribute '
    '"managePageStyles" has been set with an integer value. In that '
    'case, you must explicitly define the default page style on the '
    'first page of your POD template.'
)

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Properties:
    '''Abstract base class for table and list properties'''

    # HTML elements whose styles are defined by Property instances instead of
    # Style instances.
    elems = ('table', 'ol', 'ul')

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class TableProperties(Properties):
    '''In a styles mapping, the value @key "table" must be an instance of this
       class.'''

    # Default margins, in cm: top, right, bottom, left
    defaultMargins = (0.3, 0.0, 0.3, 0.0)
    # Maps every column modifier to its prefix
    columnModifiersPrefixes = {'optimize': 'OCW', 'distribute': 'DC'}
    # Predefined width of 100%
    width100 = css.Value('width', '100%')

    def __init__(self, pageWidth=None, px2cm=css.px2cm, cellPx2cm=10.0,
                 wideAbove=495, minColumnWidth=0.07, columnModifier=None,
                 minCellPadding=0.1, cellContentStyle='podCellContent',
                 headerContentStyle='podHeaderCellContent',
                 margins=defaultMargins, unbreakable=False,
                 unbreakableRows=False, border=None, prevails=False):
        '''Stores every parameter, as is, in the homonym attribute on p_self.
           Each parameter is documented hereafter, next to its attribute.'''
        # pod computes, in cm, the width of the master page for a pod template.
        # Table widths expressed as percentages will be based on it. But if your
        # XHTML table(s) lie(s) within a section that has a specific page style
        # with another width, specify it here (as a float value, in cm).
        self.pageWidth = pageWidth
        # Table widths expressed as pixels will use a "pixels to cm" ratio as
        # defined in css.px2cm. If this is wrong for you, specify another ratio
        # here. The width in cm will be computed as:
        #
        #                 (table width in pixels) / px2cm
        #
        self.px2cm = px2cm
        # Table cell paddings may use another px / cm ratio. Indeed,
        # cellspacing="1" is converted to 0.02cm with the standard ratio, which
        # is low.
        self.cellPx2cm = cellPx2cm
        # Every table with no specified width will be "wide" (=100% width).
        # If a table width is specified in px and is above the value defined
        # here, it will be forced to 100%.
        self.wideAbove = wideAbove
        # pod ensures that every column will at least get a minimum width
        # (expressed as a percentage: a float value between 0.0 and 1.0). You
        # can change this minimum here.
        self.minColumnWidth = minColumnWidth
        # If a column modifier is specified, any parameter related to table and
        # column widths is ignored: we will let LibreOffice (LO) compute itself
        # the table and column widths via its algorithm
        # "SetOptimalColumnWidths" if p_columnModifier is "optimize" or
        # "DistributeColumns" if p_columnModifier is "distribute".
        # This requires LO to run in server mode and the
        # appy.pod.renderer.Renderer being launched with parameters
        #                   optimalColumnWidths="OCW_.*"
        #                   and distributeColumns="DC_.*"
        self.columnModifier = columnModifier
        # When cell padding is defined (CSS table property "border-spacing" or
        # HTML table attribute "cellspacing"), a minimum value can be defined
        # here, as a float value (cm). If no padding is defined, the default one
        # from pod default style "podCell" is used and is 0.1cm.
        self.minCellPadding = minCellPadding
        # The styles to use for cell and cell header content. The default values
        # correspond to styles defined in styles.xmlt.
        self.cellContentStyle = cellContentStyle
        self.headerContentStyle = headerContentStyle
        # The table margins, as a tuple of 4 float values (cm):
        # top, right, bottom and left margins.
        self.margins = margins
        # May the table be spread on several pages ?
        self.unbreakable = unbreakable
        # May a single table row be spread on several pages ?
        self.unbreakableRows = unbreakableRows
        # Table-wide border properties can be defined, for example:
        #                    '0.018cm solid #000000'
        # If defined, it will override the potential CSS value defined on tables
        self.border = border
        # If CSS attributes and corresponding TableProperties attributes are
        # both encountered, who prevails ? If p_prevails is True,
        # TableProperties attributes prevail.
        self.prevails = prevails

    def getWidth(self, attrs, original=False):
        '''Return the table width as a css.Value object.

           p_attrs is a css.Styles object containing parsed table attributes.
           When p_attrs defines no width, or a width being 0 or a string, the
           predefined width of 100% is returned, whatever p_original is.

           Else, if p_original is True, the width found in p_attrs is returned
           as is: p_self.wideAbove is not taken into account. If p_original is
           False (the default), a width expressed in px and being above
           p_self.wideAbove is replaced with the predefined width of 100%.'''
        # Widths being "0" or not being floats are simply ignored
        if not hasattr(attrs, 'width') or attrs.width.value == 0 or \
           isinstance(attrs.width.value, str):
            return self.width100
        r = attrs.width
        if original: return r
        # Force too wide tables, expressed in px, to 100%
        if self.wideAbove is not None and r.unit == 'px' and \
           r.value > self.wideAbove:
            return self.width100
        return r

    def getCellPadding(self, value):
        '''CSS "border-spacing" is defined in p_value. This method gets the
           final value, in cm, taking into account self.minCellPadding.'''
        unit = value.unit
        # We must get p_value in cm
        if unit == 'cm':
            val = value.value
        elif unit == 'px':
            val = float(value.value) / self.cellPx2cm
        else:
            # We do not support this unit: fall back to the minimum
            val = self.minCellPadding
        # Return the max between the p_value and the minimum value
        return max(val, self.minCellPadding)

    def getMargins(self, attrs, getWidth=False):
        '''Returns ODF properties allowing to define margins as specified by
           self.margins or by CSS p_attrs. If no margin is defined, r_ is an
           empty string.'''
        # If p_getWidth is True, the result is a tuple, whose second element is
        # the width (as a float representing cm) taken by potential left and/or
        # right margins.
        parts = []
        width = 0.0
        # Index i of a direction is also the index of its value in self.margins
        for i, direction in enumerate(css.Styles.directions):
            # Get the value from CSS attributes
            cssValue = getattr(attrs, f'margin{direction}', None)
            if cssValue: cssValue = cssValue.cm(formatted=False)
            # Get the value as defined on p_self
            tbValue = self.margins[i]
            # Choose the prevailing value
            if not self.prevails:
                value = cssValue if cssValue is not None else tbValue
            else:
                value = tbValue if tbValue is not None else cssValue
            if value is None: continue
            # Dump the corresponding ODF property, prefixed with a space
            parts.append(f' fo:margin-{direction}="{value:.2f}cm"')
            # Only directions listed in css.Styles.directionsW count in width
            if direction in css.Styles.directionsW:
                width += value
        r = ''.join(parts)
        return (r, width) if getWidth else r

    @classmethod
    def initStylesMapping(class_, stylesMapping, ocw, dc):
        '''If our special regular expressions are in use in parameters p_ocw
           ("optimalColumnWidths") or p_dc ("distributeColumns"), we must
           provide specific style mapping entries allowing to map CSS attribute
           "table-layout" and its values to column modifiers "optimize" or
           "distribute". p_stylesMapping is updated in place.'''
        if (ocw == class_.ocwRex) or (dc == class_.dcRex):
            stylesMapping['table[table-layout=auto]'] = class_.ocw
            stylesMapping['table[table-layout=fixed]'] = class_.dc
            stylesMapping['table[table-layout=none]'] = class_.default

    @classmethod
    def init(class_):
        '''Sets, on this p_class_, some static attributes related to column
           width optimization.'''
        # The regular expressions to give to converter.py for it to recognize
        # tables whose column widths must be optimized or evenly distributed.
        class_.ocwRex = 'OCW_.*'
        class_.dcRex = 'DC_.*'
        # The default TableProperties instance
        class_.default = TableProperties()
        # TableProperties instances with OCW/DC enabled
        class_.ocw = TableProperties(columnModifier='optimize')
        class_.dc = TableProperties(columnModifier='distribute')

# Static attributes can only be set once the class exists
TableProperties.init()

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class ListProperties(Properties):
    '''Base abstract class for defining properties of a XHTML list'''

    def __init__(self, levels, formats, delta, firstDelta, space, paraStyle):
        # The number of indentation levels supported
        self.levels = levels
        # The list of format characters for bullets or numbers
        self.formats = formats
        # The number of inches to increment at each level (as a float)
        self.delta = delta
        # The first delta can be different than any other
        self.firstDelta = firstDelta
        # The space, in inches (as a float), between the bullet/number and the
        # text.
        self.space = space
        # A specific style to apply to the inner paragraphs
        self.paraStyle = paraStyle
        # The number of levels can be > or < to the number of formats. In those
        # cases, formats will be applied partially or cyclically to levels.
# Remaining methods of class ListProperties
    def dumpStyle(self, name):
        '''Returns the OpenDocument style definition corresponding to this
           instance.'''
        # NOTE(review): p_name, "space", "sb" and "attr" are computed but never
        # used below, and the f-strings contain almost no literal text. The
        # markup these templates are meant to produce looks like it is missing
        # from this copy of the file - confirm against the original source.
        r = []
        fmt = utils.formatNumber
        # Cumulated indentation, incremented at every level
        spaceBefore = 0
        space = fmt(self.space, sep='.', removeTrailingZeros=True)
        for i in range(self.levels):
            # Determine if "delta" or "firstDelta" must be used
            if i == 0 and self.firstDelta is not None:
                delta = self.firstDelta
            else:
                delta = self.delta
            spaceBefore += delta
            sb = fmt(spaceBefore, sep='.', removeTrailingZeros=True)
            # Level-specific attributes are produced by the concrete sub-class
            attr = self.getLevelAttributes(i)
            props = self.getTextProperties(i)
            level = f' {bn} ' \
                    f'{props}{bn} ' \
                    f''
            r.append(level)
        # One chunk per level, separated by newlines
        return f'{bn}{bn.join(r)}{bn}' \
               f''

    def getTextProperties(self, i):
        '''Allows to define text properties at level p_i'''
        # This base implementation defines none
        return ''

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class BulletedProperties(ListProperties):
    '''In a styles mapping, the value @key "ul" must be an instance of this
       class.'''
    type = 'bullet'
    # Default bullet chars
    defaultFormats = ('•', '◦', '▪')
    textStyle = 'podBulletStyle'

    def __init__(self, levels=4, formats=defaultFormats,
                 delta=0.32, firstDelta=0.10, space=0.30, paraStyle=None):
        # Parameters are documented in ListProperties.__init__
        ListProperties.__init__(self, levels, formats, delta, firstDelta,
                                space, paraStyle)

    def getLevelAttributes(self, i):
        '''Dumps bullet-specific attributes for level p_i'''
        # Get the bullet to render at this level
        # (presumably cycles through self.formats when p_i exceeds its length -
        #  verify against utils.getElementAt)
        bullet = utils.getElementAt(self.formats, i)
        return f'text:bullet-char="{bullet}"'

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class NumberedProperties(ListProperties):
    '''In a styles mapping, the value @key "ol" must be an instance of this
       class.'''
    type = 'number'
    # Default number formats and suffixes
    defaultFormats = ('1',)
    defaultSuffixes = ('.',)
    textStyle = 'podNumberStyle'

    def __init__(self, levels=4, formats=defaultFormats,
                 suffixes=defaultSuffixes, delta=0.32, firstDelta=0.08,
                 space=0.32, paraStyle=None):
        # Parameters other than p_suffixes are documented in
        # ListProperties.__init__
        ListProperties.__init__(self, levels, formats, delta, firstDelta,
                                space, paraStyle)
        # The list of suffixes
        self.suffixes = suffixes

    def getLevelAttributes(self, i):
        '''Dumps number-specific attributes for level p_i'''
        # Get the number type and suffix to render at this level
        suffix = utils.getElementAt(self.suffixes, i)
        fmt = utils.getElementAt(self.formats, i)
        return f'style:num-suffix="{suffix}" style:num-format="{fmt}"'

#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class Style:
    '''Represents an ODF style. Either parsed from an ODF file or used for
       dumping a style into an ODF file.'''
    # Splits a string into its leading digits and the remaining chars
    numberRex = re.compile(r'(\d+)(.*)')

    def __init__(self, name, family, defaults=None, outlineLevel=None):
        self.name = name
        self.family = family # May be 'paragraph', etc.
        # The display name is initialised with the style name
        self.displayName = name
        self.styleClass = None # May be 'text', 'list', etc.
        self.fontSize = None
        self.fontSizeUnit = None # May be pt, %, ...
        # Where the style lies within styles and substyles hierarchy
        self.outlineLevel = outlineLevel
        # A possible delta to apply to the outline level. Do not confuse this
        # notion of delta, that applies when the outline level needs to be
        # dumped as attribute to a "text:h" paragraph whose style is p_self,
        # with the notion of delta being in use for a styles mapping (key "h*"),
        # allowing to fine-tune the mapping between outline levels implicitly
        # expressed on XHTML tags ("h1", "h2", etc) and ODF styles's outline
        # levels.
        self.outlineDelta = 0
        # Namespace for the ODF "style-name" attribute corresponding to this
        # style
        self.styleNameNs = 'table' if family == 'table-cell' else 'text'
        # Default ODF attributes for this style
        self.defaults = defaults
        # For some unknown reason, ODF parent-child links don't work (for
        # styles whose family is "table-cell")
        self.inheritWorks = family != 'table-cell'

    def setFontSize(self, fontSize):
        # Splits p_fontSize (a string) into an integer size and a unit.
        # NOTE(review): only the leading digits are kept as the size: a value
        # like "10.5pt" would produce size 10 and unit ".5pt". A string
        # containing no digit makes rexRes None and raises AttributeError.
        rexRes = self.numberRex.search(fontSize)
        self.fontSize = int(rexRes.group(1))
        self.fontSizeUnit = rexRes.group(2)

    def __repr__(self):
        '''p_self's short string representation'''
        r = f'