#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090308
# eLyXer main script


import sys
import codecs
#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090131
# eLyXer trace library

import sys

class Trace(object):
  "A tracing class"

  debugmode = False
  quietmode = False

  def debug(cls, message):
    "Show a debug message"
    if Trace.debugmode and not Trace.quietmode:
      print message

  def message(cls, message):
    "Show a trace message"
    if not Trace.quietmode:
      print message

  def error(cls, message):
    "Show an error message"
    message = message.encode('utf-8')
    sys.stderr.write(message + '\n')

  debug = classmethod(debug)
  message = classmethod(message)
  error = classmethod(error)

#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090308
# File line management for eLyXer

import sys


class LineReader(object):
  "Reads a file line by line"

  def __init__(self, file):
    self.file = file
    self.linenumber = 0
    self.current = None
    self.split = None

  def currentline(self):
    "Get the current line"
    if not self.current:
      self.current = self.file.readline()
      if self.file == sys.stdin:
        self.current = self.current.decode('utf-8')
    return self.current

  def currentnonblank(self):
    "Get the current nonblank line"
    while (self.currentline() == '\n'):
      self.nextline()
    return self.currentline()

  def currentsplit(self):
    "Get the current nonblank line, split into words"
    if not self.split:
      self.split = self.currentnonblank().split()
    return self.split

  def nextline(self):
    "Go to next line"
    self.current = None
    self.split = None
    self.linenumber += 1
    if self.linenumber % 1000 == 0:
      Trace.message('Parsing line ' + str(self.linenumber))

  def finished(self):
    "Have we finished reading the file"
    if len(self.currentline()) == 0:
      return True
    return False

  def close(self):
    self.file.close()

class HtmlWriter(object):
  "Writes an HTML file as a series of lists"

  def __init__(self, file):
    self.file = file

  def write(self, html):
    "Write a list of lines"
    for line in html:
      self.writeline(line)

  def writeline(self, line):
    "Write a line to file"
    if self.file == sys.stdout:
      line = line.encode('utf-8')
    self.file.write(line)

  def close(self):
    self.file.close()

#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090311
# LyX styles in containers

#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090203
# eLyXer parsers

import codecs
#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090313
# eLyXer runtime options

import codecs


class Options(object):
  "A set of runtime options"

  nocopy = False
  debug = False
  quiet = False
  css = 'http://www.nongnu.org/elyxer/lyx.css'
  title = 'Converted document'
  directory = '.'
  branches = dict()

  def parseoptions(self, args):
    "Parse command line options"
    while args[0].startswith('--'):
      if args[0] == '--help':
        return 'eLyXer help'
      key, value = self.readoption(args)
      if not key:
        return 'Option ' + original + ' not recognized'
      if not value:
        return 'Option ' + key + ' needs a value'
      self.setoption(key, value)
    return None

  def readoption(self, args):
    "Read the key and value for an option"
    arg = args[0][2:]
    del args[0]
    if '=' in arg:
      return self.readequals(arg, args)
    key = arg
    if not hasattr(Options, key):
      return None, None
    current = getattr(Options, key)
    if current.__class__ == bool:
      return key, True
    # read value
    if len(args) == 0:
      return key, None
    if args[0].startswith('"'):
      initial = args[0]
      del args[0]
      return key, self.readquoted(args, initial)
    value = args[0]
    del args[0]
    return key, value

  def readquoted(self, args, initial):
    "Read a value between quotes"
    value = initial[1:]
    while len(args) > 0 and not args[0].endswith('"') and not args[0].startswith('--'):
      value += ' ' + args[0]
      del args[0]
    if len(args) == 0 or args[0].startswith('--'):
      return None
    value += ' ' + args[0:-1]
    return value

  def readequals(self, arg, args):
    "Read a value with equals"
    split = arg.split('=', 1)
    key = split[0]
    value = split[1]
    if not value.startswith('"'):
      return key, value
    return key, self.readquoted(args, value)

  def setoption(self, key, value):
    "Set an option value"
    setattr(Options, key, value)
    if hasattr(Trace, key + 'mode'):
      setattr(Trace, key + 'mode', value)

class BranchOptions(object):
  "A set of options for a branch"

  def __init__(self):
    self.selected = 0
    self.color = '#ffffff'

  def set(self, key, value):
    "Set a branch option"
    if not key.startswith('\\'):
      Trace.error('Invalid branch option ' + key)
      return
    key = key.replace('\\', '')
    setattr(self, key, value)



class ParseTree(object):
  "A parsing tree"

  default = '~~default~~'

  def __init__(self, types):
    "Create the parse tree"
    self.root = dict()
    for type in types:
      if hasattr(type, 'start'):
        self.addstart(type, type.start)
      elif hasattr(type, 'starts'):
        for start in type.starts:
          self.addstart(type, start)

  def addstart(self, type, start):
    "Add a start piece to the tree"
    tree = self.root
    for piece in start.split():
      if not piece in tree:
        tree[piece] = dict()
      tree = tree[piece]
    if ParseTree.default in tree:
      Trace.error('Start ' + start + ' duplicated')
    tree[ParseTree.default] = type

  def find(self, reader):
    "Find the current sentence in the tree"
    branches = [self.root]
    for piece in reader.currentsplit():
      current = branches[-1]
      piece = piece.rstrip('>')
      if piece in current:
        branches.append(current[piece])
    while not ParseTree.default in branches[-1]:
      Trace.error('Line ' + reader.currentline().strip() + ' not found')
      branches.pop()
    last = branches[-1]
    return last[ParseTree.default]

class Parser(object):
  "A generic parser"

  def __init__(self):
    self.begin = 0
    self.parameters = dict()

  def parseheader(self, reader):
    "Parse the header"
    header = reader.currentsplit()
    reader.nextline()
    self.begin = reader.linenumber + 1
    return header

  def parseparameter(self, reader):
    "Parse a parameter"
    split = reader.currentline().strip().split(' ', 1)
    if len(split) == 0:
      return
    key = split[0]
    if len(split) == 1:
      self.parameters[key] = True
    else:
      self.parameters[key] = split[1].replace('"', '')
    reader.nextline()

  def __str__(self):
    "Return a description"
    return self.__class__.__name__ + ' (' + self.begin + ')'

class LoneCommand(Parser):
  "A parser for just one command line"

  def parse(self,reader):
    "Read nothing"
    return []

class TextParser(Parser):
  "A parser for a command and a bit of text"

  endings = ['\\end_layout', '\\end_inset', '\\emph', '\\family', '\\noun',
      '\\color', '\\size', '\\series']

  def parse(self, reader):
    "Parse lines as long as they are text"
    contents = []
    while not self.isending(reader):
      container = self.factory.create(reader)
      contents.append(container)
    return contents

  def isending(self, reader):
    "Check if text is ending"
    current = reader.currentsplit()
    if len(current) == 0:
      return True
    return current[0] in TextParser.endings

class ExcludingParser(Parser):
  "A parser that excludes the final line"

  def parse(self, reader):
    "Parse everything up to (and excluding) the final line"
    contents = []
    while not reader.currentnonblank().startswith(self.ending):
      container = self.factory.create(reader)
      contents.append(container)
    return contents

class BoundedParser(ExcludingParser):
  "A parser bound by a final line"

  def parse(self, reader):
    "Parse everything, including the final line"
    contents = ExcludingParser.parse(self, reader)
    # skip last line
    reader.nextline()
    return contents

class BoundedDummy(Parser):
  "A bound parser that ignores everything"

  def parse(self, reader):
    "Parse the contents of the container"
    while not reader.currentline().startswith(self.ending):
      reader.nextline()
    # skip last line
    reader.nextline()
    return []

class StringParser(Parser):
  "Parses just a string"

  def parseheader(self, reader):
    "Do nothing, just take note"
    self.begin = reader.linenumber + 1
    return []

  def parse(self, reader):
    "Parse a single line"
    contents = [reader.currentline()]
    reader.nextline()
    return contents

class InsetParser(BoundedParser):
  "Parses a LyX inset"

  def parse(self, reader):
    "Parse inset parameters into a dictionary"
    while reader.currentline() != '\n' and not reader.currentline().startswith('\\'):
      self.parseparameter(reader)
    return BoundedParser.parse(self, reader)

class HeaderParser(Parser):
  "Parses the LyX header"

  def parse(self, reader):
    "Parse header parameters into a dictionary"
    while not reader.currentline().startswith(self.ending):
      self.parseline(reader)
      reader.nextline()
    # skip last line
    reader.nextline()
    return []

  def parseline(self, reader):
    "Parse a single line as a parameter or as a start"
    line = reader.currentline()
    for key in HeaderParser.openings:
      if line.startswith(key):
        HeaderParser.openings[key](self, reader)
        return
    # no match
    self.parseparameter(reader)

  def parsebranch(self, reader):
    branch = reader.currentline().split()[1]
    Trace.debug('Header branch ' + branch)
    reader.nextline()
    subparser = HeaderParser().complete('\\end_branch')
    subparser.parse(reader)
    options = BranchOptions()
    for key in subparser.parameters:
      options.set(key, subparser.parameters[key])
    Options.branches[branch] = options

  def complete(self, ending):
    self.ending = ending
    return self

  openings = {'\\branch':parsebranch}

#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090203
# eLyXer html outputters

import codecs
import datetime


class EmptyOutput(object):
  "The output for some container"

  def gethtml(self, container):
    "Return empty HTML code"
    return []

class FixedOutput(object):
  "Fixed output"

  def gethtml(self, container):
    "Return constant HTML code"
    return container.html

class ContentsOutput(object):
  "Outputs the contents converted to HTML"

  def gethtml(self, container):
    "Return the HTML code"
    html = []
    if container.contents == None:
      return html
    for element in container.contents:
      html += element.gethtml()
    return html

class TagOutput(ContentsOutput):
  "Outputs an HTML tag surrounding the contents"

  def gethtml(self, container):
    "Return the HTML code"
    html = [self.getopen(container)]
    html += ContentsOutput.gethtml(self, container)
    html.append(self.getclose(container))
    return html

  def getopen(self, container):
    "Get opening line"
    if container.tag == '':
      return ''
    open = '<' + container.tag + '>'
    if container.breaklines:
      return open + '\n'
    return open

  def getclose(self, container):
    "Get closing line"
    if container.tag == '':
      return ''
    close = '</' + container.tag.split()[0] + '>'
    if container.breaklines:
      return '\n' + close + '\n'
    return close

class MirrorOutput(object):
  "Returns as output whatever comes along"

  def gethtml(self, container):
    "Return what is put in"
    return container.contents

class HeaderOutput(object):
  "Returns the HTML headers"

  def gethtml(self, container):
    "Return a constant header"
    html = [u'<?xml version="1.0" encoding="UTF-8"?>\n']
    html.append(u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n')
    html.append(u'<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n')
    html.append(u'<head>\n')
    html.append(u'<meta name="generator" content="http://www.nongnu.org/elyxer/"/>\n')
    html.append(u'<meta name="create-date" content="' + datetime.date.today().isoformat() + '"/>\n')
    html.append(u'<link rel="stylesheet" href="' + Options.css + '" type="text/css" media="screen"/>\n')
    html.append(u'<title>' + Options.title + '</title>\n')
    html.append('</head>\n')
    html.append('<body>\n')
    html.append('<div id="globalWrapper">\n')
    return html

class FooterOutput(object):
  "Return the HTML code for the footer"

  author = None

  def gethtml(self, container):
    "Footer HTML"
    html = []
    if FooterOutput.author and not Options.nocopy:
      html.append('<hr/>\n')
      year = datetime.date.today().year
      html.append('<p>Copyright (C) ' + str(year) + ' ' + FooterOutput.author
          + '</p>\n')
    html.append('</div>\n')
    html.append('</body>\n')
    html.append('</html>\n')
    return html

#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090131
# eLyXer containers for Lyx data that output HTML

#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090320
# eLyXer general configuration


class FormulaConfig(object):
  "Configuration for formulae"

  unmodified = ['.', '*', u'€', '(', ')', '[', ']', ':', u'·']
  modified = {'\'':u'’', '=':u' = ', ' ':'', '<':u' &lt; ', '-':u' − ', '+':u' + ',
      ',':u', ', '/':u' ⁄ ', '\n':''}
  commands = {
      # spacing
      '\\, ':' ', '& ':u'', '\\\\':'<br/>',
      # typographical
      '\\%':'%', '\\prime':u'′', '\\_':'_',
      '\\left(':u'<span class="bigsymbol">(</span>',
      '\\right)':u'<span class="bigsymbol">)</span>',
      # regional
      '\\tilde{n}':u'ñ', '\\acute{o}':u'ó', '\\acute{a}':u'á',
      # greek
      '\\alpha':u'α', '\\beta':u'β', '\\gamma':u'γ', '\\delta':u'δ',
      '\\epsilon':u'ε', '\\lambda':u'λ', '\\Delta':u'Δ', '\\sum':u'∑',
      '\\sigma':u'σ', '\\pi':u'π',
      # mathematical
      '\\times':u' × ', '\\propto':u' ∝ ', '\\cdot':u'⋅', '\\approx':u' ≈ ',
      '\\pm':u'±', '\\sim':u' ~ ', '\\implies':u'  ⇒  ', '\\int':u'∫',
      '\\intop':u'∫', '\\infty':u'∞', '\\not':u'¬',
      # symbols
      '\\rightarrow':u' → ', '\\rightsquigarrow':u' ⇝ ', '\\Rightarrow':u'⇒',
      '\\leftarrow':u' ← ',
      '\\dashrightarrow':u' ⇢ ', '\\blacktriangleright':u'▶', '\\bullet':u'•',
      '\\dagger':u'†', '\\ddagger':u'‡', '\\bigstar':u'★',
      '\\to':u'→', '\\gets':u'←',
      # common functions
      '\\log':'log', '\\exp':'exp', '\\ln':'ln', '\\lim':'lim', '\\sin':'sin',
      '\\cos':'cos',
      # hyperbolic functions
      '\\tanh':'tanh', '\\sinh':'sinh', '\\cosh':'cosh',
      # LaTeX (ignored)
      '\\nonumber':''
      }
  onefunctions = {
      # typographical
      '\\mathsf':'span class="mathsf"', '\\mathbf':'b', '^':'sup',
      '_':'sub', '\\underline':'u', '\\overline':'span class="overline"',
      '\\dot':'span class="overdot"',
      '\\bar':'span class="bar"', '\\mbox':'span class="mbox"',
      '\\textrm':'span class="mathrm"', '\\mathrm':'span class="mathrm"',
      '\\text':'span class="text"', '\\textipa':'span class="textipa"',
      '\\boldsymbol':'b', '\\mathit':'i', '\\mathtt':'tt',
      '\\mathbb':'span class="blackboard"',
      '\\mathfrak':'span class="fraktur"', '\\mathcal':'span class="script"',
      # functions
      '\\sqrt':'span class="sqrt"',
      # LaTeX (ignored)
      '\\label':''
      }
  twofunctions = {
      '\\frac':['span class="fraction"', 'span class="numerator"', 'span class="denominator"'],
      '\\nicefrac':['span class="fraction"', 'span class="numerator"', 'span class="denominator"']
      }

class ContainerConfig(object):
  "Low-level configuration for containers"

  escapes = {'&':'&amp;', '<':'&lt;', '>':'&gt;'}
  replaces = { '`':u'‘', '\'':u'’', '\n':'', '--':u'—' }
  commands = { '\\SpecialChar \\ldots{}':u'…', '\\InsetSpace ~':'&nbsp;',
      '\\InsetSpace \\space{}':'&nbsp;', '\\InsetSpace \\thinspace{}':u' ',
      '\\backslash':'\\', '\\SpecialChar \\@.':'.',
      '\\SpecialChar \\menuseparator':u'&nbsp;▷&nbsp;',
      '\\SpecialChar \\textcompwordmark{}':u'', '\\SpecialChar \\nobreakdash-':'-',
      '\\SpecialChar \\slash{}':'/'}

class BlackBoxConfig(object):
  "Configuration for lines ignored"

  starts = ['\\lyxformat', '\\begin_document', '\\begin_body',
      '\\family default', '\\color inherit',
      '\\shape default', '\\series default', '\\emph off',
      '\\bar no', '\\noun off', '\\emph default', '\\bar default',
      '\\noun default', '\\family roman', '\\series medium',
      '\\shape up', '\\size normal', '\\color none', '#LyX', '\\noindent',
      '\\labelwidthstring', '\\paragraph_spacing', '\\length']

class SpaceConfig(object):
  "Configuration for spaces"

  spaces = {
      '~':'&nbsp;', '\\space{}':'&nbsp;', '\\thinspace{}':u' ',
      '\\hfill{}':u' ', '\\hspace*{\\fill}':u' ', '\\hspace{}':u' ',
      '\\negthinspace{}':u'', '\\enskip{}':u' ', '\\quad{}': u' ',
      '\\qquad{}':u'  '
      }



class Container(object):
  "A container for text and objects in a lyx file"

  def __init__(self):
    self.contents = list()

  def comesnext(cls, reader):
    "Return if the current line matches"
    line = reader.currentline()
    if hasattr(cls, 'start'):
      return line.startswith(cls.start)
    if hasattr(cls, 'starts'):
      for start in cls.starts:
        if line.startswith(start):
          return True
    return False

  comesnext = classmethod(comesnext)

  def parse(self, reader):
    "Parse by lines"
    if hasattr(self, 'ending'):
      self.parser.ending = self.ending
    self.parser.factory = self.factory
    self.header = self.parser.parseheader(reader)
    self.begin = self.parser.begin
    self.contents = self.parser.parse(reader)
    self.process()
    self.parser = []

  def process(self):
    "Process contents"
    pass

  def finished(self, reader):
    "Find out if we are at the end"
    return reader.currentline().startswith(self.ending)

  def gethtml(self):
    "Get the resulting HTML"
    return self.output.gethtml(self)

  def __str__(self):
    "Get a description"
    return self.__class__.__name__ + '@' + str(self.begin)

  def escape(self, line, escapes = ContainerConfig.escapes):
    "Escape a line to appear in HTML"
    pieces = escapes.keys()
    # do the '&' first
    pieces.sort()
    for piece in pieces:
      if piece in line:
        line = line.replace(piece, escapes[piece])
    return line

  def searchfor(self, type):
    "Search for an embedded container of a given type recursively"
    for element in self.contents:
      if isinstance(element, Container):
        if isinstance(element, type):
          return element
        result = element.searchfor(type)
        if result:
          return result
    return None

  def restyle(self, type, restyler):
    "Restyle contents with a restyler function"
    i = 0
    while i < len(self.contents):
      element = self.contents[i]
      if isinstance(element, type):
        restyler(self, i)
      if isinstance(element, Container):
        element.restyle(type, restyler)
      i += 1

  def group(self, index, group, isingroup):
    "Group some adjoining elements into a group"
    if index >= len(self.contents):
      return
    if hasattr(self.contents[index], 'grouped'):
      return
    while index < len(self.contents) and isingroup(self.contents[index]):
      self.contents[index].grouped = True
      group.contents.append(self.contents[index])
      self.contents.pop(index)
    self.contents.insert(index, group)

  def remove(self, index):
    "Remove a container but leave its contents"
    container = self.contents[index]
    self.contents.pop(index)
    while len(container.contents) > 0:
      self.contents.insert(index, container.contents.pop())

class BlackBox(Container):
  "A container that does not output anything"

  starts = BlackBoxConfig.starts

  def __init__(self):
    self.parser = LoneCommand()
    self.output = EmptyOutput()

class StringContainer(Container):
  "A container for a single string"

  start = ''

  def __init__(self):
    self.parser = StringParser()
    self.output = MirrorOutput()

  def process(self):
    "Replace special chars"
    line = self.contents[0]
    replaced = self.escape(line)
    replaced = self.changeline(replaced)
    self.contents = [replaced]
    if '\\' in replaced and len(replaced) > 1:
      # unprocessed commands
      Trace.error('Unknown command at ' + str(self.parser.begin) + ': '
          + replaced.strip())

  def changeline(self, line):
    line = self.replacemap(line, ContainerConfig.replaces)
    if not '\\' in line:
      return line
    line = self.replacemap(line, ContainerConfig.commands)
    return line

  def replacemap(self, line, map):
    for piece in map:
      if piece in line:
        line = line.replace(piece, map[piece])
    return line
  
  def __str__(self):
    length = ''
    descr = ''
    if len(self.contents) > 0:
      length = str(len(self.contents[0]))
      descr = self.contents[0].strip()
    return 'StringContainer@' + str(self.begin) + '(' + str(length) + ')'

class Constant(StringContainer):
  "A constant string"

  def __init__(self, text):
    self.contents = [text]
    self.output = MirrorOutput()

  def __str__(self):
    return 'Constant'

class LangLine(Container):
  "A line with language information"

  start = '\\lang'

  def __init__(self):
    self.parser = LoneCommand()
    self.output = EmptyOutput()

  def process(self):
    self.lang = self.header[1]

class ERT(Container):
  "Evil Red Text"

  start = '\\begin_inset ERT'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = EmptyOutput()

class TaggedText(Container):
  "Text inside a tag"

  def __init__(self):
    self.parser = TextParser()
    self.output = TagOutput()
    self.breaklines = False

  def complete(self, contents, tag, breaklines=False):
    "Complete the tagged text and return it"
    self.contents = contents
    self.tag = tag
    self.breaklines = breaklines
    return self

  def constant(self, text, tag):
    "Complete the tagged text with a constant"
    constant = Constant(text)
    return self.complete([constant], tag)

  def __str__(self):
    return 'Tagged <' + self.tag + '>'

class ContainerFactory(object):
  "Creates containers depending on the first line"

  types = [BlackBox, LangLine, StringContainer, ERT]

  def __init__(self):
    self.tree = ParseTree(ContainerFactory.types)

  def create(self, reader):
    "Get the container and parse it"
    #Trace.debug('processing "' + reader.currentline() + '"')
    type = self.tree.find(reader)
    container = type.__new__(type)
    container.__init__()
    container.factory = self
    container.parse(reader)
    return container



class QuoteContainer(Container):
  "A container for a pretty quote"

  start = '\\begin_inset Quotes'
  ending = '\\end_inset'
  outputs = {
      'eld':u'“', 'erd':u'”', 'els':u'‘', 'ers':u'’',
      'sld':u'”', 'srd':u'”',
      'gld':u'„', 'grd':u'“', 'gls':u'‚', 'grs':u'‘',
      'pld':u'„', 'prd':u'”', 'pls':u'‚', 'prs':u'’',
      'fld':u'«', 'frd':u'»', 'fls':u'‹', 'frs':u'›',
      'ald':u'»', 'ard':u'«', 'als':u'›', 'ars':u'‹'
      }

  def __init__(self):
    self.parser = BoundedParser()
    self.output = FixedOutput()

  def process(self):
    "Process contents"
    self.type = self.header[2]
    if not self.type in QuoteContainer.outputs:
      Trace.error('Quote type ' + self.type + ' not found')
      self.html = '"'
      return
    self.html = QuoteContainer.outputs[self.type]

class LyxLine(Container):
  "A Lyx line"

  start = '\\lyxline'

  def __init__(self):
    self.parser = LoneCommand()
    self.output = FixedOutput()

  def process(self):
    self.html = ['<hr class="line" />']

class EmphaticText(TaggedText):
  "Text with emphatic mode"

  start = '\\emph on'

  def process(self):
    self.tag = 'i'

class ShapedText(TaggedText):
  "Text shaped (italic, slanted)"

  start = '\\shape'

  tags = {'slanted':'i', 'italic':'i', 'smallcaps':'span class="versalitas"'}

  def process(self):
    self.type = self.header[1]
    if not self.type in ShapedText.tags:
      Trace.error('Unrecognized shape ' + self.header[1])
      self.tag = 'span'
      return
    self.tag = ShapedText.tags[self.type]

class VersalitasText(TaggedText):
  "Text in versalitas"

  start = '\\noun on'

  def process(self):
    self.tag = 'span class="versalitas"'

class ColorText(TaggedText):
  "Colored text"

  start = '\\color'

  def process(self):
    self.color = self.header[1]
    self.tag = 'span class="' + self.color + '"'

class SizeText(TaggedText):
  "Sized text"

  start = '\\size'

  def process(self):
    self.size = self.header[1]
    self.tag = 'span class="' + self.size + '"'

class BoldText(TaggedText):
  "Bold text"

  start = '\\series bold'

  def process(self):
    self.tag = 'b'

class TextFamily(TaggedText):
  "A bit of text from a different family"

  start = '\\family'
  typetags = { 'typewriter':'tt', 'sans':'span class="sans"' }

  def process(self):
    "Parse the type of family"
    self.type = self.header[1]
    self.tag = TextFamily.typetags[self.type]

class Hfill(TaggedText):
  "Horizontall fill"

  start = '\\hfill'

  def process(self):
    self.tag = 'span class="right"'

class BarredText(TaggedText):
  "Text with a bar somewhere"

  start = '\\bar'
  typetags = { 'under':'u' }

  def process(self):
    "Parse the type of bar"
    self.type = self.header[1]
    if not self.type in BarredText.typetags:
      Trace.error('Unknown bar type ' + self.type)
      self.tag = 'span'
      return
    self.tag = BarredText.typetags[self.type]

class FlexCode(Container):
  "A bit of inset code"

  start = '\\begin_inset Flex CharStyle:Code'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = TagOutput()
    self.breaklines = True
    self.tag = 'span class="code"'

class ListItem(Container):
  "An element in a list"

  starts = ['\\begin_layout Enumerate', '\\begin_layout Itemize']
  ending = '\\end_layout'

  def __init__(self):
    self.contents = list()
    self.parser = BoundedParser()
    self.output = TagOutput()
    self.breaklines = True

  tags = {'Enumerate':'ol', 'Itemize':'ul'}

  def process(self):
    self.tag = ListItem.tags[self.header[1]]
    tag = TaggedText().complete(self.contents, 'li', True)
    self.contents = [tag]

class DeeperList(Container):
  "A nested list"

  start = '\\begin_deeper'
  ending = '\\end_deeper'

  def __init__(self):
    self.parser = BoundedParser()
    self.output = TagOutput()
    self.breaklines = True
    self.tag = 'ul'

ContainerFactory.types += [QuoteContainer, LyxLine, EmphaticText, ShapedText,
    VersalitasText, ColorText, SizeText, BoldText, TextFamily, Hfill,
    FlexCode, ListItem, DeeperList, BarredText]

#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090218
# eLyXer links and nodes



class Link(Container):
  "A link to another part of the document"

  def __init__(self):
    self.contents = list()
    self.output = LinkOutput()

  def complete(self, text, anchor, url, type = None):
    self.contents = [Constant(text)]
    if anchor:
      self.anchor = anchor
    if url:
      self.url = url
    if type:
      self.type = type
    return self

class Label(Container):
  "A label to be referenced"

  starts = ['\\begin_inset LatexCommand label', '\\begin_inset CommandInset label']
  ending = '\\end_inset'

  labels = dict()

  def __init__(self):
    self.parser = InsetParser()
    self.output = LinkOutput()

  def process(self):
    self.anchor = self.parser.parameters['name']
    Label.labels[self.anchor] = self
    self.contents = [Constant(' ')]

class Reference(Link):
  "A reference to a label"

  starts = ['\\begin_inset LatexCommand ref', '\\begin_inset CommandInset ref']
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = LinkOutput()
    self.direction = u'↓'

  def process(self):
    key = self.parser.parameters['reference']
    self.url = '#' + key
    if key in Label.labels:
      # already seen
      self.direction = u'↑'
    self.contents = [Constant(self.direction)]

class BiblioCite(Container):
  "Cite of a bibliography entry"

  starts = ['\\begin_inset LatexCommand cite', '\\begin_inset CommandInset citation']
  ending = '\\end_inset'

  index = 0
  entries = dict()

  def __init__(self):
    self.parser = InsetParser()
    self.output = TagOutput()
    self.tag = 'sup'
    self.breaklines = False

  def process(self):
    "Add a cite to every entry"
    self.contents = list()
    keys = self.parser.parameters['key'].split(',')
    for key in keys:
      BiblioCite.index += 1
      number = str(BiblioCite.index)
      link = Link().complete(number, 'cite-' + number, '#' + number)
      self.contents.append(link)
      self.contents.append(Constant(','))
      if not key in BiblioCite.entries:
        BiblioCite.entries[key] = []
      BiblioCite.entries[key].append(number)
    if len(keys) > 0:
      # remove trailing ,
      self.contents.pop()

class Bibliography(Container):
  "A bibliography layout containing an entry"

  start = '\\begin_layout Bibliography'
  ending = '\\end_layout'

  def __init__(self):
    self.parser = BoundedParser()
    self.output = TagOutput()
    self.breaklines = True
    self.tag = 'p class="biblio"'

class BiblioEntry(Container):
  "A bibliography entry"

  starts = ['\\begin_inset LatexCommand bibitem', '\\begin_inset CommandInset bibitem']
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = TagOutput()
    self.tag = 'span class="entry"'
    self.breaklines = False

  def process(self):
    "Get all the cites of the entry"
    cites = list()
    key = self.parser.parameters['key']
    if key in BiblioCite.entries:
      cites = BiblioCite.entries[key]
    self.contents = [Constant('[')]
    for cite in cites:
      link = Link().complete(cite, cite, '#cite-' + cite)
      self.contents.append(link)
      self.contents.append(Constant(','))
    if len(cites) > 0:
      self.contents.pop(-1)
    self.contents.append(Constant('] '))

class ListOf(Container):
  "A list of entities (figures, tables, algorithms)"

  start = '\\begin_inset FloatList'
  ending = '\\end_inset'

  names = {'figure':'figures', 'table':'tables', 'algorithm':'listings'}

  def __init__(self):
    self.parser = BoundedParser()
    self.output = TagOutput()
    self.breaklines = True

  def process(self):
    "Parse the header and get the type"
    self.type = self.header[2]
    self.tag = 'div class="list"'
    self.contents = [Constant(u'List of ' + ListOf.names[self.type])]

class TableOfContents(Container):
  "Table of contents"

  starts = ['\\begin_inset LatexCommand tableofcontents',
      '\\begin_inset CommandInset toc']
  ending = '\\end_inset'

  def __init__(self):
    self.parser = BoundedParser()
    self.output = TagOutput()
    self.breaklines = True

  def process(self):
    "Parse the header and get the type"
    self.tag = 'div class="toc"'
    self.contents = [Constant(u'Table of Contents')]

class IndexEntry(Link):
  "An entry in the alphabetical index"

  start = '\\begin_inset LatexCommand index'
  ending = '\\end_inset'

  entries = dict()

  namescapes = {'!':'', '|':', ', '  ':' '}
  keyescapes = {' ':'-', '--':'-', ',':''}

  def __init__(self):
    self.parser = InsetParser()
    self.output = LinkOutput()
    self.breaklines = False

  def process(self):
    "Put entry in index"
    name = self.parser.parameters['name'].strip()
    self.name = self.escape(name, IndexEntry.namescapes)
    self.key = self.escape(self.name, IndexEntry.keyescapes)
    if not self.key in IndexEntry.entries:
      # no entry; create
      IndexEntry.entries[self.key] = list()
    self.index = len(IndexEntry.entries[self.key])
    IndexEntry.entries[self.key].append(self)
    self.anchor = 'entry-' + self.key + '-' + str(self.index)
    self.url = '#index-' + self.key
    self.contents = [Constant(u'↓')]

class LayoutIndexEntry(IndexEntry):
  "An entry with the name in a layout"

  start = '\\begin_inset Index'
  ending = '\\end_inset'

  def process(self):
    "Read entry from layout and put in index"
    name = ''
    layout = self.contents[0]
    for element in layout.contents:
      if isinstance(element, StringContainer):
        name += element.contents[0]
      else:
        name += ' '
    self.parser.parameters['name'] = name
    IndexEntry.process(self)

class PrintIndex(Container):
  "Command to print an index"

  starts = ['\\begin_inset LatexCommand printindex',
      '\\begin_inset CommandInset index_print']
  ending = '\\end_inset'

  def __init__(self):
    self.parser = BoundedParser()
    self.output = ContentsOutput()

  def process(self):
    "Create the alphabetic index"
    self.contents = [TaggedText().constant('Index', 'h1 class="index"'),
        Constant('\n')]
    for key in self.sortentries():
      name = IndexEntry.entries[key][0].name
      entry = [Link().complete(name, 'index-' + key, None, 'printindex'),
          Constant(': ')]
      contents = [TaggedText().complete(entry, 'i')]
      contents += self.createarrows(key, IndexEntry.entries[key])
      self.contents.append(TaggedText().complete(contents, 'p class="printindex"',
          True))

  def sortentries(self):
    "Sort all entries in the index"
    keys = IndexEntry.entries.keys()
    # sort by name
    keys.sort()
    return keys

  def createarrows(self, key, entries):
    "Create an entry in the index"
    arrows = []
    for entry in entries:
      link = Link().complete(u'↑', 'index-' + entry.key,
          '#entry-' + entry.key + '-' + str(entry.index))
      arrows += [link, Constant(u', \n')]
    return arrows[:-1]

class NomenclatureEntry(Link):
  "An entry of LyX nomenclature"

  start = '\\begin_inset CommandInset nomenclature'
  ending = '\\end_inset'

  entries = {}

  def __init__(self):
    self.parser = InsetParser()
    self.output = LinkOutput()

  def process(self):
    "Put entry in index"
    self.symbol = self.parser.parameters['symbol']
    self.description = self.parser.parameters['description']
    self.key = self.symbol.replace(' ', '-').lower()
    NomenclatureEntry.entries[self.key] = self
    self.anchor = 'noment-' + self.key
    self.url = '#nom-' + self.key
    self.contents = [Constant(u'↓')]

class NomenclaturePrint(Container):
  "Print all nomenclature entries"

  start = '\\begin_inset CommandInset nomencl_print'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = ContentsOutput()

  def process(self):
    self.keys = self.sortentries()
    self.contents = [TaggedText().constant('Nomenclature', 'h1 class="nomenclature"')]
    for key in self.keys:
      entry = NomenclatureEntry.entries[key]
      contents = [Link().complete(u'↑', 'nom-' + key, '#noment-' + key)]
      contents.append(Constant(entry.symbol + u' '))
      contents.append(Constant(entry.description))
      text = TaggedText().complete(contents, 'div class="Nomenclated"', True)
      self.contents.append(text)

  def sortentries(self):
    "Sort all entries in the index"
    keys = NomenclatureEntry.entries.keys()
    # sort by name
    keys.sort()
    return keys

class URL(Link):
  "A clickable URL"

  starts = ['\\begin_inset LatexCommand url',
      '\\begin_inset LatexCommand htmlurl', '\\begin_inset CommandInset href']
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = LinkOutput()

  def process(self):
    "Read URL from parameters"
    self.url = self.escape(self.parser.parameters['target'])
    if 'type' in self.parser.parameters:
      self.url = self.escape(self.parser.parameters['type']) + self.url
    name = self.url
    if 'name' in self.parser.parameters:
      name = self.parser.parameters['name']
    self.contents = [Constant(name)]

class FlexURL(URL):
  "A flexible URL"

  start = '\\begin_inset Flex URL'
  ending = '\\end_inset'

  def process(self):
    "Read URL from contents"
    text = self.searchfor(StringContainer).contents[0]
    self.url = self.escape(text)
    self.contents = [Constant(self.url)]

class LinkOutput(object):
  "A link pointing to some destination"
  "Or an anchor (destination)"

  def gethtml(self, container):
    "Get the HTML code for the link"
    type = container.__class__.__name__
    if hasattr(container, 'type'):
      type = container.type
    tag = 'a class="' + type + '"'
    if hasattr(container, 'anchor'):
      tag += ' name="' + container.anchor + '"'
    if hasattr(container, 'url'):
      tag += ' href="' + container.url + '"'
    text = TaggedText().complete(container.contents, tag)
    return text.gethtml()

ContainerFactory.types += [Label, Reference, BiblioCite, Bibliography,
    BiblioEntry, ListOf, TableOfContents, IndexEntry, PrintIndex, URL,
    FlexURL, NomenclatureEntry, NomenclaturePrint, LayoutIndexEntry]

#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090207
# eLyXer formula processing

import sys


class Formula(Container):
  "A Latex formula"

  start = '\\begin_inset Formula'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = FormulaParser()
    self.output = TagOutput()

  def process(self):
    "Convert the formula to HTML"
    text = self.contents[0]
    original, result = self.convert(text, 0)
    #Trace.debug('Formula ' + original + ' -> ' + result)
    self.contents = result
    self.restyle(TaggedText, self.restyletagged)
    if self.header[0] == 'inline':
      self.tag = 'span class="formula"'
      self.breaklines = False
    else:
      self.tag = 'div class="formula"'
      self.breaklines = True
  
  def convert(self, text, pos):
    "Convert a bit of text to HTML"
    processed = ''
    result = list()
    while pos < len(text) and text[pos] != '}':
      original, converted = self.convertchars(text, pos)
      #Trace.debug('converted: ' + unicode(converted))
      processed += original
      pos += len(original)
      result += converted
    return processed, result

  def convertchars(self, text, pos):
    "Convert one or more characters, return the conversion"
    #Trace.debug('Formula ' + text + ' @' + str(pos))
    for reader in Formula.readers:
      bit, result = reader(self, text, pos)
      if bit:
        return bit, result
    Trace.error('Unrecognized function at ' + str(self.parser.begin) + ' in ' +
        unicode(text[pos:]))
    return '\\', [Constant('\\')]

  def readalpha(self, text, pos):
    "Read alphabetic sequence"
    alpha = str()
    while pos < len(text) and text[pos].isalpha():
      alpha += text[pos]
      pos += 1
    return alpha, [TaggedText().constant(alpha, 'i')]

  def readsymbols(self, text, pos):
    "Read a string of symbols"
    symbols = unicode()
    result = unicode()
    while pos + len(symbols) < len(text):
      char = text[pos + len(symbols)]
      if char.isdigit() or char in FormulaConfig.unmodified:
        symbols += char
        result += char
      elif char in FormulaConfig.modified:
        symbols += char
        result += FormulaConfig.modified[char]
      else:
        break
    if len(symbols) == 0:
      return None, None
    return symbols, [Constant(result)]

  def command(self, text, pos):
    "read a command"
    command, translated = self.find(text, pos, FormulaConfig.commands)
    if not command:
      return None, None
    return command, [Constant(translated)]

  def readone(self, text, pos):
    "read a one-parameter function"
    function, tag = self.find(text, pos, FormulaConfig.onefunctions)
    if not function:
      return None, None
    pos += len(function)
    bracket, result = self.readbracket(text, pos)
    if len(tag) == 0:
      return function + bracket, []
    return function + bracket, [TaggedText().complete(result, tag)]

  def readtwo(self, text, pos):
    "read a two-parameter function"
    function, tags = self.find(text, pos, FormulaConfig.twofunctions)
    if not function:
      return None, None
    pos += len(function)
    bracket1, result1 = self.readbracket(text, pos)
    pos += len(bracket1)
    bracket2, result2 = self.readbracket(text, pos)
    original =  function + bracket1 + bracket2
    tagged1 = TaggedText().complete(result1, tags[1])
    tagged2 = TaggedText().complete(result2, tags[2])
    tagged0 = TaggedText().complete([tagged1, tagged2], tags[0])
    return original, [tagged0]

  def readbracket(self, text, pos):
    "Read a bracket as {result}"
    if text[pos] != u'{':
      Trace.error(u'Missing { in ' + text + '@' + str(pos))
      return '', [Constant('')]
    original, converted = self.convert(text, pos + 1)
    if text[pos + 1 + len(original)] != u'}':
      Trace.error(u'Missing } in ' + text + '@' + str(pos))
    return '{' + original + '}', converted

  def find(self, text, pos, map):
    "Read TeX command or function"
    bit = text[pos:]
    for element in map:
      if bit.startswith(element):
        return element, map[element]
    return None, []

  readers = [readalpha, readsymbols, command, readone, readtwo]

  def restyletagged(self, container, index):
    "Restyle tagged text"
    tagged = container.contents[index]
    if tagged.tag == 'span class="mathsf"' or tagged.tag == 'span class="text"':
      tagged.restyle(TaggedText, self.removeitalics)
      first = tagged.contents[0]
      if self.mustspaceunits(container.contents, index):
        first.contents[0] = u' ' + first.contents[0]
    elif tagged.tag == 'span class="sqrt"':
      tagged.tag = 'span class="root"'
      radical = TaggedText().constant(u'√', 'span class="radical"')
      container.contents.insert(index, radical)
    elif tagged.tag == 'span class="overdot"':
      dot = TaggedText().constant(u'⋅', 'span class="dot"')
      tagged.tag = 'span class="dotted"'
      container.contents.insert(index, dot)
    elif tagged.tag == 'i':
      group = TaggedText().complete([], 'i')
      self.group(index, group, self.isalpha)
      group.restyle(TaggedText, self.removeitalics)

  def removeitalics(self, container, index):
    "Remove italics tag"
    if container.contents[index].tag == 'i':
      container.remove(index)

  def isalpha(self, element):
    "Check if the element is all text"
    if isinstance(element, StringContainer):
      return element.contents[0].isalpha()
    for item in element.contents:
      if not self.isalpha(item):
        return False
    return True

  def mustspaceunits(self, contents, index):
    "Check if units must be spaced"
    if index == 0:
      return False
    first = contents[index].contents[0]
    if not isinstance(first, Constant):
      return False
    last = contents[index - 1]
    if isinstance(last, Constant):
      string = last.contents[-1]
      if len(string) == 0:
        return False
      if string[-1].isdigit():
        return True
    return False

class FormulaParser(Parser):
  "Parses a formula"

  def parseheader(self, reader):
    "See if the formula is inlined"
    self.begin = reader.linenumber + 1
    if reader.currentline().find('$') > 0:
      return ['inline']
    else:
      return ['block']
  
  def parse(self, reader):
    "Parse the formula"
    if '$' in reader.currentline():
      rest = reader.currentline().split('$', 1)[1]
      if '$' in rest:
        # formula is $...$
        formula = reader.currentline().split('$')[1]
        reader.nextline()
      else:
        # formula is multiline $...$
        formula = self.parsemultiliner(reader, '$')
    elif '\\[' in reader.currentline():
      # formula of the form \[...\]
      formula = self.parsemultiliner(reader, '\\]')
    elif '\\begin{' in reader.currentline() and reader.currentline().endswith('}\n'):
      current = reader.currentline().strip()
      endsplit = current.split('\\begin{')[1].split('}')
      endpiece = '\\end{' + endsplit[0] + '}'
      formula = self.parsemultiliner(reader, endpiece)
    else:
      Trace.error('Formula beginning ' + reader.currentline().strip +
          ' is unknown')
    while not reader.currentline().startswith(self.ending):
      stripped = reader.currentline().strip()
      if len(stripped) > 0:
        Trace.error('Unparsed formula line ' + stripped)
      reader.nextline()
    reader.nextline()
    return [formula]

  def parsemultiliner(self, reader, ending):
    "Parse a formula in multiple lines"
    reader.nextline()
    formula = ''
    while not reader.currentline().endswith(ending + '\n'):
      formula += reader.currentline()
      reader.nextline()
    formula += reader.currentline()[:-len(ending) - 1]
    reader.nextline()
    return formula

ContainerFactory.types.append(Formula)

#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090207
# eLyXer tables



class Table(Container):
  "A lyx table"

  start = '\\begin_inset Tabular'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = BoundedParser()
    self.output = TagOutput()
    self.breaklines = True
    self.tag = 'table'

class TableHeader(Container):
  "The headers for the table"

  starts = ['<lyxtabular', '<features', '<column', '</lyxtabular']

  def __init__(self):
    self.parser = LoneCommand()
    self.output = EmptyOutput()

class Row(Container):
  "A row in a table"

  start = '<row'
  ending = '</row'

  def __init__(self):
    self.parser = BoundedParser()
    self.output = TagOutput()
    self.breaklines = True
    self.tag = 'tr'

  def process(self):
    if len(self.header) > 1:
      self.tag += ' class="header"'

class Cell(Container):
  "A cell in a table"

  start = '<cell'
  ending = '</cell'

  def __init__(self):
    self.parser = BoundedParser()
    self.output = TagOutput()
    self.breaklines = True
    self.tag = 'td'

ContainerFactory.types += [Table, TableHeader, Row, Cell]

#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090308
# eLyXer image treatment

import os
import os.path
import array


class Image(Container):
  "An embedded image"

  start = '\\begin_inset Graphics'
  ending = '\\end_inset'

  converter = True

  def __init__(self):
    self.parser = InsetParser()
    self.output = ImageOutput()

  def process(self):
    self.url = self.parser.parameters['filename']
    origin = self.getpath(self.url)
    if not os.path.exists(origin):
      Trace.error('Image ' + origin + ' not found')
      return
    self.destination = self.getdestination(self.url)
    destination = self.getpath(self.destination)
    density = 100
    if 'scale' in self.parser.parameters:
      density = int(self.parser.parameters['scale'])
    Trace.debug('Image from ' + origin + ' to ' + destination)
    self.convert(origin, destination, density)
    self.width, self.height = self.getdimensions(destination)

  def getpath(self, path):
    "Get the correct path for the image"
    if os.path.isabs(path):
      return path
    return Options.directory + os.path.sep + path

  def getdestination(self, origin):
    "Get the destination URL for an image URL"
    if os.path.isabs(origin):
      dest = os.path.basename(origin)
    else:
      dest = origin
    return os.path.splitext(dest)[0] + '.png'

  def convert(self, origin, destination, density):
    "Convert an image to PNG"
    if not Image.converter:
      return
    if origin == destination:
      return
    if os.path.exists(destination):
      if os.path.getmtime(origin) <= os.path.getmtime(destination):
        # file has not changed; do not convert
        return
    dir = os.path.dirname(destination)
    if len(dir) > 0 and not os.path.exists(dir):
      os.makedirs(dir)
    try:
      result = os.system('convert -density ' + str(density) + ' "' + origin +
          '" "' + destination + '"')
      if result != 0:
        Trace.error('ImageMagick not installed; images will not be processed')
        Image.converter = False
        return
      Trace.message('Converted ' + origin + ' to ' + destination + ' at ' +
          str(density) + '%')
    except OSError:
      Trace.error('Error while converting image ' + origin)

  dimensions = dict()

  def getdimensions(self, filename):
    "Get the dimensions of a PNG image"
    if not os.path.exists(filename):
      return None, None
    if filename in Image.dimensions:
      return Image.dimensions[filename]
    pngfile = codecs.open(filename, 'rb')
    pngfile.seek(16)
    dimensions = array.array('l')
    dimensions.fromfile(pngfile, 2)
    dimensions.byteswap()
    pngfile.close()
    Image.dimensions[filename] = dimensions
    return dimensions

class ImageOutput(object):
  "Returns an image in the output"

  def gethtml(self, container):
    "Get the HTML output of the image as a list"
    cssclass = 'embedded'
    html = ['<img class="' + cssclass + '"']
    if hasattr(container, 'destination'):
      html.append(' src="' + container.destination +
          '" alt="figure ' + container.destination + '" width="' +
          str(container.width) + '" height="' + str(container.height) + '"')
    else:
      html.append(' src="' + container.url + '"')
    html.append('/>\n')
    return html

ContainerFactory.types.append(Image)

#! /usr/bin/env python
# -*- coding: utf-8 -*-

#   eLyXer -- convert LyX source files to HTML output.
#
#   Copyright (C) 2009 Alex Fernández
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

# --end--
# Alex 20090312
# LyX structure in containers



class LyxHeader(Container):
  "Reads the header, outputs the HTML header"

  start = '\\begin_header'
  ending = '\\end_header'

  def __init__(self):
    self.parser = HeaderParser()
    self.output = HeaderOutput()

  def process(self):
    "Find pdf title"
    if '\\pdf_title' in self.parser.parameters:
      Options.title = self.parser.parameters['\\pdf_title']
      Trace.debug('PDF Title: ' + Options.title)

class LyxFooter(Container):
  "Reads the footer, outputs the HTML footer"

  start = '\\end_body'
  ending = '\\end_document'

  def __init__(self):
    self.parser = BoundedDummy()
    self.output = FooterOutput()

class Float(Container):
  "A floating inset"

  start = '\\begin_inset Float'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = TagOutput()
    self.breaklines = True

  def process(self):
    "Get the float type"
    self.type = self.header[2]
    self.tag = 'div class="' + self.type + '"'

class InsetText(Container):
  "An inset of text in a lyx file"

  start = '\\begin_inset Text'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = BoundedParser()
    self.output = ContentsOutput()

class Caption(Container):
  "A caption for a figure or a table"

  start = '\\begin_inset Caption'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = TagOutput()
    self.tag = 'div class="caption"'
    self.breaklines = True

class Align(Container):
  "Bit of aligned text"

  start = '\\align'
  ending = '\\end_layout'

  def __init__(self):
    self.parser = ExcludingParser()
    self.output = TagOutput()
    self.breaklines = True

  def process(self):
    self.tag = 'div class="' + self.header[1] + '"'

class Layout(Container):
  "A layout (block of text) inside a lyx file"

  start = '\\begin_layout'
  ending = '\\end_layout'

  typetags = { 'Quote':'blockquote', 'Standard':'div',
        'Chapter':'h1', 'Section':'h2', 'Subsection':'h3', 'Subsubsection':'h4',
        'Quotation':'blockquote', 'Center':'div', 'Paragraph':'div',
        'Part':'h1'}

  def __init__(self):
    self.contents = list()
    self.parser = BoundedParser()
    self.output = TagOutput()
    self.breaklines = True
    self.numbered = False

  def process(self):
    self.type = self.header[1]
    if False: #self.searchfor(Align):
      align = self.searchfor(Align)
      self.tag = 'div class="' + align.header[1] + '"'
    elif self.type in Layout.typetags:
      self.numbered = True
      self.tag = Layout.typetags[self.type] + ' class="' + self.type + '"'
    elif self.type.replace('*', '') in Layout.typetags:
      self.tag = Layout.typetags[self.type.replace('*', '')] + ' class="' +  self.type.replace('*', '-') + '"'
    else:
      self.tag = 'div class="' + self.type + '"'

  def __str__(self):
    return 'Layout of type ' + self.type

class Title(Layout):
  "The title of the whole document"

  start = '\\begin_layout Title'
  ending = '\\end_layout'

  def process(self):
    self.tag = 'h1 class="title"'
    string = self.searchfor(StringContainer)
    self.title = string.contents[0]
    Trace.message('Title: ' + self.title)

class Author(Layout):
  "The document author"

  start = '\\begin_layout Author'
  ending = '\\end_layout'

  def process(self):
    self.tag = 'h2 class="author"'
    string = self.searchfor(StringContainer)
    FooterOutput.author = string.contents[0]
    Trace.debug('Author: ' + FooterOutput.author)

class Description(Layout):
  "A description layout"

  start = '\\begin_layout Description'
  ending = '\\end_layout'

  def process(self):
    "Set the first word to bold"
    self.tag = 'div class="Description"'
    firstword, found = self.extractfirst(self.contents)
    if not firstword:
      return
    firstword.append(Constant(u' '))
    tag = 'span class="Description-entry"'
    self.contents.insert(0, TaggedText().complete(firstword, tag))

  def extractfirst(self, contents):
    "Extract the first word as a list"
    firstcontents = []
    index = 0
    while index < len(contents):
      first, found = self.extractfirstcontainer(contents[index])
      if first:
        firstcontents += first
      if found:
        return firstcontents, True
      else:
        del contents[index]
    return firstcontents, False

  def extractfirstcontainer(self, container):
    "Extract the first word from a string container"
    if len(container.contents) == 0:
      # empty container
      return [container], False
    if isinstance(container, StringContainer):
      return self.extractfirststring(container)
    elif isinstance(container, ERT):
      return [container], False
    if len(container.contents) == 0:
      # empty container
      return None, False
    first, found = self.extractfirst(container.contents)
    if isinstance(container, TaggedText) and hasattr(container, 'tag'):
      newtag = TaggedText().complete(first, container.tag)
      return [newtag], found
    return first, found

  def extractfirststring(self, container):
    "Extract the first word from a string container"
    string = container.contents[0]
    if not ' ' in string:
      return [container], False
    split = string.split(' ', 1)
    container.contents[0] = split[1]
    return [Constant(split[0])], True

class Space(Container):
  "A space of several types"

  start = '\\begin_inset space'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = FixedOutput()

  def process(self):
    self.type = self.header[2]
    if self.type not in SpaceConfig.spaces:
      Trace.error('Unknown space type ' + self.type)
      self.html = [' ']
      return
    self.html = [SpaceConfig.spaces[self.type]]

class Inset(Container):
  "A generic inset in a LyX document"

  start = '\\begin_inset'
  ending = '\\end_inset'

  def __init__(self):
    self.contents = list()
    self.parser = InsetParser()
    self.output = TagOutput()
    self.breaklines = True

  def process(self):
    self.type = self.header[1]
    self.tag = 'span class="' + self.type + '"'

  def __str__(self):
    return 'Inset of type ' + self.type

class Newline(Container):
  "A newline"

  start = '\\newline'

  def __init__(self):
    self.parser = LoneCommand()
    self.output = FixedOutput()

  def process(self):
    "Process contents"
    self.html = '<br/>'

class NewlineInset(Newline):
  "A newline or line break in an inset"

  start = '\\begin_inset Newline'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = FixedOutput()

class Branch(Container):
  "A branch within a LyX document"

  start = '\\begin_inset Branch'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = TagOutput()
    self.breaklines = True

  def process(self):
    "Disable inactive branches"
    self.branch = self.header[2]
    self.tag = 'span class="branch"'
    if not self.isactive():
      self.output = EmptyOutput()

  def isactive(self):
    "Check if the branch is active"
    if not self.branch in Options.branches:
      Trace.error('Invalid branch ' + self.branch)
      return True
    branch = Options.branches[self.branch]
    return branch.selected == 1

class ShortTitle(Container):
  "A short title to display (always hidden)"

  start = '\\begin_inset OptArg'
  ending = '\\end_inset'

  def __init__(self):
    self.parser = InsetParser()
    self.output = EmptyOutput()

class Footnote(Container):
  "A footnote to the main text"

  starts = ['\\begin_inset Foot', '\\begin_inset Marginal']
  ending = '\\end_inset'

  order = 0
  list = 'ABCDEFGHIJKLMNOPQRSTUVW'

  def __init__(self):
    self.parser = InsetParser()
    self.output = ContentsOutput()

  def process(self):
    "Add a letter for the order, rotating"
    letter = Footnote.list[Footnote.order % len(Footnote.list)]
    span = 'span class="FootMarker"'
    fromfoot = TaggedText().constant(u'[→' + letter + u'] ', span)
    self.contents.insert(0, fromfoot)
    tag = TaggedText().complete(self.contents, 'span class="Foot"', True)
    tofoot = TaggedText().constant(' [' + letter + u'→] ', span)
    self.contents = [tofoot, tag]
    Footnote.order += 1

ContainerFactory.types += [
    LyxHeader, LyxFooter, InsetText, Caption, Inset,
    Align, Layout, Float, Title, Author, Description, Newline, Space,
    NewlineInset, Branch, ShortTitle, Footnote
    ]



class Book(object):
  "book in a lyx file"

  def parsecontents(self, reader, writer):
    "Parse the contents of the reader and write them"
    factory = ContainerFactory()
    while not reader.finished():
      container = factory.create(reader)
      writer.write(container.gethtml())

def createbook(args):
  "Read a whole book, write it"
  filein = sys.stdin
  fileout = sys.stdout
  if len(args) < 2:
    Options.quiet = True
  if len(args) > 0:
    Options.directory = os.path.dirname(args[0])
    if Options.directory == '':
      Options.directory = '.'
    filein = codecs.open(args[0], 'r', "utf-8")
    del args[0]
  if len(args) > 0:
    fileout = codecs.open(args[0], 'w', "utf-8")
    del args[0]
  if len(args) > 0:
    usage('Too many arguments')
    return
  reader = LineReader(filein)
  writer = HtmlWriter(fileout)
  book = Book()
  book.parsecontents(reader, writer)

def usage(error):
  "Show an error message and correct usage"
  if not error:
    return
  Trace.error(error)
  Trace.error('Usage: eLyXer [filein] [fileout].')
  Trace.error('  Options:')
  Trace.error('    --nocopy: disables the copyright notice at the bottom')
  Trace.error('    --quiet: disables all runtime messages')
  Trace.error('    --debug: enable debugging messages (for developers)')
  Trace.error('    --title <title>: set the generated page title')
  Trace.error('    --css <file.css>: use a custom CSS file')
  exit()

biblio = dict()
args = sys.argv
del args[0]
error = Options().parseoptions(args)
if error:
  usage(error)
  exit()
createbook(args)

