#!/usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
from xml.dom import minidom
import sys, codecs
map = {
"emphasis": ( "/", "/"),
"chapter": ( "+ ", ""),
"sect1": ("++ ", ""),
"sect2": ("+++ ", ""),
"sect3": ("++++ ", ""),
"title": ( "", "\n"),
"para": ( "", "\n\n"),
"listitem" : ("* ", ""),
"screen" : ( "\n
\n"),
"note" : ( "\n\n"),
"important" : ( "\n\n"),
"warning" : ( "\n\n"),
"itemizedlist" : ( "" , ""),
"table" : ( "\n\n"),
"entry" : ( "| " , ""),
"thead" : ( "" , ""),
"tbody" : ( "" , ""),
"row" : ( "\n" , ""),
"userinput" : ( "" , ""),
"tgroup" : ( "" , ""),
"tip" : ( "\n\n"),
}
def unxml(node):
    """Convert a DOM node and all of its descendants to marked-up text.

    Text and CDATA nodes yield their character data with every newline
    removed.  Any other node yields the concatenation of its converted
    children, after which:

    * a ``ulink`` element is rendered as ``((url)(content))``, where ``url``
      is the value of its ``url`` attribute;
    * an element whose name is a key of the module-level ``map`` table is
      wrapped between that entry's prefix and suffix;
    * anything else is returned as the bare concatenated content.

    The function recurses once per level of nesting in the document.
    """
    # Leaf nodes carrying character data: only the line breaks are dropped.
    if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
        return node.data.replace("\n", "")

    # Container node: convert the children first, then decorate the result.
    # join() builds the string in one pass instead of repeated concatenation.
    res = "".join([unxml(child) for child in node.childNodes])

    if node.nodeName == "ulink":
        # Links need an attribute value as well as the content, so they
        # cannot be expressed as a plain prefix/suffix pair in the table.
        url = node.getAttribute("url")
        res = "((%s)(%s))" % (url, res)
    elif node.nodeName in map:
        # Index rather than unpack: only positions 0 and 1 are relied upon.
        entry = map[node.nodeName]
        res = entry[0] + res + entry[1]
    return res
def clean(str):
    """Return ``str`` with every space character (U+0020) removed.

    Other whitespace (tabs, newlines) is left untouched.

    NOTE(review): stripping *all* spaces is unusually aggressive for text
    output; the literal being removed may originally have been a different
    whitespace character -- confirm against the upstream copy.  The
    parameter name shadows the ``str`` builtin but is kept because it is
    part of the signature.
    """
    # Cutting at each space and gluing the pieces back together with nothing
    # in between drops exactly the spaces.
    pieces = str.split(" ")
    return "".join(pieces)
# Main: convert the XML file named on the command line and write the
# converted text to standard output.
if len(sys.argv) != 2:
    # Usage goes to stderr so it can never be mistaken for converted output,
    # and it names the one argument the script expects.
    sys.stderr.write("Syntax: %s <xml-file>\n" % sys.argv[0])
    sys.exit(1)

# Binary mode: the recoding wrapper below operates on bytes.
source = open(sys.argv[1], "rb")
try:
    # Dirty hack for UTF-8: bytes read from the file are decoded as
    # iso8859-1 and handed to the parser re-encoded as utf-8.
    utf8 = codecs.EncodedFile(source, "utf-8", "iso8859-1")
    # Process
    doc = minidom.parse(utf8)
finally:
    # Release the file handle even when parsing fails.
    source.close()

res = clean(unxml(doc))

# Dirty hack for iso8859-1 output: characters that cannot be represented in
# iso8859-1 are substituted ("replace") instead of raising.  Where
# sys.stdout exposes an underlying binary stream (``buffer``), write the
# encoded bytes there; otherwise write to sys.stdout itself.
raw_stdout = getattr(sys.stdout, "buffer", sys.stdout)
stdout = codecs.getwriter("iso8859-1")(raw_stdout, "replace")
stdout.write(res)