#!/bin/sh
#
# download the AllInOne page of the release-manual as docbook
# and transform it into release-manual.xml
#
# very loosely based on the moinmoin2pdf script from Petter Reinholdtsen
#
# Author/Copyright:	Holger Levsen
# Licence:		GPL2+
# first edited:		2006-07-06
# last edited:		2007-11-14
# 
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# Wiki location of the release manual; page names are appended to this prefix.
url="http://wiki.skolelinux.no/DebianEdu/Documentation/Etch/"
# File name of the assembled DocBook document.
name="release-manual.xml"

# Title placed into <articleinfo> and the build date (UTC, YYYY-MM-DD) that
# replaces the $DEBIAN_EDU_DOC_BUILDDATE placeholder in the downloaded pages.
DEBIAN_EDU_DOC_TITLE="Debian Edu / Skolelinux Terra 3.0 Release Manual"
DEBIAN_EDU_DOC_BUILDDATE=$(date -u +%Y-%m-%d)

# Fetch the raw AllInOne page and strip the "[[Include(", ")]]" and
# "DebianEdu/Documentation/Etch" parts from it, which presumably leaves one
# page path per line.  The result is written to the file "id", which the
# rest of the script reads.
#
# tr does the same as dos2unix: it removes carriage returns only.  Blindly
# chopping the last character of every line would truncate the page names
# whenever the wiki answers with Unix line endings.
GET "${url}AllInOne?action=raw" |
sed -e 's%\[\[Include(%%g' \
    -e 's%)]]%%g' \
    -e 's%DebianEdu/Documentation/Etch%%g' |
tr -d '\r' > id

# Download every page listed in "id" as DocBook and store it as <page>.xml.
# The list is split on whitespace on purpose: every word is one page path.
for i in $(cat id) ; do
	# file name of the page: everything after the last "/" of its path
	TARGET="${i##*/}.xml"
	page_url="${url}${i}?action=format&mimetype=xml/docbook"
	echo "$TARGET		${page_url}"
	# download the docbook version of the page from the wiki and pipe it through sed to
	#   - drop empty <para/> elements
	#   - start a new line before closing tags, <title>, <section> and <para>
	#   - replace the literal text $DEBIAN_EDU_DOC_BUILDDATE with the build date
	#   - convert <code> tag to <computeroutput> as this is understood by docbook (tools)
	#   - point image references at ./images/
	#   - drop the closing </article> tag
	#   - delete the first 4 lines (the last sed command)
	# The stages stay separate sed processes: the substitutions without "g"
	# and the final "1,4d" work on the lines created by the "\n" inserted
	# in earlier stages, so merging them into one sed would change the result.
	GET "${page_url}" |
	sed "s%<para/>%%g" |
	sed "s%<\/%\n<\/%g" |
	sed "s%<title>%\n<title>%g" |
	sed "s%<section>%\n\n<section>%g" |
	sed "s%<para>%\n<para>%g" |
	sed "s/\$DEBIAN_EDU_DOC_BUILDDATE/<code>$DEBIAN_EDU_DOC_BUILDDATE<\/code>/" |
	sed "s%code>%computeroutput>%g" |
	sed "s%/wiki/rightsidebar/img/%./images/%g" |
	sed "s%</article>%%" |
	sed '1,4d' > "$TARGET"
done

# now only keep the page name (equals section id) without path
# (each line is replaced with the second group of the regular expression,
# i.e. the text after its last "/")
sed -i "s/\(.*\)\/\(.*\)/\2/" id

# add id= to the first <section> of every page
# ("0,/re/" is a GNU sed address form; it ends the range at the first
# matching line even if that is line 1, so only one <section> is changed)
for i in $(cat id) ; do
	# quoted: the file name is derived from downloaded data
	sed -i "0,/<section>/ s/<section>/<section id=\"$i\">/" "${i}.xml"
done

# paste it together: start without an old $name, then append the pages in
# the order given by "id" and remove each page file after appending it
rm -f "$name"
for i in $(cat id) ; do
	# quoted: the file names are derived from downloaded data
	cat "${i}.xml" >> "$name"
	rm "${i}.xml"
done
# the page list is not needed any more
rm id

# get images and modify $name (done by the helper script next to this one)
./get_images

# Rewrite <ulink> elements whose url starts with /DebianEdu/Documentation/
# into internal <link linkend="..."> references; the last path component of
# the url becomes the linkend.
# This step has to come after ./get_images.
#
#  -0777  slurp the whole file at once, so the pattern can match the
#         newline in front of </ulink>
perl -0777 -p -i -e "s/<ulink url='\/DebianEdu\/Documentation\/(.*)\/(.*)'>(.*)\n<\/ulink>/<link linkend=\"\2\">\3<\/link>/g" "$name"

# make it a docbook article again
#
# Three in-place edits of $name; their order matters.
#
# 1. On the lines from line 1 up to the first later line containing "<",
#    replace the first "<" of a line with the XML declaration, the DocBook
#    4.4 DOCTYPE, the opening <article> tag and an <articleinfo> holding
#    $DEBIAN_EDU_DOC_TITLE, followed by a newline and the original "<".
#    NOTE(review): sed starts looking for the end of the range on line 2, so
#    if line 1 itself contained "<" the header would be inserted twice -
#    this presumably relies on line 1 being empty (see step 3); confirm.
sed -i "1,/</ s#<#<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE article PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\" \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\"><article><articleinfo><title>$DEBIAN_EDU_DOC_TITLE</title></articleinfo>\n<#" $name
# 2. On the last line, put a newline and the closing </article> tag after
#    the first ">" (nothing is added if the last line contains no ">").
sed -i "$ s#>#>\n</article>#" $name
# 3. remove the first empty line
sed -i "1d" $name

# motivate: count the lines of the plain-text rendering of the manual that
# still mention FIXME (in any case) and report the number
fixme_count=$(GET "${url}AllInOne?action=format&mimetype=text" | grep -ci FIXME)
echo "${fixme_count} FIXMEs left to fix"

