Fill in the config section at the top of this shell script and then run it. You must already have created a DokuWiki site, and you need access to the MediaWiki database.
#!/bin/bash
#
# Export all pages from a MediaWiki database and convert them to DokuWiki.
#
# ---- configuration -----------------------------------------------------
# root of the target DokuWiki installation
DW_HOME="/home/user/www/dbadocs"
# MediaWiki database name and password for the mysql client
WIKIDB="user_dbawiki"
WIKIPASS="xxxxxx"
# directory where the extracted and converted pages are written
DEST="mediawiki_pages"
# converter invoked for every extracted page (MediaWiki -> DokuWiki)
PHARSER="./mediawiki2dokuwiki_parser.sh"
# ---- nothing below this line needs editing -----------------------------
# ---- sanity checks -----------------------------------------------------
# DW_HOME must point at a real DokuWiki installation
if [[ ! -d "${DW_HOME}/data/pages" ]]; then
  echo "No valid DokuWiki installation found at ${DW_HOME}, please check."
  exit 1
fi
# create the destination directory for the extracted pages if necessary
if [[ ! -d "${DEST}" ]]; then
  if ! mkdir -p "${DEST}"; then
    echo "Could not create directory ${DEST}. Are you sure this is where you wanted to store the MediaWiki pages?"
    exit 1
  fi
fi
# the converter must exist AND be executable; abort otherwise
# (previously this only tested existence and forgot to exit, so a missing
# parser would have silently produced an empty wiki further down)
if [[ ! -x "${PHARSER}" ]]; then
  echo "The conversion program ${PHARSER} is not executable (or just not there)"
  exit 1
fi
# fetch a list of all page titles in MediaWiki; -N suppresses the column
# header, otherwise the literal word "page_title" would be processed as if
# it were a page
titles=$(mysql -N --password="$WIKIPASS" "$WIKIDB" -e 'select page_title from page;')
# for each page title found in the database (word-splitting on $titles is
# safe here: MediaWiki stores spaces in titles as underscores)
for baretitle in $titles; do
  echo "Processing ${baretitle}..."
  # change any characters that are unusable in a DokuWiki page name
  cleantitle=$(echo "${baretitle}" | sed 's/\//_/g' | sed 's/[()!#]//g')
  lowertitle=$(echo "${cleantitle}" | tr "[:upper:]" "[:lower:]")
  # single quotes in the title must be doubled ('' is the SQL way of
  # embedding a quote in a string literal); the previous sed "s/'/\'/g"
  # was a no-op and broke the query for titles containing an apostrophe
  baretitle=$(echo "${baretitle}" | sed -e "s/'/''/g")
  # fetch the page text and write it out with a '.mw' extension; mysql
  # batch mode escapes newlines as a literal \n, so the sed undoes that.
  # -N drops the "old_text" header row (replacing the old "grep -v
  # old_text", which would also have deleted content lines that merely
  # mention old_text)
  mysql -N --password="$WIKIPASS" "$WIKIDB" -e \
    "SELECT old_text FROM revision,page,text WHERE revision.rev_id=page.page_latest AND text.old_id=revision.rev_text_id and page_title = '$baretitle';" \
    | sed s/'\\n'/\\n/g > "${DEST}/${cleantitle}.mw"
  # convert the extracted MediaWiki file into the DokuWiki page tree
  $PHARSER "${DEST}/${cleantitle}.mw" "${DW_HOME}/data/pages/${lowertitle}.txt"
done
# MediaWiki's "Main Page" becomes DokuWiki's "start" page; guard the move
# so a wiki without a main_page does not abort with an error
if [[ -e "${DW_HOME}/data/pages/main_page.txt" ]]; then
  mv "${DW_HOME}/data/pages/main_page.txt" "${DW_HOME}/data/pages/start.txt"
fi
# report suspiciously short pages.  This is intentionally a dry run: the
# rm command is printed for the operator to review and run by hand (the
# old message claimed "Deleting" while nothing was deleted)
for f in "${DEST}"/*; do
  if [[ $(wc -w < "$f") -lt 25 ]]; then
    echo "$f looks suspiciously short; to remove it run: rm -f $f"
  fi
done
echo ""
echo "Done. Put the contents of $DEST to Path_Of_dokuwiki/data/pages/"
The following converter script is invoked by the shell script above once for each extracted page.
#! /bin/sh
# Mediawiki2Dokuwiki Converter
# originally by Johannes Buchner <buchner.johannes [at] gmx.at>
# License: GPL (http://www.gnu.org/licenses/gpl.txt)
# Modified by Stuart Barkley to:
# - ensure MediaWiki pages with a single quote in the title are treated correctly
# - wrap <pre> and </pre> tags around indented MediaWiki code so that the blocks are processed correctly later.
# - [[url links]] are treated differently from [[page links]]
#
# Usage: mediawiki2dokuwiki_parser.sh <source.mw> <destination.txt>
sourcefile="$1"
destfile="$2"
# refuse to run without exactly two arguments and a readable source file;
# previously a missing source silently produced an empty destination page
if [ $# -ne 2 ] || [ ! -r "$sourcefile" ]; then
  echo "usage: $0 source.mw destination.txt" >&2
  exit 1
fi
# Headings, pass 1: tag MediaWiki "=" headings with temporary <hN> markers.
# The [^=] guards keep a shorter run of "=" from matching inside a longer
# one, so the rules can safely run in ascending order on every line.
cat "$sourcefile" | \
perl -pe '
  s/^[ ]*=([^=])/<h1> ${1}/g;      s/([^=])=[ ]*$/${1} <\/h1>/g;
  s/^[ ]*==([^=])/<h2> ${1}/g;     s/([^=])==[ ]*$/${1} <\/h2>/g;
  s/^[ ]*===([^=])/<h3> ${1}/g;    s/([^=])===[ ]*$/${1} <\/h3>/g;
  s/^[ ]*====([^=])/<h4> ${1}/g;   s/([^=])====[ ]*$/${1} <\/h4>/g;
  s/^[ ]*=====([^=])/<h5> ${1}/g;  s/([^=])=====[ ]*$/${1} <\/h5>/g;
  s/^[ ]*======([^=])/<h6> ${1}/g; s/([^=])======[ ]*$/${1} <\/h6>/g;
' > mediawiki1
# Headings, pass 2: turn the temporary <hN> markers into DokuWiki heading
# syntax (h1 is the largest heading, i.e. the most "=" characters).
cat mediawiki1 | \
perl -pe '
  s/<\/?h1>/======/g;
  s/<\/?h2>/=====/g;
  s/<\/?h3>/====/g;
  s/<\/?h4>/===/g;
  s/<\/?h5>/==/g;
  s/<\/?h6>/=/g;
' > mediawiki2
rm -f mediawiki1
# anything that starts with a space and sits outside an explicit
# <pre>..</pre> block is MediaWiki preformatted code: wrap such runs in
# synthetic <pre>..</pre> tags so the later <pre> -> <code> pass catches
# them.  The original perl used the barewords true/false (i.e. the strings
# "true"/"false", which only work by accident without strict mode); plain
# 0/1 flags are used instead, branch for branch identical.
cat mediawiki2 |
perl -e '
  use strict;
  use warnings;
  my $in_pre_block = 0;   # currently inside an explicit <pre>..</pre>
  my $pre_printed  = 0;   # we opened a synthetic <pre> ourselves
  while (<>) {
    s/ +$//;                          # strip trailing blanks
    $in_pre_block = 1 if m/\<pre\>/;  # flags update before the space test
    $in_pre_block = 0 if m/\<\/pre\>/;
    if (m/^ /) {
      # indented line: open a synthetic block unless one (explicit or
      # synthetic) is already open
      if (!$in_pre_block && !$pre_printed) {
        print "<pre>\n";
        $pre_printed = 1;
      }
      print;
    } else {
      # non-indented line ends any synthetic block we opened
      if ($pre_printed) {
        print "</pre>\n";
        $pre_printed = 0;
      }
      print;
    }
  }
' > mediawiki3
rm -f mediawiki2
# lists: convert MediaWiki list markers into DokuWiki's indented form
# ("* " bullets first, then "- " numbered items).  Deeper nesting is
# handled before shallower so the short anchors cannot match inside a
# longer prefix.
# NOTE(review): the leading whitespace inside each replacement string is
# load-bearing -- it encodes the DokuWiki nesting depth.  Verify the
# widths match the intended levels before reformatting these lines.
cat mediawiki3 |
perl -pe 's/^[\*#]{4}\*/ * /g' | \
perl -pe 's/^[\*#]{3}\*/ * /g' | \
perl -pe 's/^[\*#]{2}\*/ * /g' | \
perl -pe 's/^[\*#]{1}\*/ * /g' | \
perl -pe 's/^\*/ * /g' | \
perl -pe 's/^[\*#]{4}#/ \- /g' | \
perl -pe 's/^[\*\#]{3}\#/ \- /g' | \
perl -pe 's/^[\*\#]{2}\#/ \- /g' | \
perl -pe 's/^[\*\#]{1}\#/ \- /g' | \
perl -pe 's/^\#/ - /g' | \
cat > mediawiki4
# free the previous intermediate file
rm -f mediawiki3
# [link] => [[link]] : double up lone square brackets so DokuWiki
# recognises them as links; existing [[..]] pairs are left untouched
# because every pattern requires the neighbouring character to differ.
cat mediawiki4 |
perl -pe '
  s/([^\[])\[([^\[])/${1}[[${2}/g;
  s/^\[([^\[])/[[${1}/g;
  s/([^\]])\]([^\]])/${1}]]${2}/g;
  s/([^\]])\]$/${1}]]/g;
' > mediawiki5
rm -f mediawiki4
# [[url text]] => [[url|text]] : inside an external (http) link the first
# blank separates the URL from its label, which DokuWiki marks with "|"
perl -pe 's/(\[\[http[^| \]]*) ([^|\]]*\]\])/${1}|${2}/g' mediawiki5 \
  > mediawiki6
rm -f mediawiki5
# bold and italic: ''' -> ** (bold), '' -> // (italic).  Bold runs first;
# otherwise the italic rule would eat two of the three quotes.
cat mediawiki6 |
perl -pe "s/'''/**/g; s/''/\/\//g" \
  > mediawiki7
rm -f mediawiki6
# talk indentation: a leading run of ":" becomes DokuWiki quote markers.
# The first rule turns the leftmost ":" into ">", then each following rule
# promotes ">:" one level deeper, so e.g. ":::" ends up as ">>>".
cat mediawiki7 |
perl -pe '
  s/^[ ]*:/>/g;
  s/>:/>>/g;
  s/>>:/>>>/g;
  s/>>>:/>>>>/g;
  s/>>>>:/>>>>>/g;
  s/>>>>>:/>>>>>>/g;
  s/>>>>>>:/>>>>>>>/g;
' > mediawiki8
rm -f mediawiki7
# preformatted code blocks: <pre> becomes DokuWiki's <code>
cat mediawiki8 |
perl -pe 's/<pre>/<code>/g; s/<\/pre>/<\/code>/g' \
  > mediawiki9
rm -f mediawiki8
# write the finished page and clean up the last intermediate file
cat mediawiki9 > "$destfile"
rm -f mediawiki9