#!/usr/bin/python
"""
  This script is provided AS IS, without any warranty!
  Copyright lio@lunesu.com, placed in the public domain
  Modified by Koen Werdler @ 01-05-2007
  Modified by Jason Dusek, 2008-07-15Z
 
  Requirements:
 .  python 2.5
 .  python-mysqldb

  This script dumps every version of every page as an INSERT statement that
  is compatible with the SQLite Trac schema (don't know about other
  databases). It omits some functionality, like recognizing edits by IP
  address users.

  You can set the connection parameters towards the bottom of the script
  (following "main program").
                                                                           """

import re
import os
import sys
import time
import string
import _mysql




def q(s):
    """ quote `s` as an SQL string literal: wrap it in single quotes and
        double every single quote found inside it """
    doubled = s.replace("'", "''")
    return "'%s'" % doubled

def time_fixer(mw_time):
    """ convert from mediawiki dates to epoch seconds

        `mw_time` is a 14 digit "YYYYMMDDHHMMSS" string. MediaWiki stores
        its timestamps in UTC, so the parsed fields are interpreted as UTC
        (calendar.timegm) rather than as local time (time.mktime), which
        would shift every result by the UTC offset of the machine running
        this script.

        Returns the number of seconds since the epoch as an int; raises
        ValueError when `mw_time` does not match the expected format. """
    import calendar     ##  local import: only this function needs it
    parsed = time.strptime(mw_time, "%Y%m%d%H%M%S")
    return int(calendar.timegm(parsed))

##  Plain-text substitutions applied, in list order, by text_fixer().
##  Each entry is (mediawiki text, trac replacement).
##  The bullet entries go from the longest run of `*` to the shortest, so
##  the one-star pattern never gets to match the start of a deeper bullet.
pairs = [
    ("\n***","\n   *"),     ##  third level bullet
    ("\n**", "\n  *"),      ##  second level bullet
    ("\n*",  "\n *"),       ##  first level bullet
    ("<br>","[[BR]]"),      ##  line break tag -> BR macro
    ("\n:","\n "),          ##  leading `:` becomes a leading space
    ]

##  Matches a double-bracketed link: `[[target]]` or `[[target|label]]`.
##   .  group 1 is the target, without surrounding whitespace
##   .  group 2 is the whole optional `|label` part
##   .  group 3 is the label, without surrounding whitespace (None when
##      the link has no `|label` part)
##  link_rewriter() reads groups 1 and 3.
wiki_link_catcher = re.compile(r"""
    \[\[
      \s*
        ([^|\[\]]+?)        ##  non-greedy `+?` to chop trailing space
      \s*
        (\|
          \s*
            ([^\[\]]+?)     ##  non-greedy `+?` to chop trailing space
          \s*
        )?
    \]\]
    """, 
    re.VERBOSE,
    )

def link_rewriter(match):
    """ build the trac replacement for one matched mediawiki link

        `match` must expose the link target as group 1 and the optional
        label as group 3 (None when absent). Targets starting with
        "Image:" become an Image macro pointing at the `Image/<name>` wiki
        page; everything else becomes a `[wiki:...]` link whose target has
        spaces turned into underscores and whose label falls back to the
        original target text. """
    target = match.group(1)
    caption = match.group(3)
    if target.startswith("Image:"):
        filename = target[6:]           ##  drop the "Image:" prefix
        return "".join(['[[Image(wiki:Image/', filename, ':', filename, ')]]'])
    page = target.replace(' ', '_')
    shown = caption or target
    return "".join(['[wiki:', page, ' ', shown, ' ]'])

def text_fixer(mw_text):
    """ convert from mediawiki text to trac text

        Applies every substitution in `pairs` in order, rewrites the
        double-bracketed links through link_rewriter(), and returns the
        result as a quoted SQL string literal. """
    converted = mw_text
    for old, new in pairs:
        converted = converted.replace(old, new)
    relinked = wiki_link_catcher.sub(link_rewriter, converted)
    return q(relinked)

def title_fixer(namespace, title):
    """ build the quoted trac page name for a mediawiki page

        `namespace` is the numeric mediawiki namespace and `title` the page
        title. Namespace 0 keeps the bare title, 1 gets a "Talk:" prefix,
        2 a "User:" prefix and 6 an "Image/" prefix. Any other namespace
        yields None.

        Namespaces are compared by value: an identity test (`is`) against
        an int literal only works by accident of the interpreter's caching
        of small integers. """
    prefixes = {
        0: '',
        1: 'Talk:',
        2: 'User:',
        6: 'Image/',
        }
    prefix = prefixes.get(namespace)
    if prefix is None:
        return None
    return q(prefix + title)

def comment_fixer(mw_comment):
    """ turn an edit comment into an SQL value: the bare keyword NULL for
        an empty comment, a quoted string literal otherwise """
    if mw_comment != '':
        return q(mw_comment)
    return 'NULL'

def row_writer(row):
    """ render one page revision as SQL INSERT statement(s)

        `row` is indexed as follows:
         .  row[0]   page title
         .  row[1]   version number
         .  row[2]   mediawiki timestamp
         .  row[3]   author name
         .  row[4]   page text
         .  row[5]   edit comment
         .  row[-3]  image size (None when there is no image record)
         .  row[-2]  page namespace

        Always produces an INSERT for the `wiki` table; pages whose name
        starts with "Image/" additionally get an INSERT for the
        `attachment` table. The statements are returned as one string,
        separated by newlines. """
    ##  Every value below is already a finished SQL token (quoted string,
    ##  number or NULL), so the lists can be joined verbatim.
    name = title_fixer(int(row[-2]), row[0])
    version = str(row[1])
    stamp = str(time_fixer(row[2]))
    author = q(row[3])
    ipnr = "'127.0.0.1'"
    text = text_fixer(row[4])
    comment = comment_fixer(row[5])

    inserts = [(
        'wiki',
        [name, version, stamp, author, ipnr, text, comment, 'NULL'],
        )]

    if name.startswith("'Image/"):      ##  mind the quote
        ##  A missing image record must become SQL NULL; str(None) would
        ##  put the bare word `None` into the statement.
        if row[-3] is None:
            size = 'NULL'
        else:
            size = str(row[-3])
        ##  NOTE(review): one attachment INSERT is emitted per revision, all
        ##  sharing the same type/id/filename; the attachment table declares
        ##  UNIQUE (type,id,filename), so image pages with several revisions
        ##  look likely to collide -- confirm.
        inserts.append((
            'attachment',
            [   q('wiki'),
                name,
                "'" + name[7:],         ##  mind the quote
                size,
                stamp,
                comment,
                author,
                ipnr,
            ]))

    def ins(stuff):
        (t, e) = stuff
        return "INSERT INTO " + t + " VALUES\n(\t" + ",\n\t".join(e) + "\n);"
    return "\n".join([ ins(insert) for insert in inserts ])

"""
  The tables in the Trac schema that we are trying to fake:

CREATE TABLE wiki (
    name text,
    version integer,
    time integer,
    author text,
    ipnr text,
    text text,
    comment text,
    readonly integer,
    UNIQUE (name,version)
);

CREATE TABLE attachment (
    type text,
    id text,
    filename text,
    size integer,
    time integer,
    description text,
    author text,
    ipnr text,
    UNIQUE (type,id,filename)
);
                                                                           """




"""
  Failings of this query:
 .  Does not handle "IP address" (anonymous) users.
 .  Does not handle User_talk pages. 
                                                                           """
##  The query that feeds the dump: one result row per revision of every page
##  in namespaces 0, 1, 2 and 6, ordered so that all revisions of a page are
##  adjacent and in revision id order.
##
##  `${p}` is the table name prefix; set the "p" entry below accordingly.
##
##  The tables are combined with explicit joins. Mixing a comma separated
##  table list with `LEFT JOIN ... ON page...` is rejected by MySQL 5.0.12
##  and later ("Unknown column ... in 'on clause'"), because JOIN binds
##  tighter than the comma there and the ON clause can no longer see `page`.
query = string.Template("""
    SELECT
        ${p}page.page_title,
        ${p}revision.rev_timestamp,
        ${p}user.user_name,
        ${p}text.old_text,
        ${p}revision.rev_comment,
        /* the 'meta' elements follow */
        ${p}image.img_size,
        ${p}page.page_namespace,
        ${p}revision.rev_page
      FROM
        ${p}page
      INNER JOIN ${p}revision ON
        ${p}page.page_id = ${p}revision.rev_page
      INNER JOIN ${p}user ON
        ${p}revision.rev_user = ${p}user.user_id
      INNER JOIN ${p}text ON
        ${p}revision.rev_text_id = ${p}text.old_id
      LEFT JOIN ${p}image ON
        ${p}page.page_title = ${p}image.img_name
      WHERE
        ( ${p}page.page_namespace < 3
         OR
          ${p}page.page_namespace = 6
         )
      ORDER BY
        ${p}page.page_namespace,
        ${p}revision.rev_page,
        ${p}revision.rev_id
      ;
    """).safe_substitute({
        "p" : ""
        })
"""
  Peruse these MediaWiki config file variables:
 .  $wgDBprefix
                                                                           """








"""
                  _
                 (_)
    ____   _____  _  ____     ____    ____   ___    ____   ____  _____  ____
   |    \ (____ || ||  _ \   |  _ \  / ___) / _ \  / _  | / ___)(____ ||    \
   | | | |/ ___ || || | | |  | |_| || |    | |_| |( (_| || |    / ___ || | | |
   |_|_|_|\_____||_||_| |_|  |  __/ |_|     \___/  \___ ||_|    \_____||_|_|_|
                             |_|                  (_____|                     

                                                                           """

def the_end():
    """ close the database connection `db` and terminate the script """
    db.close()
    sys.exit()

##  `res` holds the latest fetch_row() result, `row` the row taken from it.
res = row = ()
def set_row():
    """ advance the module level `row` to the next row of the result set
        `rs`; when the fetch comes back empty, the_end() is called and the
        script terminates """
    global res, row
    res = rs.fetch_row()
    if res == ():
        the_end()        ## !! This is the point at which we exit the program.
    row = res[0]

##  `cur` is the last column of the row that started the current page,
##  `ver` the number of revisions of that page written so far.
cur = ver = ()
def reset_versioning():
    """ start counting versions afresh: remember the last column of the
        current `row` in `cur` and set `ver` back to zero """
    global cur, ver
    cur, ver = row[-1], 0

##  Connection to the MediaWiki database; edit these values for your site.
db = _mysql.connect(
     host="localhost",
     user="wikiuser",
     passwd="wuwuwuwu",
     db="wikidb",
     port=5432,
     )
"""
  Peruse these MediaWiki config file variables:
 .  $wgDBserver
 .  $wgDBname
 .  $wgDBuser
 .  $wgDBpassword
 .  $wgDBport
                                                                           """

##  Run the query and obtain the result set that set_row() reads from.
db.query(query)
rs = db.use_result()
##  Load the first row and start version counting for its page.
set_row()
reset_versioning()
##  The outer loop never ends by itself: set_row() terminates the script
##  (through the_end()) once the result set has no more rows.
while True:
    ##  Inner loop: consume rows for as long as the last column (rev_page in
    ##  the query) still equals the page we started counting on.
    while row[-1] == cur:
        ver += 1
        ##  Splice the version number in right after the title, which is
        ##  the column layout row_writer() expects.
        print row_writer(row[0:1] + (ver,) + row[1:])
        set_row()
    ##  `row` now belongs to a different page; restart the version counter.
    reset_versioning()


