Edgewall Software

Ticket #5241: mediawiki2trac.py

File mediawiki2trac.py, 4.9 KB (added by jason.dusek@…, 4 years ago)

This script handles revisions, User: pages and Talk: pages. It exports to SQL.

Line 
1#!/usr/bin/python
2"""
3  This script is provided AS IS, without any warranty!
4  Copyright lio@lunesu.com, placed in the public domain
5  Modified by Koen Werdler @ 01-05-2007
6  Modified by Jason Dusek, 2008-07-14Z
7 
8  Requirements:
9 .  python 2.5
10 .  python-mysqldb
11
12  This script dumps every version of every page as an INSERT statement that
13  is compatible with the SQLite Trac schema (don't know about other
14  databases). It omits some functionality, like recognizing edits by IP
15  address users. It does not handle images. It separates `User:` and `Talk:`
16  pages from the rest of the bunch and handles them appropriately.
17
18  You may set the connection parameters towards the bottom of the script
19  (following "main program").
20                                                                           """
21
22import os
23import sys
24import time
25import string
26import _mysql
27
28
29
30
def q(s):
    """Return *s* wrapped as a single-quoted SQL string literal.

    Every single quote inside *s* is doubled so that it cannot terminate
    the literal early.
    """
    return "'" + s.replace("'", "''") + "'"
33
def time_fixer(mw_time):
    """Convert a MediaWiki timestamp to integer epoch seconds.

    mw_time is a 14-digit ``YYYYMMDDHHMMSS`` string.  MediaWiki records
    these timestamps in UTC, so the parsed value is converted with
    calendar.timegm(); time.mktime() would treat it as local time and
    shift every result by the exporting machine's UTC offset.

    Raises ValueError (from time.strptime) if mw_time is malformed.
    """
    # Imported here because calendar is not among the file-level imports.
    import calendar

    parsed = time.strptime(mw_time, "%Y%m%d%H%M%S")
    return int(calendar.timegm(parsed))
38
def text_fixer(mw_text):
    """Convert MediaWiki markup to Trac markup and return it SQL-quoted.

    Only a handful of plain textual substitutions are performed: nested
    bullet lists, ``[[...]]`` links, ``<br>`` and ``:`` indentation.  The
    converted text is passed through q() so it can be placed directly
    into an INSERT statement.
    """
    # Order matters:
    #  * deeper bullets must be rewritten before shallower ones, otherwise
    #    "\n*" would consume the prefix of "\n**" and "\n***";
    #  * "[[" / "]]" must be handled before "<br>" is turned into the Trac
    #    macro "[[BR]]", or the macro's own brackets would be rewritten.
    replacements = (
        ("\n***", "\n   *"),
        ("\n**", "\n  *"),
        ("\n*", "\n *"),
        ("[[", "wiki:"),
        ("]]", ""),
        ("<br>", "[[BR]]"),
        ("\n:", "\n "),
    )
    for old, new in replacements:
        mw_text = mw_text.replace(old, new)
    return q(mw_text)
49
def title_fixer(namespace, title):
    """Return the SQL-quoted Trac page name for a MediaWiki page.

    namespace is the integer MediaWiki namespace number and title the page
    title.  Namespace 0 keeps the title as is; 1 and 2 get the ``Talk:``
    and ``User:`` prefixes respectively.

    Raises ValueError for any other namespace instead of silently
    returning None, which would only fail later when the row is joined.
    """
    # Compare by value via dict lookup; the previous `is` tests compared
    # object identity, which is not a reliable way to compare integers.
    prefixes = {0: '', 1: 'Talk:', 2: 'User:'}
    if namespace not in prefixes:
        raise ValueError("unsupported MediaWiki namespace: %r" % (namespace,))
    return q(prefixes[namespace] + title)
57
def comment_fixer(mw_comment):
    """Return the SQL value for a revision comment.

    An empty or missing (None) comment becomes the bare SQL keyword
    ``NULL``; anything else is returned as a quoted string literal via q().
    """
    # `not` also covers None, which would otherwise crash inside q().
    if not mw_comment:
        return 'NULL'
    return q(mw_comment)
63
def row_writer(row):
    """Render one revision as an ``INSERT INTO wiki`` statement.

    row is a sequence laid out as
    (title, version, timestamp, author, text, comment, namespace, page_id);
    the namespace is read from the second-to-last position.  The emitted
    values follow the column order of Trac's wiki table: name, version,
    time, author, ipnr, text, comment, readonly.
    """
    values = [
        title_fixer(int(row[-2]), row[0]),
        str(row[1]),
        str(time_fixer(row[2])),
        q(row[3].lower()),
        "'127.0.0.1'",      # ipnr: the original address is not exported
        text_fixer(row[4]),
        comment_fixer(row[5]),
        'NULL',             # readonly
    ]
    return "INSERT INTO wiki VALUES\n(\t" + ",\n\t".join(values) + "\n);"
76
77"""
78  Trac's "wiki" table is laid out as follows:
79
80CREATE TABLE wiki (
81    name text,
82    version integer,
83    time integer,
84    author text,
85    ipnr text,
86    text text,
87    comment text,
88    readonly integer,
89    UNIQUE (name,version)
90);
91                                                                           """
92
93
94
95
96"""
97  Failings of this query:
98 .  Does not handle "IP address" (anonymous) users.
99 .  Does not handle User_talk pages.
100                                                                           """
# Prefix prepended to every MediaWiki table name in the query below.
# Set it to the value of $wgDBprefix from the MediaWiki configuration;
# the empty string means the tables are not prefixed.
TABLE_PREFIX = ""

# Selects every revision of every page in namespaces 0-2 together with its
# author and text, ordered so that all revisions of one page are adjacent
# and ascending by revision id.  The main loop relies on this ordering and
# on rev_page being the last column.
query = string.Template("""
    SELECT
        ${p}page.page_title,
        ${p}revision.rev_timestamp,
        ${p}user.user_name,
        ${p}text.old_text,
        ${p}revision.rev_comment,
        ${p}page.page_namespace,
        ${p}revision.rev_page
      FROM
        ${p}page,
        ${p}revision,
        ${p}user,
        ${p}text
      WHERE
        ${p}page.page_id = ${p}revision.rev_page
       AND
        ${p}revision.rev_user = ${p}user.user_id
       AND
        ${p}revision.rev_text_id = ${p}text.old_id
       AND
        ${p}page.page_namespace <3
      ORDER BY
        ${p}page.page_namespace,
        ${p}revision.rev_page,
        ${p}revision.rev_id
      ;
    """).safe_substitute({
        "p" : TABLE_PREFIX
        })
131"""
132  Peruse these MediaWiki config file variables:
133 .  $wgDBprefix
134                                                                           """
135
136
137
138
139
140
141
142
143"""
144                  _
145                 (_)
146    ____   _____  _  ____     ____    ____   ___    ____   ____  _____  ____
147   |    \ (____ || ||  _ \   |  _ \  / ___) / _ \  / _  | / ___)(____ ||    \
148   | | | |/ ___ || || | | |  | |_| || |    | |_| |( (_| || |    / ___ || | | |
149   |_|_|_|\_____||_||_| |_|  |  __/ |_|     \___/  \___ ||_|    \_____||_|_|_|
150                             |_|                  (_____|                     
151
152                                                                           """
153
def the_end():
    """Close the database connection and terminate the program.

    Uses the module-level connection `db`.  sys.exit() raises SystemExit,
    so this function does not return to its caller.
    """
    db.close()
    sys.exit()
157
# Last result fetched from the database and the single row it contains.
res = ()
row = ()
def set_row():
    """Fetch the next result row into the module-level `row`.

    Reads from the module-level result set `rs`.  When the result set is
    exhausted (an empty fetch), the_end() is called and the program exits.
    """
    global res
    global row
    res = rs.fetch_row()
    if not res:
        the_end()        ## !! This is the point at which we exit the program.
    row = res[0]
167
# Page id currently being exported and the version counter for that page.
cur = ()
ver = ()
def reset_versioning():
    """Start version numbering over for the page of the current row.

    Records the last column of the module-level `row` (the page id selected
    by the query) in `cur` and resets the version counter `ver` to zero.
    """
    global cur
    global ver
    cur = row[-1]
    ver = 0
175
# Connection to the MediaWiki database.  The four positional arguments are
# host, user, password and database name; the values below are
# placeholders to be replaced with the settings of the wiki being exported.
db = _mysql.connect(
     "localhost",
     "wikiuser",
     "wikipass",
     "wikidb",
     )
182"""
183  Peruse these MediaWiki config file variables:
184 .  $wgDBserver
185 .  $wgDBname
186 .  $wgDBuser
187 .  $wgDBpassword
188 .  $wgDBport
189                                                                           """
190
# Run the export query and stream the rows, printing one INSERT statement
# per revision to standard output.
db.query(query)
rs = db.use_result()
set_row()
reset_versioning()
# This loop has no exit condition of its own: set_row() terminates the
# program through the_end() once the result set is exhausted.
while True:
    # Rows are ordered by page, so consecutive rows with the same page id
    # are successive revisions of that page and get versions 1, 2, 3, ...
    while row[-1] == cur:
        ver += 1
        # Insert the version number right after the title, as row_writer()
        # expects.  The parenthesised print works under Python 2 and 3.
        print(row_writer(row[0:1] + (ver,) + row[1:]))
        set_row()
    reset_versioning()
201