Move database related code from backends to db
[mining-tools:mlstats.git] / pymlstats / db / mysql.py
1 #-*- coding:utf-8 -*-
2 # Copyright (C) 2007-2010 Libresoft Research Group
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 #
18 # Authors :
19 #       Israel Herraiz <herraiz@gsyc.escet.urjc.es>
20 #       Germán Poo-Caamaño <gpoo@gnome.org>
21
22 """
23 This module contains a basic SQL wrapper. It uses the standard
24 database API of Python, so any module may be used (just substitute
25 import MySQLdb for any other, for instance import PyGreSQL).
26
27 @authors:      Israel Herraiz
28 @organization: Libresoft Research Group, Universidad Rey Juan Carlos
29 @copyright:    Universidad Rey Juan Carlos (Madrid, Spain)
30 @license:      GNU GPL version 2 or any later version
31 @contact:      libresoft-tools-devel@lists.morfeo-project.org
32 """
33
34 import sys
35 import pprint
36 import MySQLdb as dbapi
37
38 from pymlstats.db import datamodel
39 from pymlstats.database import GenericDatabase
40
41
42 class Database(GenericDatabase):
43
44     def __init__(self, dbname='', username='', password='', hostname='',
45                  admin_user=None, admin_password=None):
46         GenericDatabase.__init__(self)
47
48         self.name = dbname
49         self.user = username
50         self.password = password
51         self.admin_user = admin_user
52         self.admin_password = admin_password
53         self.host = hostname or 'localhost'
54
55     def connect(self):
56         check_options_msg = "Please check the --db-user and " \
57                             "--db-password command line options"
58
59         try:
60             db = dbapi.connect(self.host, self.user,
61                                self.password, self.name,
62                                charset='utf8', use_unicode=True)
63         except dbapi.OperationalError, e:
64
65             # Check the error number
66             errno = e.args[0]
67             if 1045 == errno:  # Unknown or unauthorized user
68                 msg = e.args[1]
69                 print >>sys.stderr, msg
70                 print >>sys.stderr, check_options_msg
71                 sys.exit(2)
72             elif 1044 == errno:  # User can not access database
73                 msg = e.args[1]
74                 print >>sys.stderr, msg
75                 print >>sys.stderr, check_options_msg
76                 sys.exit(2)
77             elif 1049 == errno:  # Unknown database
78                 # Database does not exist
79                 # So create it
80                 try:
81                     db = dbapi.connect(self.host, self.admin_user,
82                                        self.admin_password, '',
83                                        charset='utf8', use_unicode=True)
84                 except dbapi.OperationalError, e:
85                     errno = e.args[0]
86
87                     if 1045 == errno:  # Unauthorized user
88                         msg = e.args[1]
89                         print >>sys.stderr, msg
90                         print >>sys.stderr, check_options_msg
91                         sys.exit(1)
92                     else:  # Unknown exception
93                         message = """ERROR: Runtime error while trying to
94                         connect to the database. Error number is '%s'.
95                         Original message is '%s'. I don't know how to
96                         continue after this failure. Please report the
97                         failure.""" % (str(e.args[0]), str(e.args[1]))
98
99                         # Write message to the stderr
100                         print >> sys.stderr, message
101                         sys.exit(1)
102
103                 cursor = db.cursor()
104                 query = 'CREATE DATABASE %s' % self.name
105                 cursor.execute(query)
106                 query = 'USE %s' % self.name
107                 cursor.execute(query)
108                 for query in datamodel.data_model_query_list:
109                     cursor.execute(query)
110                 db.commit()
111
112                 # Database created, now reconnect
113                 # If this point has passed the exceptions catching,
114                 # it should work
115                 db = dbapi.connect(self.host, self.user,
116                                    self.password, self.name,
117                                    charset='utf8', use_unicode=True)
118             else:  # Unknown exception
119                 message = """ERROR: Runtime error while trying to connect to
120                 the database. Error number is '%s'. Original
121                 message is '%s'. I don't know how to continue
122                 after this failure. Please report the
123                 failure.""" % (str(e.args[0]), str(e.args[1]))
124
125                 # Write message to the stderr
126                 print >> sys.stderr, message
127                 sys.exit(1)
128
129         GenericDatabase.connect(self, db)
130
131     def insert_people(self, name, email, mailing_list_url):
132         try:
133             top_level_domain = email.split(".")[-1]
134         except IndexError:
135             top_level_domain = ''
136         try:
137             username, domain_name = email.split('@')
138         except ValueError:
139             username, domain_name = ('', '')
140
141         query_people = '''INSERT INTO people
142                                       (email_address, name, username,
143                                        domain_name, top_level_domain)
144                           VALUES (%s, %s, %s, %s, %s);'''
145         from_values = [email, name, username, domain_name, top_level_domain]
146         try:
147             self.write_cursor.execute(query_people, from_values)
148         except dbapi.IntegrityError:
149             pass
150         except dbapi.DataError:
151             pprint.pprint(from_values)
152
153         query_mailing_lists_people = '''INSERT INTO mailing_lists_people
154                                         (email_address, mailing_list_url)
155                                         VALUES (%s, %s);'''
156         mailing_lists_people_values = [email, mailing_list_url]
157         try:
158             self.write_cursor.execute(query_mailing_lists_people,
159                                       mailing_lists_people_values)
160         except dbapi.IntegrityError:
161             # Duplicate entry email address-mailing list url
162             pass
163         except dbapi.DataError:
164             pprint.pprint(mailing_lists_people_values)
165
166     def store_messages(self, message_list, mailing_list_url):
167         query = 'SET FOREIGN_KEY_CHECKS = 0;'
168         self.write_cursor.execute(query)
169
170         stored_messages = 0
171         query_message = '''INSERT INTO messages (
172                                    message_id, is_response_of,
173                                    arrival_date, first_date, first_date_tz,
174                                    mailing_list, mailing_list_url,
175                                    subject, message_body)
176                            VALUES (%(message-id)s, %(in-reply-to)s,
177                                    %(received)s, %(date)s, %(date_tz)s,
178                                    %(list-id)s, %(mailing_list_url)s,
179                                    %(subject)s, %(body)s);'''
180         query_m_people = '''INSERT INTO messages_people
181                                (email_address, type_of_recipient, message_id)
182                             VALUES (%s, %s, %s);'''
183
184         for m in message_list:
185             values = m
186             values['mailing_list_url'] = mailing_list_url
187
188             # FIXME: If primary key check fails, ignore and continue
189             msgs_people_value = {}
190             for header in ('from', 'to', 'cc'):
191                 addresses = self.filter(m[header])
192                 if not addresses:
193                     continue
194
195                 for name, email in addresses:
196                     self.insert_people(name, email, mailing_list_url)
197                     key = '%s-%s' % (header, email)
198                     value = (email, header.capitalize(), m['message-id'])
199                     msgs_people_value.setdefault(key, value)
200
201             # Write the rest of the message
202             try:
203                 self.write_cursor.execute(query_message, values)
204             except dbapi.IntegrityError:
205                 # Duplicated message
206                 stored_messages -= 1
207             except:
208                 error_message = """ERROR: Runtime error while trying to write
209                 message with message-id '%s'. That message has not been written
210                 to the database, but the execution has not been stopped. Please
211                 report this failure including the message-id and the URL for
212                 the mbox.""" % m['message-id']
213
214                 stored_messages -= 1
215                 # Write message to the stderr
216                 print >> sys.stderr, error_message
217
218             for key, values in msgs_people_value.iteritems():
219                 try:
220                     self.write_cursor.execute(query_m_people, values)
221                 except dbapi.IntegrityError:
222                     # Duplicate entry email_address-to|cc-mailing list url
223                     pass
224             self.dbobj.commit()
225             stored_messages += 1
226
227         # Check that everything is consistent
228         query = 'SET FOREIGN_KEY_CHECKS = 1;'
229         self.write_cursor.execute(query)
230         self.dbobj.commit()
231
232         return stored_messages