1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
u"""
4
5
Mangafox-DL
6
-----------
7
8
Downloads mangas from MangaFox_ to the local drive as images, and optionally
9
creates a PDF with these images.
10
11
.. _MangaFox: http://www.mangafox.com
12
13
The PDF is in A4-size, so it's best printed in "leaflet" mode.
14
15
Dependencies:
16
17
- `BeautifulSoup <http://www.crummy.com/software/BeautifulSoup>`_
18
- `ReportLab <http://www.reportlab.com/software/opensource>`_
19
- `PIL <http://www.pythonware.com/products/pil>`_
20
21
22
.. :Authors:
23
       Aurélien Bompard <aurelien@bompard.org> <http://aurelien.bompard.org>
24
25
.. :License:
26
       GNU GPL v3 or later
27
28
"""
29
30
import os
31
import sys
32
import urllib
33
import urllib2
34
import urlparse
35
import re
36
from optparse import OptionParser
37
38
from BeautifulSoup import BeautifulSoup
39
from reportlab.pdfgen import canvas
40
from reportlab.lib.pagesizes import A4
41
import PIL.Image
42
43
# Root URL of the manga listings; Manga appends "/<name>/" to it to build
# the URL of a given manga's main page.
BASE_URL = "http://www.mangafox.com/manga"
44
45
46
class MangaError(Exception):
    """
    Error raised by this script when a manga cannot be processed.

    ``main`` catches it, prints the message on stderr and exits with
    status 1.
    """
47
48
class Manga(object):
    """
    This is the main Manga object, which contains Chapter instances. It's
    mainly a container object.

    The chapter list is built lazily, on first access to ``chapters``, and is
    restricted to the volume / chapter selected on the command line. The
    selection and the output directory are read from the module-level
    ``options`` object that ``main`` sets up before creating a Manga.
    """

    def __init__(self, name):
        """
        :param name: name of the manga, as found in the MangaFox URL.
        """
        self.name = name
        self.url = "%s/%s/" % (BASE_URL, name)
        self.subdir = os.path.join(options.output, name)
        self._chapters = None  # cache, filled on first access to ``chapters``

    def _get_chapters(self):
        """
        Return the sorted list of selected Chapter instances.

        The chapter URLs come from the manga-level ``index.txt`` when that
        file exists in ``self.subdir``, and from the MangaFox web page
        otherwise. The result is cached.

        :raises MangaError: if the web page or the index cannot be parsed.
        """
        if self._chapters is not None:
            return self._chapters
        main_index = os.path.join(self.subdir, "index.txt")
        # NOTE(review): nothing visible in this file writes the manga-level
        # index.txt; it is only ever read here.
        if os.path.exists(main_index):
            chapurls = self._get_chapter_urls_index()
        else:
            chapurls = self._get_chapter_urls_web()
        # Assign the cache only once every Chapter has been built, so that a
        # failure does not leave a partial list cached.
        self._chapters = [Chapter(self, chapter_url)
                          for chapter_url in chapurls]
        self.filter_chapters(options.volume, options.chapter)
        self._chapters.sort()  # relies on the ordering defined by Chapter
        return self._chapters
    chapters = property(_get_chapters)

    def _get_chapter_urls_index(self):
        """
        Read the chapter URLs from the manga-level ``index.txt``.

        Each non-blank line holds a volume number and a chapter number
        separated by whitespace. Blank lines are ignored.

        :returns: list of absolute chapter URLs, in file order.
        :raises MangaError: if a line does not hold exactly two fields.
        """
        urllist = []
        main_index = os.path.join(self.subdir, "index.txt")
        with open(main_index) as chapfile:
            for line in chapfile:
                fields = line.split()
                if not fields:
                    continue
                if len(fields) != 2:
                    raise MangaError("Malformed line in %s: %r"
                                     % (main_index, line))
                volnum, chapnum = fields
                # Build absolute URLs ending with "/": Chapter strips the
                # manga URL to find the volume / chapter numbers, and appends
                # "<page>.html" to the chapter URL when downloading.
                urllist.append("%sv%s/c%s/" % (self.url, volnum, chapnum))
        return urllist

    def _get_chapter_urls_web(self):
        """
        Read the chapter URLs from the manga's main page on the website.

        :returns: list of absolute chapter URLs, in page order.
        :raises MangaError: if the chapter listing table is not found.
        """
        main_page = BeautifulSoup(urllib2.urlopen(self.url))
        listing = main_page.find("table", attrs={"id": "listing"})
        if not listing:
            raise MangaError("Error reading the web page")
        return [urlparse.urljoin(self.url, chaplink["href"])
                for chaplink in listing.findAll("a", attrs={"class": "chico"})]

    def filter_chapters(self, volnum=None, chapnum=None):
        """
        Keep only the chapters matching the requested volume and chapter.

        The comparison is an exact string match against the numbers found
        in the chapter URL. A false value (``None`` or an empty string)
        disables the corresponding filter. The list is modified in place.

        :param volnum: volume number to keep, or None for all volumes.
        :param chapnum: chapter number to keep, or None for all chapters.
        """
        kept = []
        for chapter in self._chapters:
            if volnum and chapter.volnum != volnum:
                if options.debug:
                    print("found volume %s but you asked for %s, skipping."
                          % (chapter.volnum, volnum))
                continue
            if chapnum and chapter.chapnum != chapnum:
                if options.debug:
                    print("found chapter %s but you asked for %s, skipping."
                          % (chapter.chapnum, chapnum))
                continue
            kept.append(chapter)
        self._chapters[:] = kept

    def download(self):
        """
        Download every selected chapter, or print a message if there is none.
        """
        if not self.chapters:
            print("No chapter matching your selection !")
            return
        for chapter in self.chapters:
            chapter.download()

    def make_pdf(self):
        """
        Build a single PDF with the pages of every selected chapter.

        The file is written in ``self.subdir``. Its name and title include
        the volume (and chapter) numbers when a volume was selected.
        """
        if not self.chapters:
            print("No chapter matching your selection !")
            return
        print("Building PDF...")
        # Layout settings, read back by Chapter.add_image_to_pdf.
        self.page_size = A4
        self.margin = 0
        self.move_down = 0
        display_name = self.name.title()
        # NOTE(review): when only a chapter is selected (no volume), the file
        # name and title do not mention the chapter.
        if options.volume:
            if options.chapter:
                pdf_filename = "%s-%s-%s.pdf" % (display_name,
                                    options.volume, options.chapter)
                pdf_title = "%s - vol. %s, chap. %s" % (display_name,
                                    options.volume, options.chapter)
            else:
                pdf_filename = "%s-%s.pdf" % (display_name, options.volume)
                pdf_title = "%s - vol. %s" % (display_name, options.volume)
        else:
            pdf_filename = "%s.pdf" % display_name
            pdf_title = display_name
        self.pdf = canvas.Canvas(os.path.join(self.subdir, pdf_filename),
                                 pagesize=self.page_size)
        self.pdf.setTitle(pdf_title)
        for chapter in self.chapters:
            chapter.add_to_pdf(self.pdf)
        self.pdf.save()
145
146
147
class Chapter(object):
    """
    This is a chapter object, contained in a Manga object. It "knows" how to
    download itself from MangaFox and how to append itself to the PDF (if
    requested)

    We use an ``index.txt`` file to store the order of the images. This is an
    optimisation to allow generating a PDF when the images are already
    downloaded, without having to go back to MangaFox and parse the webpages.

    Each line of the index is ``<page number> <image file name>``.
    """

    def __init__(self, manga, url):
        """
        :param manga: the Manga instance this chapter belongs to.
        :param url: URL of the chapter; once the manga URL is removed, it
            must look like ``v<volume>/c<chapter>/``.
        :raises MangaError: if the URL does not have these two components.
        """
        self.manga = manga
        self.url = url
        _suburl = self.url.replace(self.manga.url, "")
        parts = _suburl.split("/")
        if len(parts) < 2:
            raise MangaError("Unexpected chapter URL: %s" % url)
        # Drop the leading "v" / "c" letter of each component.
        self.volnum = parts[0][1:]
        self.chapnum = parts[1][1:]
        self.subdir = os.path.join(self.manga.subdir, self.volnum, self.chapnum)
        self.title = "Volume %s, chapter %s" % (self.volnum, self.chapnum)
        self._pagenums = None  # cache for the ``pagenums`` property
        self._pages = {}       # cache for the ``pages`` property

    def __repr__(self):
        return "<Chapter %s, volume %s of %s>" \
                    % (self.chapnum, self.volnum, self.manga.name)

    @staticmethod
    def _sort_key(number):
        """
        Return a key ordering numeric strings by value ("9" before "10").

        Non-numeric strings sort after numeric ones, alphabetically. The raw
        string is the last element, so two keys are equal only when the
        strings are identical.
        """
        if number.replace(".", "", 1).isdigit():
            return (0, float(number), number)
        return (1, 0.0, number)

    def __cmp__(self, other):
        """
        Order chapters by volume number, then by chapter number.

        :raises ValueError: if ``other`` is not a Chapter.
        """
        if not isinstance(other, Chapter):
            raise ValueError("Only comparasion between Chapter instances is supported")
        mine = (self._sort_key(self.volnum), self._sort_key(self.chapnum))
        theirs = (self._sort_key(other.volnum), self._sort_key(other.chapnum))
        return (mine > theirs) - (mine < theirs)

    def _read_index(self):
        """
        Return the ``(page number, file name)`` pairs of ``index.txt``.

        Lines which do not hold both fields (blank lines, for instance) are
        ignored. The index file must exist.
        """
        entries = []
        with open(os.path.join(self.subdir, "index.txt"), "r") as index:
            for line in index:
                fields = line.split(None, 1)
                if len(fields) != 2:
                    continue
                entries.append((fields[0], fields[1].strip()))
        return entries

    def _get_pages(self):
        """
        Return the dict mapping page numbers to local image file names.

        It is loaded from ``index.txt`` when that file exists, and the index
        is read again on each access for as long as the dict is empty.
        """
        if self._pages:
            return self._pages
        index_path = os.path.join(self.subdir, "index.txt")
        if os.path.exists(index_path): # index found, use it
            self._pages = dict(self._read_index())
        return self._pages
    pages = property(_get_pages)

    def _get_pagenums(self):
        """
        Return the ordered list of page numbers of this chapter (cached).

        The list comes from ``index.txt`` when that file exists, and from
        the chapter's web page otherwise.

        :raises MangaError: if the page selector is not found on the web page.
        """
        if self._pagenums is not None:
            return self._pagenums
        index_path = os.path.join(self.subdir, "index.txt")
        if os.path.exists(index_path): # index found, use it
            # NOTE(review): an index left by an interrupted download only
            # lists the pages fetched so far, and is still trusted here.
            pagenums = []
            seen = set()
            for pagenum, _filename in self._read_index():
                if pagenum not in seen: # a page may be listed more than once
                    seen.add(pagenum)
                    pagenums.append(pagenum)
            self._pagenums = pagenums
        else: # index not found, parse the webpage
            chapter_page = BeautifulSoup(urllib2.urlopen(self.url))
            container = chapter_page.find("div", attrs={"class": "right middle"})
            selector = None
            if container is not None:
                selector = container.find("select")
            if selector is None:
                raise MangaError("Can't find the page list on %s" % self.url)
            self._pagenums = [ o["value"] for o in selector.findAll("option") ]
        return self._pagenums
    pagenums = property(_get_pagenums)

    def download(self):
        """
        Download the images of this chapter into ``self.subdir``.

        Pages which are listed in the index and whose image file exists are
        skipped. Every newly downloaded page is appended to ``index.txt``.

        :raises MangaError: if a page does not contain the expected image.
        """
        if not os.path.exists(self.subdir):
            os.makedirs(self.subdir)
        index_path = os.path.join(self.subdir, "index.txt")
        for pagenum in self.pagenums:
            sys.stdout.write("\r%s, page %s / %s"
                             % (self.title, pagenum, len(self.pagenums)))
            sys.stdout.flush()
            indexed_name = self.pages.get(pagenum)
            if indexed_name is not None \
                    and os.path.exists(os.path.join(self.subdir, indexed_name)):
                continue # already downloaded
            page_url = "%s%s.html" % (self.url, pagenum)
            page = BeautifulSoup(urllib2.urlopen(page_url))
            img = page.find("img", attrs={"id": "image"})
            if img is None:
                raise MangaError("Can't find the image on %s" % page_url)
            img_url = img["src"]
            filename = os.path.basename(img_url)
            local_filename = os.path.join(self.subdir, filename)
            if not os.path.exists(local_filename):
                urllib.urlretrieve(img_url, local_filename)
            if indexed_name != filename:
                # Don't add the same line twice when re-fetching a lost file.
                with open(index_path, "a") as index:
                    index.write("%s %s\n" % (pagenum, filename))
            self.pages[pagenum] = filename
        sys.stdout.write("\n") # end the "\r"-refreshed progress line

    def add_to_pdf(self, pdf):
        """
        Append this chapter to the PDF, with an outline entry, one image per
        page.

        Images which are missing from the index or can't be read are
        reported on stderr and skipped.

        :param pdf: the ReportLab canvas to draw on.
        """
        outlinekey = str("v%s-c%s" % (self.volnum, self.chapnum))
        pdf.bookmarkPage(outlinekey)
        pdf.addOutlineEntry(self.title, outlinekey)
        for pagenum in self.pagenums:
            filename = self.pages.get(pagenum)
            if filename is None:
                sys.stderr.write("Error adding page %s of %s: not downloaded\n"
                                 % (pagenum, self.title))
                continue
            local_filename = os.path.join(self.subdir, filename)
            try:
                self.add_image_to_pdf(local_filename, pdf)
            except IOError as e:
                sys.stderr.write("Error adding file %s: %s\n"
                                 % (local_filename, e))
                continue
            pdf.showPage()

    def add_image_to_pdf(self, filename, pdf):
        """
        Draw an image on the current PDF page, scaled to fit and keeping its
        aspect ratio.

        The image is first scaled to the available height and centered
        horizontally; if it is then too wide, it is scaled to the available
        width instead. The layout settings (``page_size``, ``margin``,
        ``move_down``) are read from the Manga object.

        :param filename: path of the image file.
        :param pdf: the ReportLab canvas to draw on.
        :raises IOError: if the image can't be opened.
        """
        img_pil = PIL.Image.open(filename)
        page_width = self.manga.page_size[0]
        page_height = self.manga.page_size[1]
        margin = self.manga.margin
        move_down = self.manga.move_down
        img_height = page_height - 2 * margin + move_down
        img_width = int(img_pil.size[0] * img_height / float(img_pil.size[1]))
        img_y = margin - move_down
        img_x = (page_width - img_width) / 2
        if img_x < margin: # too wide: resize by the other dimension
            img_width = page_width - 2 * margin
            img_height = int(img_pil.size[1] * img_width / float(img_pil.size[0]))
            img_x = margin
            img_y = (page_height - img_height + move_down) / 2
        # Give the PIL image rather than the file name: according to the
        # original author's note, the file name raises AttributeError.
        pdf.drawInlineImage(img_pil, img_x, img_y, img_width, img_height)
268
269
270
def parse_opts():
    """
    Parse the command line (``sys.argv``).

    Exactly one positional argument is expected: the manga name. When it is
    missing, or when there are several, the parser reports a usage error and
    exits.

    :returns: an ``(options, manga_name)`` tuple, where ``options`` is the
        optparse values object.
    """
    parser = OptionParser(
        usage="%prog [-o directory] [-v volume] [-c chapter] [-p] [-d] name")
    # One entry per option: (flags, keyword arguments for add_option).
    option_specs = (
        (("-o", "--output"),
         dict(dest="output", default=".",
              help="Download in this directory (default: current dir)")),
        (("-v", "--volume"),
         dict(dest="volume", help="Only download this volume")),
        (("-c", "--chapter"),
         dict(dest="chapter", help="Only download this chapter")),
        (("-p", "--pdf"),
         dict(dest="pdf", action="store_true", help="Make a PDF")),
        (("-d", "--debug"),
         dict(dest="debug", action="store_true", help="Print more stuff")),
    )
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)
    parsed, positional = parser.parse_args()
    if len(positional) != 1:
        parser.error("You must give a manga name, as found in the Mangafox URL")
    return parsed, positional[0]
287
288
289
def main():
    """
    Script entry point: download the requested manga, then build the PDF if
    it was requested on the command line.

    The parsed options are stored in the module-level ``options`` variable,
    which the Manga and Chapter classes read. Exits with status 1 on a
    MangaError and with status 0 when interrupted by the user.
    """
    global options
    options, manga_name = parse_opts()
    manga = Manga(manga_name)
    try:
        manga.download()
        # Build the PDF inside the same try block, so that errors and Ctrl-C
        # during this step are reported like the ones of the download step.
        if options.pdf:
            manga.make_pdf()
    except MangaError as e:
        sys.stderr.write("%s\n" % (e,))
        sys.exit(1)
    except KeyboardInterrupt:
        sys.stderr.write("Aborted by user\n")
        sys.exit(0)


# Only run when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()