#!/usr/bin/env python
# vim: set fileencoding=utf-8 tabstop=4 shiftwidth=4 expandtab smartindent:

u"""

Files to Feed
-------------

Create an ATOM feed from the files in a directory

.. :Authors:
      Aurélien Bompard <aurelien@bompard.org> <http://aurelien.bompard.org>

.. :License:
      GNU GPL v3 or later
"""

import cgi
import datetime
import hashlib
import optparse
import os
import socket
import sys
import time
import urllib
from xml.sax.saxutils import escape

try:  # Python 3
    from urllib.parse import quote as url_quote
except ImportError:  # Python 2
    from urllib import quote as url_quote


# The trailing "Z" declares the timestamp as UTC, so values must be
# formatted from UTC time (time.gmtime), not local time.
_ATOM_TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

# Extra replacement needed when a value is placed inside a double-quoted
# XML attribute.
_ATTR_ENTITIES = {'"': "&quot;"}

_FEED_HEADER = """<?xml version="1.0" encoding="utf-8" ?>
<feed xmlns="http://www.w3.org/2005/Atom">
    <title>%(title)s</title>
    <link rel="self" href="%(self_link)s" />
    <updated>%(date)s</updated>
    <id>%(address)s</id>
"""

_FEED_ENTRY = """    <entry>
        <title>%(basename)s</title>
        <updated>%(mtime)s</updated>
        <link href="%(link)s" />
        <summary type="html">&lt;p&gt;%(html_basename)s (%(size)sKo)&lt;br/&gt;In: %(html_path)s&lt;/p&gt;</summary>
        <id>tag:%(host)s:%(md5)s</id>
    </entry>
"""


def _atom_time(timestamp=None):
    """Format *timestamp* (seconds since the epoch; now if None) as UTC."""
    return time.strftime(_ATOM_TIME_FORMAT, time.gmtime(timestamp))


def _is_excluded(name, exclude):
    """Return True if *name* is hidden (leading dot) or listed in *exclude*."""
    return name in exclude or name.startswith(".")


def _prune_dirs(dirs, exclude, debug=False):
    """Drop excluded names from *dirs* in place so os.walk does not descend."""
    kept = []
    for name in dirs:
        if _is_excluded(name, exclude):
            if debug:
                print("excluding %s" % name)
        else:
            kept.append(name)
    # Slice assignment keeps the list object os.walk holds, and avoids
    # deleting from the list while it is being iterated (which skips items).
    dirs[:] = kept


def _collect_files(directory, exclude, debug=False):
    """Return the paths of all files below *directory*, honouring exclusions.

    os.walk is called without followlinks, so symlinked sub-directories are
    listed but not entered; each one is walked explicitly here.
    """
    found = []
    for root, dirs, cur_files in os.walk(directory):
        _prune_dirs(dirs, exclude, debug)
        for subdir in dirs:
            linked = os.path.join(root, subdir)
            if not os.path.islink(linked):
                continue
            for subroot, subdirs, sub_files in os.walk(linked):
                _prune_dirs(subdirs, exclude, debug)
                found.extend(os.path.join(subroot, name)
                             for name in sub_files)
        found.extend(os.path.join(root, name) for name in cur_files)
    return found


def _strip_prefix(path, prefix):
    """Remove *prefix* from the start of *path* only (not later repeats)."""
    if path.startswith(prefix):
        return path[len(prefix):]
    return path


def _entry_id(rel_path):
    """Return the hex MD5 digest of *rel_path*, used as a stable entry id."""
    if not isinstance(rel_path, bytes):
        rel_path = rel_path.encode("utf-8")
    return hashlib.md5(rel_path).hexdigest()


def main(directory, options):
    """Write an Atom feed describing the most recently modified files.

    Walks *directory* (including symlinked sub-directories), skips hidden and
    excluded names, the running script and the feed file itself, sorts the
    remaining files by modification time (newest first) and writes at most
    ``options.max`` entries to ``options.output``.

    :param directory: directory to scan; it is stripped from the front of
        each file path to build the link relative to ``options.url``.
    :param options: object providing the attributes ``output``, ``title``,
        ``url``, ``exclude``, ``debug``, ``max`` and ``hostname``.
    """
    # Never list the script or the generated feed (matched by basename).
    skipped = set([os.path.basename(sys.argv[0]),
                   os.path.basename(options.output)])

    entries = []
    for path in _collect_files(directory, options.exclude, options.debug):
        name = os.path.basename(path)
        if _is_excluded(name, options.exclude) or name in skipped:
            continue
        try:
            entries.append((_strip_prefix(path, directory),
                            os.path.getmtime(path),
                            os.path.getsize(path)))
        except OSError:
            # E.g. a dangling symlink or a file removed during the scan.
            print("WARNING: can't access file %s" % path)

    entries.sort(key=lambda entry: entry[1], reverse=True)

    address = options.url
    chunks = [_FEED_HEADER % {
        "title": escape(options.title),
        "self_link": escape("%s/%s" % (address,
                                       os.path.basename(options.output)),
                            _ATTR_ENTITIES),
        "date": _atom_time(),
        "address": escape(address),
    }]

    for rel_path, mtime, size in entries[:options.max]:
        basename = escape(os.path.basename(rel_path))
        dirname = escape(os.path.dirname(rel_path))
        chunks.append(_FEED_ENTRY % {
            "basename": basename,
            # The summary is HTML carried inside XML, so its text needs one
            # level of escaping for each layer.
            "html_basename": escape(basename),
            "html_path": escape(dirname),
            "mtime": _atom_time(mtime),
            "host": escape(options.hostname),
            "link": escape(address + url_quote(rel_path), _ATTR_ENTITIES),
            "size": int(size) // 1024,
            "md5": _entry_id(rel_path),
        })

    chunks.append("</feed>\n")
    document = "".join(chunks)
    if not isinstance(document, bytes):
        # The XML declaration promises UTF-8, so encode explicitly rather
        # than relying on the platform's default text encoding.
        document = document.encode("utf-8")

    # Written in one go once the scan succeeded; the context manager
    # guarantees the file is closed even if the write fails.
    with open(options.output, "wb") as feed:
        feed.write(document)


def parse_opts(argv=None):
    """Parse the command line and derive the feed settings.

    :param argv: list of arguments to parse; when None, optparse falls back
        to ``sys.argv[1:]``.
    :returns: ``(opts, directory)`` where *directory* is the absolute path of
        the directory to scan and *opts* is the optparse values object with
        three derived attributes: ``title`` (hostname substituted),
        ``exclude`` (list of names) and ``url`` (base URL of *directory*).

    Invalid input (more than one directory, a missing directory, no document
    root) is reported through ``parser.error``, which exits the program.
    """
    usage = "usage: %prog [options] [directory]"
    parser = optparse.OptionParser(usage)
    parser.add_option("-o", "--output", dest="output", metavar="FILE",
                      default="files.xml",
                      help="write to this file (default: %default)")
    parser.add_option("-m", "--max", dest="max", default=50, type="int",
                      help="the maximum number of files to include in the "
                      "feed (default: %default)")
    parser.add_option("-t", "--title", dest="title", default="Files on %s",
          help="title for the feed. You can use %s to include the hostname.")
    parser.add_option("-x", "--exclude", dest="exclude", default="",
                      metavar="DIR1,DIR2,...", help="a comma-separated list "
                      "of directories that should not be crawled")
    parser.add_option("-r", "--root", dest="root", help="the document root "
                      "(where the directory starts to be network-accessible)")
    parser.add_option("--hostname", dest="hostname",
              default=socket.gethostname(),
              help="the hostname to make links point to (default: %default)")
    parser.add_option("--url-scheme", dest="url_scheme", default="http",
                      help="the URL method (default: %default)")
    parser.add_option("--url-port", dest="url_port",
                      help="the URL port, if not default for the chosen scheme")
    parser.add_option("-d", "--debug", dest="debug", action="store_true",
                      help="Debug mode")
    opts, args = parser.parse_args(argv)

    if not args:
        directory = os.getcwd()
    elif len(args) == 1:
        directory = args[0]
    else:
        parser.error("Only one directory can be given as argument")
    if not os.path.isdir(directory):
        parser.error("The directory '%s' does not exist" % directory)
    directory = os.path.abspath(directory)

    if "%" in opts.title:
        try:
            opts.title = opts.title % opts.hostname
        except (TypeError, ValueError):
            # The title holds a "%" that is not a single "%s" placeholder
            # (e.g. "100% files"): substitute literally instead of crashing.
            opts.title = opts.title.replace("%s", opts.hostname)

    # Empty items (including the default "") are dropped.
    opts.exclude = [name.strip() for name in opts.exclude.split(",")
                    if name.strip()]

    if not opts.root:
        parser.error("You must provide a document root")

    # Only strip the document root when it is a real leading path component
    # of the directory; a plain str.replace would also remove later
    # occurrences (and every "/" when the root is "/").
    root = os.path.abspath(opts.root)
    root_prefix = root.rstrip(os.sep) + os.sep
    if directory == root:
        rel_path = ""
    elif directory.startswith(root_prefix):
        rel_path = directory[len(root_prefix):]
    else:
        # Directory outside the document root: keep the whole path, which is
        # what happened before when the root string did not occur in it.
        rel_path = directory
    rel_path = rel_path.replace(os.sep, "/").strip("/")

    url = [opts.url_scheme, "://", opts.hostname]
    if opts.url_port:
        url.extend([":", opts.url_port])
    if rel_path:
        # No trailing slash is ever added, so "<url>/<name>" links built by
        # the caller do not end up with a double slash.
        url.extend(["/", rel_path])
    opts.url = "".join(url)

    return opts, directory


if __name__ == "__main__":
    # Script entry point: read the command line, then generate the feed.
    opts, directory = parse_opts()
    main(directory=directory, options=opts)