1
#!/usr/bin/env python
2
# vim: set fileencoding=utf-8 tabstop=4 shiftwidth=4 expandtab smartindent:
3
4
u"""
5
6
Files to Feed
7
-------------
8
9
Create an ATOM feed from the files in a directory
10
11
.. :Authors:
12
      Aurélien Bompard <aurelien@bompard.org> <http://aurelien.bompard.org>
13
14
.. :License:
15
      GNU GPL v3 or later
16
"""
17
18
import os
19
import sys
20
import socket
21
import datetime
22
import hashlib
23
import cgi
24
import urllib
25
import optparse
26
27
28
def _prune_excluded(dirnames, exclude, debug):
    """Drop excluded and hidden names from *dirnames* in place.

    ``os.walk`` only skips a directory when it is removed from the very list
    object it yielded.  The list is rebuilt and slice-assigned instead of
    being deleted from while it is iterated, because deleting during
    iteration silently skips the entry that follows each removed one.
    """
    kept = []
    for name in dirnames:
        if name in exclude or name.startswith("."):
            if debug:
                print("excluding %s" % name)
        else:
            kept.append(name)
    dirnames[:] = kept


def _collect_files(directory, exclude, debug):
    """Return the paths of all files below *directory*, skipping pruned dirs."""
    found = []
    for root, dirs, cur_files in os.walk(directory):
        _prune_excluded(dirs, exclude, debug)
        for subdir in dirs:
            target = os.path.join(root, subdir)
            # os.walk lists symlinked directories but does not descend into
            # them by default, so each one is walked explicitly.
            if os.path.islink(target):
                for subroot, subdirs, sub_files in os.walk(target):
                    _prune_excluded(subdirs, exclude, debug)
                    found.extend(os.path.join(subroot, name)
                                 for name in sub_files)
        found.extend(os.path.join(root, name) for name in cur_files)
    return found


def _strip_prefix(path, prefix):
    """Return *path* without its leading *prefix* (unchanged if absent)."""
    if path.startswith(prefix):
        return path[len(prefix):]
    return path


def _utf8(text):
    """Return *text* as UTF-8 bytes; byte strings pass through untouched."""
    if isinstance(text, bytes):
        return text
    return text.encode("utf-8")


def main(directory, options):
    """Write an Atom feed of the most recently modified files in *directory*.

    The tree below ``directory`` is walked, symlinked directories included.
    Hidden entries, names listed in ``options.exclude``, this script and the
    feed file itself are skipped.  The ``options.max`` newest files (by
    modification time) become the feed entries.

    The feed is assembled in memory and written in one go, so a failure
    while scanning does not leave a truncated feed behind.

    :param directory: directory to scan; it is stripped from the front of
        every file path to build the link appended to ``options.url``.
    :param options: object providing ``output`` (feed file path), ``title``,
        ``url`` (address corresponding to ``directory``), ``hostname`` (used
        in entry ids), ``exclude`` (names to skip), ``max`` (entry limit)
        and ``debug`` (print the directories being excluded).
    """
    # Function-scope imports: these replace cgi.escape and urllib.quote,
    # which are not available on Python 3.
    import time
    from xml.sax.saxutils import escape
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    def as_attribute(value):
        # Also escape double quotes so the value is safe inside href="...".
        return escape(value, {'"': "&quot;"})

    def atom_date(timestamp=None):
        # The trailing "Z" means UTC, so the time must be rendered in UTC
        # (gmtime) rather than local time.  None means "now".
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(timestamp))

    script_name = os.path.basename(sys.argv[0])
    output_name = os.path.basename(options.output)

    entries = []
    for path in _collect_files(directory, options.exclude, options.debug):
        name = os.path.basename(path)
        if name in options.exclude or name.startswith("."):
            continue
        if name == script_name or name == output_name:
            continue
        try:
            mtime = os.path.getmtime(path)
            size = os.path.getsize(path)
        except OSError:
            print("WARNING: can't access file %s" % path)
            continue
        entries.append((_strip_prefix(path, directory), mtime, size))

    # Newest first; the sort is stable, so ties keep their discovery order.
    entries.sort(key=lambda entry: entry[1], reverse=True)

    address = as_attribute(options.url)
    chunks = ["""<?xml version="1.0" encoding="utf-8" ?>
<feed xmlns="http://www.w3.org/2005/Atom">
    <title>%(title)s</title>
    <link rel="self" href="%(address)s/%(feed_name)s" />
    <updated>%(date)s</updated>
    <id>%(address)s</id>
""" % {
        "feed_name": quote(output_name),
        "title": escape(options.title),
        "address": address,
        "date": atom_date(),
    }]

    for rel_path, mtime, size in entries[:options.max]:
        name = os.path.basename(rel_path)
        folder = os.path.dirname(rel_path)
        chunks.append("""    <entry>
        <title>%(basename)s</title>
        <updated>%(mtime)s</updated>
        <link href="%(address)s%(filename)s" />
        <summary type="html">&lt;p&gt;%(summary_name)s (%(size)sKo)&lt;br/&gt;In: %(summary_path)s&lt;/p&gt;</summary>
        <id>tag:%(host)s:%(md5)s</id>
    </entry>
""" % {
            "basename": escape(name),
            # The summary is HTML carried inside XML: escape once for the
            # HTML and once more for the XML, otherwise a file name
            # containing "<" or "&" turns into live markup in the reader.
            "summary_name": escape(escape(name)),
            "summary_path": escape(escape(folder)),
            "mtime": atom_date(mtime),
            "host": escape(options.hostname),
            "filename": quote(rel_path),
            "size": int(size) // 1024,
            "md5": hashlib.md5(_utf8(rel_path)).hexdigest(),
            "address": address,
        })

    chunks.append("</feed>\n")

    # Binary mode plus explicit UTF-8 keeps the bytes on disk consistent
    # with the encoding announced in the XML declaration.
    with open(options.output, "wb") as feed:
        feed.write(b"".join(_utf8(chunk) for chunk in chunks))
110
111
112
def parse_opts():
    """Parse the command line and return ``(options, directory)``.

    ``directory`` is the absolute path of the directory to scan (the
    current working directory when no argument is given).  The returned
    options object is post-processed:

    * ``title`` has the hostname substituted for ``%s``;
    * ``exclude`` is turned from a comma-separated string into a list;
    * ``url`` is added, built from the scheme, hostname, optional port and
      the part of ``directory`` that lies below the document root.

    Invalid input (more than one argument, a missing directory, no document
    root) is reported through ``parser.error``.
    """
    usage = "usage: %prog [options] [directory]"
    parser = optparse.OptionParser(usage)
    parser.add_option("-o", "--output", dest="output", metavar="FILE",
                      default="files.xml",
                      help="write to this file (default: %default)")
    parser.add_option("-m", "--max", dest="max", default=50, type="int",
                      help="the maximum number of files to include in the "
                      "feed (default: %default)")
    parser.add_option("-t", "--title", dest="title", default="Files on %s",
          help="title for the feed. You can use %s to include the hostname.")
    parser.add_option("-x", "--exclude", dest="exclude", default="",
                      metavar="DIR1,DIR2,...", help="a comma-separated list "
                      "of directories that should not be crawled")
    parser.add_option("-r", "--root", dest="root", help="the document root "
                      "(where the directory starts to be network-accessible)")
    parser.add_option("--hostname", dest="hostname",
              default=socket.gethostname(),
              help="the hostname to make links point to (default: %default)")
    parser.add_option("--url-scheme", dest="url_scheme", default="http",
                      help="the URL method (default: %default)")
    parser.add_option("--url-port", dest="url_port",
                      help="the URL port, if not default for the chosen scheme")
    parser.add_option("-d", "--debug", dest="debug", action="store_true",
                      help="Debug mode")
    opts, args = parser.parse_args()

    if len(args) > 1:
        parser.error("Only one directory can be given as argument")
    directory = args[0] if args else os.getcwd()
    if not os.path.isdir(directory):
        parser.error("The directory '%s' does not exist" % directory)
    directory = os.path.abspath(directory)

    if "%" in opts.title:
        try:
            opts.title = opts.title % opts.hostname
        except (TypeError, ValueError):
            # The title holds a literal percent sign (e.g. "100% files")
            # rather than a single usable placeholder: substitute "%s" only.
            opts.title = opts.title.replace("%s", opts.hostname)

    # Ignore empty items such as the one produced by the empty default.
    opts.exclude = [name.strip() for name in opts.exclude.split(",")
                    if name.strip()]

    if not opts.root:
        parser.error("You must provide a document root")

    url = [opts.url_scheme, "://", opts.hostname]
    if opts.url_port:
        url.extend([":", opts.url_port])

    # Strip the document root only when it really is a leading path prefix
    # of the directory; a plain substring replace would also match in the
    # middle of the path or a sibling such as "/var/www2".
    root = os.path.abspath(opts.root).rstrip(os.sep)
    if directory == root or directory.startswith(root + os.sep):
        path = directory[len(root):]
    else:
        path = directory
    # No separator for an empty path: main() adds its own "/" after the URL.
    if path and not path.startswith("/"):
        url.append("/")
    url.append(path)
    opts.url = "".join(url)

    return opts, directory
168
169
170
if __name__ == "__main__":
    # Script entry point: read the command line, then generate the feed.
    parsed_options, target_directory = parse_opts()
    main(target_directory, parsed_options)