| 0e89900 by Aurélien Bompard at 2010-04-17 |
1 |
#!/usr/bin/env python |
|
2 |
# vim: set fileencoding=utf-8 tabstop=4 shiftwidth=4 expandtab smartindent: |
|
3 |
|
|
4 |
u""" |
| 7581434 by Aurélien Bompard at 2011-04-03 |
5 |
|
|
6 |
Files to Feed |
|
7 |
------------- |
|
8 |
|
| 0e89900 by Aurélien Bompard at 2010-04-17 |
9 |
Create an ATOM feed from the files in a directory |
|
10 |
|
| 7581434 by Aurélien Bompard at 2011-04-03 |
11 |
.. :Authors: |
|
12 |
Aurélien Bompard <aurelien@bompard.org> <http://aurelien.bompard.org> |
| 0e89900 by Aurélien Bompard at 2010-04-17 |
13 |
|
| 7581434 by Aurélien Bompard at 2011-04-03 |
14 |
.. :License: |
|
15 |
GNU GPL v3 or later |
| 0e89900 by Aurélien Bompard at 2010-04-17 |
16 |
""" |
|
17 |
|
|
18 |
import os |
|
19 |
import sys |
|
20 |
import socket |
|
21 |
import datetime |
|
22 |
import hashlib |
|
23 |
import cgi |
|
24 |
import urllib |
|
25 |
import optparse |
|
26 |
|
|
27 |
|
|
28 |
def _to_utf8(text):
    """Return ``text`` as UTF-8 bytes; byte strings are passed through."""
    if isinstance(text, bytes):
        return text
    return text.encode("utf-8", "replace")


def _xml_escape(text):
    """Escape ``&``, ``<``, ``>`` and ``"`` for XML text or attribute values."""
    # Function-scope import: this module is not part of the file-level imports.
    from xml.sax.saxutils import escape
    return escape(text, {'"': "&quot;"})


def _url_quote(path):
    """Percent-encode ``path`` for use inside a URL (``/`` is kept as is)."""
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3
    return quote(path)


def _atom_date(timestamp=None):
    """Format ``timestamp`` (default: now) as a UTC date with a ``Z`` suffix."""
    # Function-scope import: ``time`` is not part of the file-level imports.
    import time
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(timestamp))


def _prune_dirs(dirs, options):
    """Remove excluded and hidden names from ``dirs`` in place."""
    kept = []
    for name in dirs:
        if name in options.exclude or name.startswith("."):
            if options.debug:
                print("excluding %s" % name)
        else:
            kept.append(name)
    # Slice assignment keeps the very same list object that os.walk() reads
    # back, and avoids deleting from the list while it is being iterated.
    dirs[:] = kept


def _collect_files(directory, options):
    """Return the paths of all files below ``directory``.

    Excluded and hidden directories are not visited. Symlinks to directories
    are followed one level deep: the linked tree is walked, but symlinked
    directories found inside it are not followed again.
    """
    found = []
    for root, dirs, names in os.walk(directory):
        _prune_dirs(dirs, options)
        for subdir in dirs:
            linked = os.path.join(root, subdir)
            if not os.path.islink(linked):
                continue
            for subroot, subdirs, subnames in os.walk(linked):
                _prune_dirs(subdirs, options)
                found.extend(os.path.join(subroot, n) for n in subnames)
        found.extend(os.path.join(root, n) for n in names)
    return found


def main(directory, options):
    """Write an Atom feed listing the most recently modified files.

    ``directory`` is the tree to scan. ``options`` must provide the
    attributes ``output`` (feed file to write), ``title``, ``url`` (public
    address of ``directory``), ``hostname``, ``exclude`` (names to skip),
    ``max`` (maximum number of entries) and ``debug``.

    Hidden files, excluded names, this script and the feed file itself are
    left out. Entries are sorted by modification time, newest first. Files
    that cannot be stat'ed are reported on stdout and skipped.
    """
    script_name = os.path.basename(sys.argv[0])
    output_name = os.path.basename(options.output)

    entries = []
    for path in _collect_files(directory, options):
        name = os.path.basename(path)
        if name in options.exclude or name.startswith("."):
            continue
        if name in (script_name, output_name):
            continue
        try:
            mtime = os.path.getmtime(path)
            size = os.path.getsize(path)
        except OSError:
            print("WARNING: can't access file %s" % path)
            continue
        # Strip only the leading directory (a plain str.replace would also
        # remove later occurrences of the same string inside the path).
        relative = "/" + os.path.relpath(path, directory).replace(os.sep, "/")
        entries.append((relative, mtime, size))

    entries.sort(key=lambda entry: entry[1], reverse=True)

    # Avoid "//" in links when the URL already ends with a slash.
    address = _xml_escape(options.url.rstrip("/"))

    # Binary mode + explicit encoding: the output is UTF-8 as declared in the
    # XML prolog, whatever the locale is. ``with`` closes the file on errors.
    with open(options.output, "wb") as feed:
        feed.write(_to_utf8(
            '<?xml version="1.0" encoding="utf-8" ?>\n'
            '<feed xmlns="http://www.w3.org/2005/Atom">\n'
            '  <title>%(title)s</title>\n'
            '  <link rel="self" href="%(address)s/%(feed_name)s" />\n'
            '  <updated>%(date)s</updated>\n'
            '  <id>%(feed_id)s</id>\n' % {
                "title": _xml_escape(options.title),
                "address": address,
                "feed_name": _xml_escape(_url_quote(output_name)),
                "date": _atom_date(),
                "feed_id": _xml_escape(options.url),
            }))

        for relative, mtime, size in entries[:options.max]:
            basename = os.path.basename(relative)
            # The summary is declared type="html": build the HTML first
            # (escaping the names for HTML), then escape the whole snippet
            # again so that it is carried as text inside the XML element.
            summary = "<p>%s (%sKo)<br/>In: %s</p>" % (
                _xml_escape(basename),
                int(size) // 1024,
                _xml_escape(os.path.dirname(relative)),
            )
            feed.write(_to_utf8(
                '  <entry>\n'
                '    <title>%(basename)s</title>\n'
                '    <updated>%(mtime)s</updated>\n'
                '    <link href="%(address)s%(filename)s" />\n'
                '    <summary type="html">%(summary)s</summary>\n'
                '    <id>tag:%(host)s:%(md5)s</id>\n'
                '  </entry>\n' % {
                    "basename": _xml_escape(basename),
                    "mtime": _atom_date(mtime),
                    "address": address,
                    "filename": _xml_escape(_url_quote(relative)),
                    "summary": _xml_escape(summary),
                    "host": _xml_escape(options.hostname),
                    # The id is derived from the relative path only, so it
                    # stays the same from one run to the next.
                    "md5": hashlib.md5(_to_utf8(relative)).hexdigest(),
                }))

        feed.write(b"</feed>\n")
|
110 |
|
|
111 |
|
|
112 |
def parse_opts():
    """Parse the command line and return ``(options, directory)``.

    ``directory`` is the absolute path of the directory to scan (the single
    positional argument, or the current directory when none is given).

    Besides the parsed switches, the returned options object carries:

    * ``title`` with ``%s`` replaced by the hostname,
    * ``exclude`` as a list of stripped, non-empty names,
    * ``url``, the public address of ``directory``, built from the scheme,
      hostname, optional port and the part of ``directory`` below the
      document root.

    Invalid usage (several arguments, missing directory, missing document
    root) is reported through ``parser.error()``, which exits the program.
    """
    usage = "usage: %prog [options] [directory]"
    parser = optparse.OptionParser(usage)
    parser.add_option("-o", "--output", dest="output", metavar="FILE",
                      default="files.xml",
                      help="write to this file (default: %default)")
    parser.add_option("-m", "--max", dest="max", default=50, type="int",
                      help="the maximum number of files to include in the "
                           "feed (default: %default)")
    parser.add_option("-t", "--title", dest="title", default="Files on %s",
                      help="title for the feed. You can use %s to include "
                           "the hostname.")
    parser.add_option("-x", "--exclude", dest="exclude", default="",
                      metavar="DIR1,DIR2,...", help="a comma-separated list "
                      "of directories that should not be crawled")
    parser.add_option("-r", "--root", dest="root", help="the document root "
                      "(where the directory starts to be network-accessible)")
    parser.add_option("--hostname", dest="hostname",
                      default=socket.gethostname(),
                      help="the hostname to make links point to "
                           "(default: %default)")
    parser.add_option("--url-scheme", dest="url_scheme", default="http",
                      help="the URL method (default: %default)")
    parser.add_option("--url-port", dest="url_port",
                      help="the URL port, if not default for the chosen "
                           "scheme")
    parser.add_option("-d", "--debug", dest="debug", action="store_true",
                      help="Debug mode")
    opts, args = parser.parse_args()

    if len(args) > 1:
        parser.error("Only one directory can be given as argument")
    directory = args[0] if args else os.getcwd()
    if not os.path.isdir(directory):
        parser.error("The directory '%s' does not exist" % directory)
    directory = os.path.abspath(directory)

    # Plain substitution instead of the % operator: a title holding a stray
    # "%" (e.g. "100% files") must not raise a formatting error.
    if "%s" in opts.title:
        opts.title = opts.title.replace("%s", opts.hostname)

    # Drop empty names: "".split(",") is [""], not an empty list.
    opts.exclude = [name.strip() for name in opts.exclude.split(",")
                    if name.strip()]

    if not opts.root:
        parser.error("You must provide a document root")

    # Remove the document root only when it is a real leading path prefix;
    # both paths are made absolute first so that they are comparable.
    root = os.path.abspath(opts.root)
    prefix = root.rstrip(os.sep)
    if directory == root:
        path = ""
    elif directory.startswith(prefix + os.sep):
        path = directory[len(prefix):]
    else:
        path = directory
    if not path.startswith("/"):
        path = "/" + path

    url = [opts.url_scheme, "://", opts.hostname]
    if opts.url_port:
        # list.append() takes a single item; extend() adds both pieces.
        url.extend([":", str(opts.url_port)])
    url.append(path)
    opts.url = "".join(url)

    return opts, directory
|
168 |
|
|
169 |
|
|
170 |
if __name__ == "__main__":
    # Script entry point: read the command line, then generate the feed.
    parsed_options, target_directory = parse_opts()
    main(directory=target_directory, options=parsed_options)