#!/usr/bin/env python
# vim: set fileencoding=utf-8 tabstop=4 shiftwidth=4 expandtab smartindent:

u"""

Files to Feed
-------------

Create an Atom feed from the files in a directory.

.. :Authors:
    Aurélien Bompard <aurelien@bompard.org> <http://aurelien.bompard.org>

.. :License:
    GNU GPL v3 or later
"""

| 18 |
import os |
| 19 |
import sys |
| 20 |
import socket |
| 21 |
import datetime |
| 22 |
import hashlib |
| 23 |
import cgi |
| 24 |
import urllib |
| 25 |
import optparse |
| 26 |
|
| 27 |
|
| 28 |
# Opening part of the Atom document; every substituted value must already be
# XML-escaped by the caller.
_FEED_HEADER = """<?xml version="1.0" encoding="utf-8" ?>
<feed xmlns="http://www.w3.org/2005/Atom">
  <title>%(title)s</title>
  <link rel="self" href="%(address)s/%(feed_name)s" />
  <updated>%(date)s</updated>
  <id>%(address)s</id>
"""

# One Atom entry per file; every substituted value must already be escaped.
_FEED_ENTRY = """  <entry>
    <title>%(basename)s</title>
    <updated>%(mtime)s</updated>
    <link href="%(link)s" />
    <summary type="html">%(summary)s</summary>
    <id>tag:%(host)s:%(md5)s</id>
  </entry>
"""


def _format_atom_time(timestamp=None):
    """Format *timestamp* (seconds since the epoch; now if None) as UTC."""
    import time
    # The trailing "Z" means UTC, so the value must come from gmtime(),
    # not from the local clock.
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(timestamp))


def _prune_dirs(dirs, options):
    """Remove excluded and hidden names from *dirs* in place."""
    kept = []
    for name in dirs:
        if name in options.exclude or name.startswith("."):
            if options.debug:
                print("excluding %s" % name)
        else:
            kept.append(name)
    # Slice assignment keeps the list object os.walk() handed us, so the
    # pruning also stops os.walk() from descending into the removed names.
    # Building a new list first avoids deleting while iterating, which
    # silently skips the element following each deleted one.
    dirs[:] = kept


def _collect_files(directory, options):
    """Return the paths of all files below *directory*, honouring excludes."""
    found = []
    for root, dirs, cur_files in os.walk(directory):
        _prune_dirs(dirs, options)
        for subdir in dirs:
            linked = os.path.join(root, subdir)
            if not os.path.islink(linked):
                continue
            # os.walk() does not descend into symlinked directories by
            # default, so walk each of them explicitly.
            for subroot, subdirs, sub_files in os.walk(linked):
                _prune_dirs(subdirs, options)
                found.extend(os.path.join(subroot, name)
                             for name in sub_files)
        found.extend(os.path.join(root, name) for name in cur_files)
    return found


def main(directory, options):
    """Write an Atom feed of the most recently modified files in *directory*.

    Args:
        directory: path of the directory to crawl. File paths are made
            relative to it by stripping it as a leading prefix.
        options: object with the attributes ``output`` (feed file to write),
            ``title``, ``url`` (address the directory is served at),
            ``hostname``, ``exclude`` (collection of names to skip),
            ``max`` (maximum number of entries) and ``debug``.

    Files are skipped when their name is excluded, starts with a dot, or
    equals the name of the running script or of the output file. A file that
    cannot be stat'ed is reported on stdout and skipped. The feed file is
    only opened once the directory has been crawled.
    """
    from xml.sax.saxutils import escape
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    skipped_names = (os.path.basename(sys.argv[0]),
                     os.path.basename(options.output))

    entries = []
    for path in _collect_files(directory, options):
        name = os.path.basename(path)
        if name in options.exclude or name.startswith("."):
            continue
        if name in skipped_names:
            continue
        try:
            mtime = os.path.getmtime(path)
            size = os.path.getsize(path)
        except OSError:
            print("WARNING: can't access file %s" % path)
            continue
        # Strip the directory only as a leading prefix; str.replace() would
        # also remove later occurrences of the same string inside the path.
        if path.startswith(directory):
            relative = path[len(directory):]
        else:
            relative = path
        entries.append((relative, mtime, size))

    # Newest first.
    entries.sort(key=lambda entry: entry[1], reverse=True)

    quote_attr = {'"': "&quot;"}
    address = escape(options.url, quote_attr)

    with open(options.output, "w") as feed:
        feed.write(_FEED_HEADER % {
            "feed_name": escape(os.path.basename(options.output), quote_attr),
            "title": escape(options.title),
            "address": address,
            "date": _format_atom_time(),
        })

        for relative, mtime, size in entries[:options.max]:
            if isinstance(relative, bytes):
                raw_name = relative
            else:
                raw_name = relative.encode("utf-8")
            basename = escape(os.path.basename(relative))
            summary_html = "<p>%s (%sKo)<br/>In: %s</p>" % (
                basename,
                int(size) // 1024,
                escape(os.path.dirname(relative)),
            )
            feed.write(_FEED_ENTRY % {
                "basename": basename,
                "mtime": _format_atom_time(mtime),
                "link": address + escape(quote(relative), quote_attr),
                # type="html" requires the markup itself to be escaped.
                "summary": escape(summary_html),
                "host": escape(options.hostname),
                "md5": hashlib.md5(raw_name).hexdigest(),
            })

        feed.write("</feed>\n")
| 110 |
|
| 111 |
|
| 112 |
def parse_opts():
    """Parse the command line and derive the settings used to build the feed.

    Returns:
        An ``(opts, directory)`` tuple. ``directory`` is the absolute path of
        the directory to crawl (the current directory when no argument is
        given). ``opts`` holds the parsed options, with ``title`` having
        every ``%s`` replaced by the hostname, ``exclude`` turned into a list
        of names, and an added ``url`` attribute: the address under which
        ``directory`` is served, without a trailing slash.

    Exits through ``parser.error`` when more than one argument is given, the
    directory does not exist, or no document root is provided.
    """
    usage = "usage: %prog [options] [directory]"
    parser = optparse.OptionParser(usage)
    parser.add_option("-o", "--output", dest="output", metavar="FILE",
                      default="files.xml",
                      help="write to this file (default: %default)")
    parser.add_option("-m", "--max", dest="max", default=50, type="int",
                      help="the maximum number of files to include in the "
                           "feed (default: %default)")
    parser.add_option("-t", "--title", dest="title", default="Files on %s",
                      help="title for the feed. You can use %s to include "
                           "the hostname.")
    parser.add_option("-x", "--exclude", dest="exclude", default="",
                      metavar="DIR1,DIR2,...", help="a comma-separated list "
                      "of directories that should not be crawled")
    parser.add_option("-r", "--root", dest="root", help="the document root "
                      "(where the directory starts to be network-accessible)")
    parser.add_option("--hostname", dest="hostname",
                      default=socket.gethostname(),
                      help="the hostname to make links point to "
                           "(default: %default)")
    parser.add_option("--url-scheme", dest="url_scheme", default="http",
                      help="the URL method (default: %default)")
    parser.add_option("--url-port", dest="url_port",
                      help="the URL port, if not default for the chosen "
                           "scheme")
    parser.add_option("-d", "--debug", dest="debug", action="store_true",
                      help="Debug mode")
    opts, args = parser.parse_args()

    # parser.error() exits, so `directory` is always bound below.
    if not args:
        directory = os.getcwd()
    elif len(args) == 1:
        directory = args[0]
    else:
        parser.error("Only one directory can be given as argument")
    if not os.path.isdir(directory):
        parser.error("The directory '%s' does not exist" % directory)
    directory = os.path.abspath(directory)

    # Plain replacement instead of %-formatting: a title such as "100% mine"
    # or one with several "%s" must not raise.
    opts.title = opts.title.replace("%s", opts.hostname)

    # Drop empty items so that an empty option yields an empty list.
    opts.exclude = [name.strip() for name in opts.exclude.split(",")
                    if name.strip()]

    if not opts.root:
        parser.error("You must provide a document root")

    # Remove the document root only as a leading path prefix, on a path
    # component boundary.
    root = os.path.abspath(opts.root).rstrip(os.sep)
    if directory == root or directory.startswith(root + os.sep):
        path = directory[len(root):]
    else:
        path = directory
    # No trailing slash: consumers append "/<name>" to the address.
    path = path.replace(os.sep, "/").rstrip("/")

    url = [opts.url_scheme, "://", opts.hostname]
    if opts.url_port:
        url.extend([":", opts.url_port])
    if path and not path.startswith("/"):
        url.append("/")
    url.append(path)
    opts.url = "".join(url)

    return opts, directory
| 168 |
|
| 169 |
|
| 170 |
if __name__ == "__main__":
    # Script entry point: read the command line, then build the feed for
    # the directory it designates.
    parsed_options, target_directory = parse_opts()
    main(target_directory, parsed_options)