note docbook-website depends
[freedombox-privoxy:freedombox-privoxy.git] / abp_import.py
1 #!/usr/bin/env python
2
3 """
4 Script to translate easyprivacy list into rules for privoxy.
5
6 James Vasile
7 """
8
9 import sys
10
# Download locations of the Adblock Plus filter lists. They are only used in
# the usage message printed by main() when no input file is given.
easylist_url = "https://easylist-downloads.adblockplus.org/easylist.txt"
easyprivacy_url = "https://easylist-downloads.adblockplus.org/easyprivacy.txt"
13
14
def out(*args):
    """Write every argument to stdout, followed by a newline in most cases.

    The arguments are written back to back with no separator.  A trailing
    newline is added only when at least one argument was given and the
    first argument is not the empty string.

    Each argument must be a string, since it is passed straight to
    ``sys.stdout.write``.
    """
    for a in args:
        sys.stdout.write(a)
    if args and args[0] != "":
        # Write the newline explicitly: a bare ``print`` only emits one as a
        # Python 2 statement; under Python 3 it is an expression that
        # silently does nothing.
        sys.stdout.write("\n")
20
def clean_pattern(pat):
    """Return the pattern ``pat`` ready for output.

    This is currently a pass-through: the pattern is handed back unchanged.
    clean_rule() calls it for rules that start and end with "/".
    """
    return pat
23
def clean_rule(rule):
    """Convert one option-free Adblock Plus rule into an output line.

    Rules that both start and end with "/" are passed through
    clean_pattern().  Every other rule is rewritten as follows:

    * "?" is escaped as "\\?";
    * "^" becomes the character class "[/:=?&]";
    * "||" becomes "^";
    * a leading "|" becomes "^" and a trailing "|" becomes "$".

    Returns the converted rule terminated by a newline, or "" when the rule
    is empty.  The result is always a string, so callers can concatenate it
    without checking for None.
    """
    if rule.startswith("/") and rule.endswith("/"):
        # Terminate the line like every other rule; otherwise it would be
        # glued onto whatever is emitted next.
        return clean_pattern(rule) + "\n"

    # Order matters: "?" is escaped before "^" introduces a literal "?"
    # inside the character class, and "^" is expanded before "||" is turned
    # into a "^" anchor.
    rule = (rule
            .replace("?", r"\?")
            .replace("^", "[/:=?&]")
            .replace("||", "^")
            )
    if rule.startswith("|"):
        rule = "^" + rule[1:]
    if rule.endswith("|"):
        rule = rule[:-1] + "$"

    if not rule:
        # Blank input line: emit nothing instead of falling off the end and
        # returning None.
        return ""
    return rule + "\n"
39
def ignore_opt(pat, opts, opt):
    """Translate the rule ``pat`` again with the option ``opt`` removed.

    Every entry of ``opts`` equal to ``opt`` is dropped.  If any options
    remain they are re-attached to ``pat`` after a "$", joined by commas;
    otherwise ``pat`` is translated on its own.  Returns whatever
    translate() returns for the rebuilt rule.
    """
    kept = []
    for candidate in opts:
        if candidate != opt:
            kept.append(candidate)

    rebuilt = pat + "$" + ','.join(kept) if kept else pat
    return translate(rebuilt)
47
def translate(line):
    """Translate one (already stripped) Adblock Plus line.

    * Lines starting with "!" are returned as "#" comment lines.
    * Lines starting with "@@" are queued in the module-level ``unblock``
      list (without the "@@") and produce no output here.
    * Lines containing "##" are stored in the module-level ``filter`` list
      and produce no output.
    * Lines with "$" options: the options "third-party", "~third-party",
      "script" and "image" (and empty options) are dropped one at a time
      via ignore_opt(); if no option can be dropped, a note is written to
      stderr and nothing is returned for the rule.
    * Anything else goes through clean_rule().

    Returns the text to emit for this line, "" when there is none.
    """
    # Exact option names that are dropped.  "" covers a dangling "$" or
    # stray comma, which the old substring test also let through.
    ignorable = ("third-party", "~third-party", "script", "image", "")

    if line.startswith("!"):
        return "#%s\n" % line[1:]

    if line.startswith("@@"):
        unblock.append(line[2:])
        return ""

    # Checked before "$": element-hiding selectors may themselves contain
    # "$" (e.g. [href$=".php"]) and must not be parsed as rule options.
    if '##' in line:
        filter.append(line)
        return ""

    if '$' in line:
        # Split on the last "$" only, so a "$" inside the pattern cannot
        # break the two-way unpacking.
        pat, _, opt_text = line.rpartition("$")
        opts = opt_text.split(',')
        for opt in opts:
            if opt in ignorable:
                return ignore_opt(pat, opts, opt)

        sys.stderr.write("Unhandled options: "+', '.join(opts) + "\n")
        return ""

    return clean_rule(line)
66
def translate_all(easylist, infile):
    """Translate a whole rule list and return the combined output text.

    Each entry of ``easylist`` is stripped of surrounding whitespace and
    passed to translate().  After those results comes a
    "{-block{<infile>}}" header line, followed by the translation of every
    rule that translate() queued in the module-level ``unblock`` list.
    """
    pieces = [translate(raw.strip()) for raw in easylist]

    # Exception rules collected during the pass above go under their own
    # section header.
    pieces.append("{-block{%s}}\n" % infile)
    pieces.extend(translate(exception) for exception in unblock)

    return "".join(pieces)
79
# Module-level accumulators filled in by translate().
# unblock: "@@" exception rules with the "@@" prefix removed; translate_all()
# translates them after the "{-block{...}}" header.
unblock = []
# filter: lines containing "##"; collected here but not used anywhere else in
# this file.  NOTE: the name shadows the builtin filter() inside this module.
filter = [] # todo: convert ## commands into filters
def main():
    """Command-line entry point: convert the rules file named in argv[1].

    Prints a "{+block{<file>}}" header followed by the output of
    translate_all() to stdout.  If no file name is given, prints a usage
    message with the list download URLs and exits with status 1.
    """
    # Single-argument print(...) calls behave the same under Python 2 and 3.
    if len(sys.argv) < 2:
        print("Must specify filename of ad block plus rules file to process.")
        print("You can get those lists from:")
        print(easylist_url)
        print(easyprivacy_url)
        # Non-zero status so calling scripts can detect the usage error.
        sys.exit(1)

    infile = sys.argv[1]

    with open(infile, 'r') as INF:
        easylist = INF.readlines()

    print("{+block{%s}}" % infile)

    # Turn the first line into a "!" comment so it is emitted as a "#"
    # comment rather than treated as a rule (presumably this is the
    # "[Adblock Plus x.y]" header line -- confirm).  Skipped for an empty
    # file, which would otherwise raise IndexError.
    if easylist:
        easylist[0] = "! " + easylist[0]
    print(translate_all(easylist, infile))
99
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
102