1
# -*- coding: iso-8859-1 -*-
2
# Copyright (C) 2006 Libresoft
3
#
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; either version 2 of the License, or
7
# (at your option) any later version.
8
#
9
# This program is distributed in the hope that it will be useful,
10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
# GNU Library General Public License for more details.
13
#
14
# You should have received a copy of the GNU General Public License
15
# along with this program; if not, write to the Free Software
16
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17
#
18
# Authors : Gregorio Robles <grex@gsyc.escet.urjc.es>
19
20
"""
21
This module contains configuration parameters regarding file types
22
(documentation, development, sound, images...)
23
24
25
@author:       Gregorio Robles
26
@organization: Grupo de Sistemas y Comunicaciones, Universidad Rey Juan Carlos
27
@copyright:    Universidad Rey Juan Carlos (Madrid, Spain)
28
@license:      GNU GPL version 2 or any later version
29
@contact:      grex@gsyc.escet.urjc.es
30
"""
31
32
import re
33
34
# Source code files, headers and the like included (most common languages
# first).  Every entry is a compiled regex meant to be search()ed against a
# file name.
#
# NOTE: guess_file_type() lower-cases the name before searching, so the
# upper-case entries (.C, .S, .COB, .CBL, .F) can only match for callers
# that apply these patterns to the original, unmodified name.
config_files_code = list(map(re.compile, [
    r'\.c$',  # C
    r'\.pc$',  # C
    r'\.ec$',  # C
    r'\.ecp$',  # C
    r'\.C$',  # C++
    r'\.cpp$',  # C++
    r'\.c\+\+$',  # C++
    r'\.cxx$',  # C++
    r'\.cc$',  # C++
    r'\.pcc$',  # C++
    r'\.cpy$',  # C++
    r'\.h$',  # C or C++ header
    r'\.hh$',  # C++ header
    r'\.hpp$',  # C++ header
    r'\.hxx$',  # C++ header
    r'\.sh$',  # Shell
    r'\.pl$',  # Perl
    r'\.pm$',  # Perl
    r'\.pod$',  # Perl
    r'\.perl$',  # Perl
    r'\.cgi$',  # CGI
    r'\.php$',  # PHP
    r'\.php3$',  # PHP
    r'\.php4$',  # PHP
    r'\.inc$',  # PHP
    r'\.py$',  # Python
    r'\.java$',  # Java
    r'\.class$',  # Java Class (or at least a class in some OOPL)
    r'\.ada$',  # ADA
    r'\.ads$',  # ADA
    r'\.adb$',  # ADA
    r'\.pad$',  # ADA
    r'\.s$',  # Assembly
    r'\.S$',  # Assembly
    r'\.asm$',  # Assembly
    r'\.awk$',  # awk
    r'\.cs$',  # C#
    r'\.csh$',  # CShell (including tcsh)
    r'\.cob$',  # COBOL
    r'\.cbl$',  # COBOL
    r'\.COB$',  # COBOL
    r'\.CBL$',  # COBOL
    r'\.exp$',  # Expect
    r'\.l$',  # (F)lex
    r'\.ll$',  # (F)lex
    r'\.lex$',  # (F)lex
    r'\.f$',  # Fortran
    r'\.f77$',  # Fortran
    r'\.F$',  # Fortran
    r'\.hs$',  # Haskell
    r'\.lhs$',  # Not preprocessed Haskell
    r'\.el$',  # LISP (including Scheme)
    r'\.scm$',  # LISP (including Scheme)
    r'\.lsp$',  # LISP (including Scheme)
    r'\.jl$',  # LISP (including Scheme)
    r'\.ml$',  # ML
    r'\.ml3$',  # ML
    r'\.m3$',  # Modula3
    r'\.i3$',  # Modula3
    r'\.m$',  # Objective-C
    r'\.p$',  # Pascal
    r'\.pas$',  # Pascal
    r'\.rb$',  # Ruby
    r'\.sed$',  # sed
    r'\.tcl$',  # TCL
    r'\.tk$',  # TCL
    r'\.itk$',  # TCL
    r'\.y$',  # Yacc
    r'\.yy$',  # Yacc
    r'\.idl$',  # CORBA IDL
    r'\.gnorba$',  # GNOME CORBA IDL
    r'\.oafinfo$',  # GNOME OAF
    r'\.mcopclass$',  # MCOP IDL compiler generated class
    r'\.autoforms$',  # Autoform
    r'\.atf$',  # Autoform
    r'\.gnuplot$',
    r'\.xs$',  # Shared library? Seen a lot of them in gnome-perl
    r'\.js$',  # JavaScript (and who knows, maybe more)
    r'\.patch$',
    r'\.diff$',  # Sometimes patches appear this way
    r'\.ids$',  # Not really sure what this means
    r'\.upd$',  # Purpose unknown (from Kcontrol)
    # The next three used to start with '$' instead of '\.', which made
    # them impossible to match.
    r'\.ad$',  # Purpose unknown (from Kdisplay and mc)
    r'\.i$',  # Appears in the kbindings for Qt
    r'\.pri$',  # from Qt
    r'\.schema$',  # Not really sure what this means
    r'\.fd$',  # Something to do with latex
    r'\.cls$',  # Something to do with latex
    r'\.pro$',  # Postscript generation
    r'\.ppd$',  # PDF generation
    r'\.dlg$',  # Not really sure what this means
    r'\.plugin$',  # Plug-in file
    # .dsp, .dtd, .bat and .vala were missing the trailing '$' and therefore
    # matched anywhere in the name; they are anchored like every other
    # extension now.
    r'\.dsp$',  # Microsoft Developer Studio Project File
    r'\.vim$',  # vim syntax file
    r'\.trm$',  # gnuplot term file
    r'\.font$',  # Font mapping
    r'\.ccg$',  # C++ files - Found in gtkmm*
    r'\.hg$',  # C++ headers - Found in gtkmm*
    r'\.dtd$',  # XML Document Type Definition
    r'\.bat$',  # DOS batch files
    r'\.vala$',  # Vala
    r'\.py\.in$',
    r'\.rhtml$',  # eRuby
    r'\.sql$',  # SQL script
]))
143
144
# Developer-oriented documentation (README, ChangeLog, TODO and friends).
# Patterns starting with '^' must match from the beginning of the name; the
# remaining ones only constrain how the name ends.
config_files_devel_doc = list(map(re.compile, [
    r'^readme.*$',
    r'^changelog.*',
    r'^todo.*$',
    r'^credits.*$',
    r'^authors.*$',
    r'^changes.*$',
    r'^news.*$',
    r'^install.*$',
    r'^hacking.*$',
    r'^copyright.*$',
    r'^licen(s|c)e.*$',
    r'^copying.*$',
    r'manifest$',
    r'faq$',
    r'building$',
    r'howto$',
    r'design$',
    r'\.files$',
    r'files$',
    r'subdirs$',
    r'maintainers$',
    r'developers$',
    r'contributors$',
    r'thanks$',
    r'releasing$',
    r'test$',
    r'testing$',
    r'build$',
    r'comments?$',
    r'bugs$',
    r'buglist$',
    r'problems$',
    r'debug$',
    r'hacks$',
    r'hacking$',
    r'versions?$',
    r'mappings$',
    r'tips$',
    r'ideas?$',
    r'spec$',
    r'compiling$',
    r'notes$',
    r'missing$',
    r'done$',
    r'\.omf$',  # XML-based format used in GNOME
    r'\.lsm$',
    r'^doxyfile$',
    r'\.kdevprj$',
    r'\.directory$',
    r'\.dox$',
    r'\.doap$',
]))
199
200
# Build system, compilation, configuration and CVS administration files.
#
# NOTE(review): r'\.in.*$' matches '.in' followed by anything, so names such
# as foo.inc or foo.py.in are claimed by this table before the 'code'
# patterns r'\.inc$' and r'\.py\.in$' get a chance (config_files lists
# 'build' ahead of 'code') -- confirm this is intended.
#
# Two legacy entries, '%debian%' and '%specs/%', were already disabled in
# the original table and remain so.
config_files_building = list(map(re.compile, [
    r'\.in.*$',
    r'configure.*$',
    r'makefile.*$',
    r'config\.sub$',
    r'config\.guess$',
    r'config\.status$',
    r'ltmain\.sh$',
    r'autogen\.sh$',
    r'config$',
    r'conf$',
    r'cvsignore$',
    r'\.cfg$',
    r'\.m4$',
    r'\.mk$',
    r'\.mak$',
    r'\.make$',
    r'\.mbx$',
    r'\.protocol$',
    r'\.version$',
    r'mkinstalldirs$',
    r'install-sh$',
    r'rules$',
    r'\.kdelnk$',
    r'\.menu$',
    r'linguas$',  # Build translations
    r'potfiles.*$',  # Build translations
    r'\.shlibs$',  # Shared libraries
    r'\.spec$',  # It seems they're necessary for RPM building
    r'\.def$',  # build bootstrap for DLLs on win32
]))
235
236
237
238
# End-user documentation: markup, typeset output, man pages, office
# documents and per-language text files.
#
# Legacy entries 'doc/%', '%HOWTO%' and '%tutorial%' were already disabled
# in the original table and remain so.
config_files_documentation = list(map(re.compile, [
    r'\.html$',
    r'\.txt$',
    r'\.ps(\.gz|\.bz2)?$',
    r'\.dvi(\.gz|\.bz2)?$',
    r'\.lyx$',
    r'\.tex$',
    r'\.texi$',
    r'\.pdf(\.gz|\.bz2)?$',
    r'\.djvu$',
    r'\.epub$',
    r'\.sgml$',
    r'\.docbook$',
    r'\.wml$',
    r'\.xhtml$',
    r'\.phtml$',
    r'\.shtml$',
    r'\.htm$',
    r'\.rdf$',
    r'\.phtm$',
    r'\.tmpl$',
    r'\.ref$',  # References
    r'\.css$',
    r'\.templates$',
    r'\.dsl$',
    r'\.ent$',
    r'\.xml$',
    r'\.xmi$',
    r'\.xsl$',
    r'\.entities$',
    r'\.[1-7]$',  # Man pages
    r'\.man$',
    r'\.manpages$',
    r'\.doc$',
    r'\.rtf$',
    r'\.wpd$',
    r'\.qt3$',
    r'man\d?/.*\.\d$',
    r'\.docs$',
    r'\.sdw$',  # OpenOffice.org Writer document
    r'\.odt$',  # OpenOffice.org document
    r'\.en$',  # Files in English language
    r'\.de$',  # Files in German
    r'\.es$',  # Files in Spanish
    r'\.fr$',  # Files in French
    r'\.it$',  # Files in Italian
    r'\.cz$',  # Files in Czech
]))
291
292
# Image and drawing formats (raster, vector and diagram files).
config_files_images = list(map(re.compile, [
    r'\.png$',
    r'\.jpg$',
    r'\.jpeg$',
    r'\.bmp$',
    r'\.gif$',
    r'\.xbm$',
    r'\.eps$',
    r'\.mng$',
    r'\.pnm$',
    r'\.pbm$',
    r'\.ppm$',
    r'\.pgm$',
    r'\.gbr$',
    r'\.svg$',
    r'\.fig$',
    r'\.tif$',
    r'\.swf$',
    r'\.svgz$',
    r'\.shape$',  # XML files used for shapes for instance in Kivio
    r'\.sml$',  # XML files used for shapes for instance in Kivio
    r'\.bdf$',  # vfontcap - Vector Font Capability Database (VFlib Version 2)
    r'\.ico$',
    r'\.dia$',  # .dia counts as an image so it does not end up in 'unknown'
]))
319
320
# Translation (i18n) catalogs and related files.
config_files_translation = list(map(re.compile, [
    r'\.po$',
    r'\.pot$',
    r'\.charset$',
    r'\.mo$',
]))
328
329
# User interface descriptions and resources.  The final entry has no dot
# on purpose: it matches any name that ends in 'rc'.
config_files_ui = list(map(re.compile, [
    r'\.desktop$',
    r'\.ui$',
    r'\.xpm$',
    r'\.xcf$',
    r'\.3ds$',
    r'\.theme$',
    r'\.kimap$',
    r'\.glade$',
    r'\.gtkbuilder$',
    r'rc$',
]))
343
344
# Audio formats.
config_files_sound = list(map(re.compile, [
    r'\.mp3$',
    r'\.ogg$',
    r'\.wav$',
    r'\.au$',
    r'\.mid$',
    r'\.vorbis$',
    r'\.midi$',
    r'\.arts$',
]))
356
357
# Packages and archives (yes, there are people who upload packages to the
# repo).
config_files_packages = list(map(re.compile, [
    r'\.tar$',
    # The dot before the compression suffix is escaped so that only a
    # literal '.tar.gz' / '.tar.bz2' ending matches; it used to be a bare
    # '.', which accepted any character in that position.
    r'\.tar\.gz$',
    r'\.tar\.bz2$',
    r'\.tgz$',
    r'\.deb$',
    r'\.rpm$',
    r'\.srpm$',
    r'\.ebuild$',
]))
369
370
# Ordered table of (category name, pattern list) pairs.
#
# guess_file_type() walks it from top to bottom and reports the first
# category owning a matching pattern, so the order is significant: 'build'
# has to come before 'code' for ltmain.sh to be reported as build rather
# than code.
config_files = [
    ('image', config_files_images),
    ('i18n', config_files_translation),
    ('ui', config_files_ui),
    ('multimedia', config_files_sound),
    ('package', config_files_packages),
    ('build', config_files_building),
    ('code', config_files_code),
    ('documentation', config_files_documentation),
    ('devel-doc', config_files_devel_doc),
]
383
384
def guess_file_type(filename):
    """Return the category of *filename* according to config_files.

    The name is lower-cased once and every pattern is applied to it with
    search(); categories are tried in the order config_files lists them and
    the first one with a matching pattern wins.

    @param filename: name of the file to classify
    @return: the category name of the first match, or 'unknown' when no
             pattern matches
    """
    # Lower-case once up front instead of once per pattern.
    lowered = filename.lower()

    for file_type, patt_list in config_files:
        for patt in patt_list:
            if patt.search(lowered):
                return file_type

    return 'unknown'
391
392
if __name__ == '__main__':
    # Command-line helper: classify a single file, or every file found
    # below a directory.  print() is always called with one pre-formatted
    # string so the script runs unchanged on Python 2 and Python 3.
    import sys
    import os

    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("Usage: %s <file-or-directory>" % sys.argv[0])

    path = sys.argv[1]
    if os.path.isdir(path):
        for root, dirs, files in os.walk(path):
            # Prune version-control directories in place so that os.walk
            # does not descend into them.
            for skip in ('.svn', 'CVS', '.git'):
                if skip in dirs:
                    dirs.remove(skip)

            for name in files:
                print("%s: %s" % (os.path.join(root, name),
                                  guess_file_type(name)))
    else:
        # NOTE(review): the path is classified as given, directory part
        # included, whereas the directory branch passes bare file names;
        # '^'-anchored patterns therefore behave differently here --
        # confirm whether that is intended.
        print(guess_file_type(path))