1
#!/usr/bin/env php
2
<?php
3
/*
4
 * StatusNet - the distributed open-source microblogging tool
5
 * Copyright (C) 2008, 2009, StatusNet, Inc.
6
 *
7
 * This program is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Affero General Public License as published by
9
 * the Free Software Foundation, either version 3 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * This program is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Affero General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Affero General Public License
18
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
 */
20
21
// Root of the installation: the resolved parent of this script's directory.
define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));

// Usage text and option string are assigned before commandline.inc is
// loaded (presumably that include reads both -- confirm against it).
$helptext = <<<END_OF_SITEMAP_HELP
Script for creating sitemaps files per http://sitemaps.org/

    -f <indexfile>   Use <indexfile> as output file
    -d <outputdir>   Use <outputdir> for new sitemaps
    -u <outputurl>   Use <outputurl> as root for URLs

END_OF_SITEMAP_HELP;

$shortoptions = 'f:d:u:';

require_once INSTALLDIR . '/scripts/commandline.inc';

// Resolved once here and read by the map functions through "global".
$output_paths = parse_args();

// The index is built last because it lists the *.xml files that the
// three calls before it write into the output directory.
standard_map();
notices_map();
user_map();
index_map();
42
43
// ------------------------------------------------------------------------------
44
// Main functions: get data out and turn them into sitemaps
45
// ------------------------------------------------------------------------------
46
47
// Generate the index sitemap: one <sitemap> entry for every *.xml file
// found in the output directory, written to the configured index file.
//
// Reads 'output_dir', 'output_url' and 'index_file' from the global
// $output_paths array.
function index_map()
{
    global $output_paths;
    $output_dir = $output_paths['output_dir'];
    $output_url = $output_paths['output_url'];
    $index_file = $output_paths['index_file'];

    // Start from an empty string so the first append does not read an
    // undefined variable (and an empty directory still yields valid XML).
    $index_urls = '';

    // glob() returns false on error; treat that the same as "no files".
    $files = glob($output_dir . '*.xml');
    if ($files === false) {
        $files = array();
    }

    // Resolved location of the index file itself, so that an index left
    // in the output directory by an earlier run is not listed inside
    // the new index.
    $index_real = false;
    if (!is_null($index_file) && $index_file !== '') {
        $index_real = realpath($index_file);
    }

    foreach ($files as $file_path) {
        if ($index_real !== false && realpath($file_path) === $index_real) {
            continue;
        }

        // Just the file name please. basename() is used instead of a
        // regex built from the directory, which broke on directories
        // containing regex metacharacters.
        $file_name = basename($file_path);

        $index_urls .= sitemap(
            array(
                'url' => $output_url . $file_name,
                'changefreq' => 'daily'
            )
        );
    }

    write_file($index_file, sitemapindex($index_urls));
}
69
70
// Generate the sitemap of standard site elements (public timeline, its
// RSS feed and the documentation pages) and write it to
// <output_dir>standard.xml.
function standard_map()
{
    global $output_paths;

    // Each entry is array(location, changefreq, priority).
    $entries = array(
        array(common_local_url('public'), 'daily', '1'),
        array(common_local_url('publicrss'), 'daily', '0.3'),
    );

    $docs = array('about', 'faq', 'contact', 'im', 'openid', 'openmublog',
        'privacy', 'source', 'badge');

    foreach ($docs as $title) {
        $entries[] = array(
            common_local_url('doc', array('title' => $title)),
            'monthly',
            '0.2',
        );
    }

    // Initialised explicitly: the original appended to an undefined
    // variable on its first use.
    $standard_map_urls = '';

    foreach ($entries as $entry) {
        list($location, $changefreq, $priority) = $entry;

        $standard_map_urls .= url(
            array(
                'url'        => $location,
                'changefreq' => $changefreq,
                'priority'   => $priority,
            )
        );
    }

    $urlset_path = $output_paths['output_dir'] . 'standard.xml';

    write_file($urlset_path, urlset($standard_map_urls));
}
108
109
// Generate sitemaps of all local notices, split into files of at most
// 50,000 URLs each (notice-1.xml, notice-2.xml, ...).
function notices_map()
{
    $notices = DB_DataObject::factory('notice');

    $notices->query('SELECT id, uri, url, modified FROM notice where is_local = 1');

    // Map number => concatenated <url> elements. Initialised so that an
    // empty result set hands array_to_map() an empty array rather than
    // an undefined variable.
    $notice_list = array();
    $notice_count = 0;
    $map_count = 1;

    while ($notices->fetch()) {

        // Maximum 50,000 URLs per sitemap file.
        if ($notice_count == 50000) {
            $notice_count = 0;
            $map_count++;
        }

        // Only notices without a url value and with a uri are listed
        // (the original comment notes that remote notices have a URL).
        // The original also carried a common_local_url('shownotice')
        // fallback here, but it could never run because this same
        // condition already requires a uri.
        if ($notices->url || !$notices->uri) {
            continue;
        }

        $notice = array(
            'url'        => $notices->uri,
            'lastmod'    => common_date_w3dtf($notices->modified),
            'changefreq' => 'never',
            'priority'   => '1',
        );

        if (!isset($notice_list[$map_count])) {
            $notice_list[$map_count] = '';
        }

        $notice_list[$map_count] .= url($notice);
        $notice_count++;
    }

    // Make full sitemaps from the lists and save them.
    array_to_map($notice_list, 'notice');
}
147
148
// Generate sitemaps of all users: for every user, one URL per facet
// (profile stream, "all" timeline, replies, their RSS feeds and FOAF).
// Each facet gets its own series of files, at most 50,000 URLs per file
// (user-1.xml, user_rss-1.xml, ...).
function user_map()
{
    // File-name prefix => array(action, changefreq, priority).
    // The order here is the order in which the files are written.
    $facets = array(
        'user'        => array('showstream', 'daily',  '1'),
        'user_rss'    => array('userrss',    'daily',  '0.3'),
        'all'         => array('all',        'daily',  '1'),
        'all_rss'     => array('allrss',     'daily',  '0.3'),
        'replies'     => array('replies',    'daily',  '1'),
        'replies_rss' => array('repliesrss', 'daily',  '0.3'),
        'foaf'        => array('foaf',       'weekly', '0.5'),
    );

    // Prefix => (map number => concatenated <url> elements). Initialised
    // up front so nothing is appended to, or passed on as, an undefined
    // variable -- including when there are no users at all.
    $lists = array();
    foreach (array_keys($facets) as $prefix) {
        $lists[$prefix] = array();
    }

    $users = DB_DataObject::factory('user');

    $users->query('SELECT id, nickname FROM user');

    $user_count = 0;
    $map_count = 1;

    while ($users->fetch()) {

        // Maximum 50,000 URLs per sitemap file.
        if ($user_count == 50000) {
            $user_count = 0;
            $map_count++;
        }

        $user_args = array('nickname' => $users->nickname);

        // Construct a <url></url> element for each user facet and add it
        // to the existing list for that facet.
        foreach ($facets as $prefix => $facet) {
            list($action, $changefreq, $priority) = $facet;

            if (!isset($lists[$prefix][$map_count])) {
                $lists[$prefix][$map_count] = '';
            }

            $lists[$prefix][$map_count] .= url(
                array(
                    'url'        => common_local_url($action, $user_args),
                    'changefreq' => $changefreq,
                    'priority'   => $priority,
                )
            );
        }

        $user_count++;
    }

    // Make full sitemaps from the lists and save them.
    foreach ($lists as $prefix => $url_list) {
        array_to_map($url_list, $prefix);
    }
}
237
238
// ------------------------------------------------------------------------------
239
// XML generation functions
240
// ------------------------------------------------------------------------------
241
242
// Generate a <url></url> element.
//
// $url_args is an array with a required 'url' key and optional
// 'lastmod', 'changefreq' and 'priority' keys. Returns the element as a
// tab-indented, newline-terminated string. Calls error() (which exits)
// when 'url' is missing.
function url($url_args)
{
    // Validate before escaping: the original tested the result of
    // preg_replace(), which is never null, so the check could not fire.
    if (!isset($url_args['url'])) {
        error("url() arguments require 'url' value.");
    }

    // Escape all five XML special characters; the ampersand must go
    // first so the entities produced by the later replacements are not
    // escaped a second time.
    $url = str_replace(
        array('&', '<', '>', '"', "'"),
        array('&amp;', '&lt;', '&gt;', '&quot;', '&apos;'),
        $url_args['url']
    );

    // Optional keys are read through isset() because callers routinely
    // omit them.
    $lastmod    = isset($url_args['lastmod']) ? $url_args['lastmod'] : null;
    $changefreq = isset($url_args['changefreq']) ? $url_args['changefreq'] : null;
    $priority   = isset($url_args['priority']) ? $url_args['priority'] : null;

    $url_out = "\t<url>\n";
    $url_out .= "\t\t<loc>$url</loc>\n";

    if ($changefreq) {
        $url_out .= "\t\t<changefreq>$changefreq</changefreq>\n";
    }

    if ($lastmod) {
        $url_out .= "\t\t<lastmod>$lastmod</lastmod>\n";
    }

    // Compared explicitly rather than by truthiness so that a priority
    // of '0' is still written out.
    if ($priority !== null && $priority !== '') {
        $url_out .= "\t\t<priority>$priority</priority>\n";
    }

    $url_out .= "\t</url>\n";

    return $url_out;
}
273
274
// Generate a <sitemap></sitemap> element for use in a sitemap index.
//
// $sitemap_args is an array with a required 'url' key and an optional
// 'lastmod' key; any other keys are ignored. Returns the element as a
// tab-indented, newline-terminated string. Calls error() (which exits)
// when 'url' is missing.
function sitemap($sitemap_args)
{
    // Validate before escaping: the original tested the result of
    // preg_replace(), which is never null, so the check could not fire.
    if (!isset($sitemap_args['url'])) {
        error("sitemap() arguments require 'url' value.");
    }

    // Escape all five XML special characters; the ampersand must go
    // first so the entities produced by the later replacements are not
    // escaped a second time.
    $url = str_replace(
        array('&', '<', '>', '"', "'"),
        array('&amp;', '&lt;', '&gt;', '&quot;', '&apos;'),
        $sitemap_args['url']
    );

    // 'lastmod' is optional; read it through isset() because callers
    // may omit it.
    $lastmod = isset($sitemap_args['lastmod']) ? $sitemap_args['lastmod'] : null;

    $sitemap_out = "\t<sitemap>\n";
    $sitemap_out .= "\t\t<loc>$url</loc>\n";

    if ($lastmod) {
        $sitemap_out .= "\t\t<lastmod>$lastmod</lastmod>\n";
    }

    $sitemap_out .= "\t</sitemap>\n";

    return $sitemap_out;
}
294
295
// Wrap already-built <url> elements in a complete sitemap document:
// XML declaration, opening <urlset> tag with the sitemaps.org 0.9
// namespace, the given text, and the closing tag (no trailing newline).
function urlset($urlset_text)
{
    $pieces = array(
        '<?xml version="1.0" encoding="UTF-8"?>',
        "\n",
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
        "\n",
        $urlset_text,
        '</urlset>',
    );

    return implode('', $pieces);
}
305
306
// Wrap already-built <sitemap> elements in a complete sitemap index
// document: XML declaration, opening <sitemapindex> tag with the
// sitemaps.org 0.9 namespace, the given text, and the closing tag
// (no trailing newline).
function sitemapindex($sitemapindex_text)
{
    $pieces = array(
        '<?xml version="1.0" encoding="UTF-8"?>',
        "\n",
        '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
        "\n",
        $sitemapindex_text,
        '</sitemapindex>',
    );

    return implode('', $pieces);
}
316
317
// Generate sitemaps from an array of <url></url> element strings and
// write each one to a file.
//
// $url_list maps a map index to a long string of concatenated <url>
// elements; each entry is written to
// <output_dir><filename_prefix>-<index>.xml. An empty or non-array
// $url_list writes nothing.
function array_to_map($url_list, $filename_prefix)
{
    global $output_paths;

    if (empty($url_list) || !is_array($url_list)) {
        return;
    }

    // foreach replaces the original each() loop: each() is deprecated
    // as of PHP 7.2 and removed in PHP 8.0.
    foreach ($url_list as $map_idx => $map_urls) {
        $urlset_path = $output_paths['output_dir'] . "$filename_prefix-$map_idx.xml";

        write_file($urlset_path, urlset($map_urls));
    }
}
331
332
// ------------------------------------------------------------------------------
333
// Internal functions
334
// ------------------------------------------------------------------------------
335
336
// Parse command line arguments.
//
// Reads the -f (index file), -d (output directory) and -u (output URL
// root) options and returns them as an array with the keys
// 'index_file', 'output_dir' and 'output_url'; the latter two are given
// a trailing slash. Calls error() (which exits) when an option is
// missing or the output directory is unusable.
function parse_args()
{
    $index_file = get_option_value('f');
    $output_dir = get_option_value('d');
    $output_url = get_option_value('u');

    // Fail up front on missing options. Previously a missing -f only
    // surfaced at the final index write, after every sitemap had been
    // generated, and a missing -u silently produced URLs rooted at "/".
    // NOTE(review): assumes get_option_value() yields null, false or ''
    // for an absent option -- confirm against commandline.inc.
    $required = array(
        '-f <indexfile>' => $index_file,
        '-d <outputdir>' => $output_dir,
        '-u <outputurl>' => $output_url,
    );

    foreach ($required as $option => $value) {
        if (is_null($value) || $value === false || $value === '') {
            error("missing required option $option.");
        }
    }

    if (!file_exists($output_dir)) {
        error("output directory $output_dir does not exist.");
    }

    // A regular file passed the original file_exists() check and only
    // failed later, when the first sitemap was opened for writing.
    if (!is_dir($output_dir)) {
        error("$output_dir is not a directory.");
    }

    if (is_writable($output_dir) === false) {
        error("$output_dir is not writable.");
    }

    $paths = array(
        'index_file' => $index_file,
        'output_dir' => trailing_slash($output_dir),
        'output_url' => trailing_slash($output_url),
    );

    return $paths;
}
359
360
// Return $path unchanged when the pattern below finds a "/" at its end,
// otherwise return it with a "/" appended.
function trailing_slash($path)
{
    // Kept as a regex on purpose: "$" also matches just before a final
    // newline, which a plain last-character comparison would not do.
    if (preg_match('#/$#', $path)) {
        return $path;
    }

    return $path . '/';
}
369
370
// Write data to disk.
//
// Opens $path for writing (truncating any existing file), writes $data
// and closes the handle. Calls error() (which exits) when either
// argument is null, the file cannot be opened, or the data cannot be
// written in full.
function write_file($path, $data)
{
    if (is_null($path)) {
        error('No path specified for writing to.');
    } elseif (is_null($data)) {
        error('No data specified for writing.');
    }

    if (($fh_out = fopen($path, 'w')) === false) {
        error("couldn't open $path for writing.");
    }

    $written = fwrite($fh_out, $data);

    // Close before reporting anything: the original never closed the
    // handle, leaving one open file per generated sitemap.
    $closed = fclose($fh_out);

    // fwrite() may also return a byte count smaller than the data, so a
    // short write is treated as a failure too.
    if ($written === false || $written < strlen($data) || $closed === false) {
        error("couldn't write to $path.");
    }
}
387
388
// Print "Error: <message>" followed by a newline, then terminate the
// script with exit status 1. A null message is replaced by a generic
// placeholder text.
function error ($error_msg)
{
    $message = is_null($error_msg)
        ? 'error() was called without any explanation!'
        : $error_msg;

    echo 'Error: ' . $message . "\n";
    exit(1);
}
398
399
?>