#!/usr/bin/php
<?php
# vim: set expandtab tabstop=4 shiftwidth=4:
/**
 * xhtml2odt - XHTML to ODT XML transformation
 *
 * This script can convert a wiki page to the OpenDocument Text (ODT) format,
 * standardized as ISO/IEC 26300:2006, and the native format of office suites
 * such as OpenOffice.org, KOffice, and others.
 *
 * It uses a template ODT file which will be filled with the converted
 * content of the exported Wiki page.
 *
 * Inspired by the work on {@link http://open.comsultia.com/docbook2odf/
 * docbook2odf}, by Roman Fordinal
 *
 * @link http://xhtml2odt.org xhtml2odt project
 * @author Aurélien Bompard <aurelien@bompard.org>
 * @copyright Aurélien Bompard <aurelien@bompard.org> 2009-2010
 * @license http://www.gnu.org/licenses/lgpl-2.1.html LGPLv2+
 * @package xhtml2odt
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 */

/**
 * Exception type raised whenever the XHTML to ODT conversion cannot proceed
 * (missing extension, unreadable template, failed transformation, ...).
 *
 * It adds nothing to the base Exception class; it only exists so callers can
 * catch conversion failures specifically.
 *
 * @package xhtml2odt
 */
class ODTException extends Exception
{
}

/**
 * Handling of an ODT file based on a template (another ODT file)
 *
 * The template ODT file is given to the constructor. Then, you must:
 * - set the XSLT parameters,
 * - call the {@link compile} method,
 * - use either the {@link saveToFile} method or the {@link
 *   exportAsAttachedFile} method, depending on whether you want to save the
 *   file on disk or to push the result to the browser.
 *
 * Several methods read the global $options array built by the command-line
 * front-end: "r" (keyword to replace), "u" (base URL), "i" (input path) and
 * "v" (verbose flag). {@link compile} also reads the global $html string.
 *
 * @package xhtml2odt
 */
class ODTFile {
    /** ZipArchive handle, used to read the template and to write the result */
    protected $odtfile;
    /** Path of the temporary working copy of the template */
    protected $odtfilepath;
    /** Temporary files (downloaded images) removed by the destructor */
    protected $tmpfiles = array();
    /** Current text of content.xml */
    protected $contentXml;
    /** Current text of styles.xml */
    protected $stylesXml;
    /** Extra automatic-style XML fragments injected by {@link _parse} */
    protected $autostyles = array();
    /** Extra style XML fragments injected by {@link _parse} */
    protected $styles = array();
    /** Extra font-face XML fragments injected by {@link _parse} */
    protected $fonts = array();
    /** Map "path on disk" => "file name inside Pictures/" */
    protected $images = array();
    /** Path to the template ODT file, as given to the constructor */
    public $template;
    /** Parameters passed to the xhtml2odt.xsl stylesheet (name => value) */
    public $xslparams = array();
    /** Whether http(s) images must be downloaded and embedded */
    public $get_remote_images = true;
    /** Centimetres per pixel at 96 dpi (2.54 / 96) */
    const PIXEL_TO_CM = 0.026458333;

    /**
     * Constructor
     *
     * Reads content.xml and styles.xml from the template and makes a
     * temporary working copy of the template, which is the file later
     * modified by {@link _save}.
     *
     * @param string $template the path to the template ODT file
     * @throws ODTException if a required extension is missing, or if the
     *                      template cannot be opened, read or copied
     */
    public function __construct($template) {
        $this->template = $template;
        if (! class_exists('ZipArchive')) {
            throw new ODTException('Zip extension not loaded - check your php '
                .'settings, PHP 5.2 minimum with zip and XSL extensions is '
                .'required.');
        }
        if (! class_exists('XSLTProcessor')) {
            throw new ODTException('XSL extension not loaded - check your php '
                .'settings, PHP 5.2 minimum with zip and XSL extensions is '
                .'required.');
        }
        // Loading content.xml and styles.xml from the template
        $this->odtfile = new ZipArchive();
        if ($this->odtfile->open($template) !== true) {
            throw new ODTException("Error while Opening the file '$template' - "
                ."Check your odt file");
        }
        $content = $this->odtfile->getFromName('content.xml');
        $styles = $this->odtfile->getFromName('styles.xml');
        // Close before checking the results so the archive is never left open
        $this->odtfile->close();
        if ($content === false) {
            throw new ODTException("Nothing to parse - check that the "
                ."content.xml file is correctly formed");
        }
        if ($styles === false) {
            throw new ODTException("Nothing to parse - check that the "
                ."styles.xml file is correctly formed");
        }
        $this->contentXml = $content;
        $this->stylesXml = $styles;
        // Use your app's cache directory here instead of the system one:
        $tmp = tempnam(sys_get_temp_dir(), "xhtml2odt-");
        if ($tmp === false or !copy($template, $tmp)) {
            if ($tmp !== false and file_exists($tmp)) {
                unlink($tmp);
            }
            throw new ODTException("Could not create a temporary copy of "
                ."the template '$template'");
        }
        $this->odtfilepath = $tmp;
    }

    /**
     * Removes the working copy of the template and the downloaded images.
     */
    public function __destruct() {
        // odtfilepath is still unset when the constructor threw early
        if ($this->odtfilepath and file_exists($this->odtfilepath)) {
            unlink($this->odtfilepath);
        }
        foreach ($this->tmpfiles as $tmp) {
            if (file_exists($tmp)) {
                unlink($tmp);
            }
        }
    }

    /**
     * @return string the current text of content.xml
     */
    public function __toString() {
        return (string) $this->contentXml;
    }

    /**
     * Main function which runs the other
     *
     * Converts the global $html string and inserts the result in the
     * template's content.xml, then adds the missing styles.
     *
     * If your app has a templating engine, you may want to use the template
     * ODT file as one of your app's templates. You would then do the
     * following steps:
     * - run it here through your template engine, which would produce a mix
     *   of ODT XML and XHTML.
     * - pass the result to the {@link xhtml2odt} method, which would only
     *   convert the XHTML to ODT, and leave the ODT untouched
     * - the rest of the function is identical
     *
     * @throws ODTException if the conversion or the styles pass fails
     */
    public function compile() {
        //$html = YourAppsTemplatingEngine($this->template);
        // here we'll just use the global $html variable.
        global $html, $options;
        $odt = $this->xhtml2odt($html);
        // Drop the XML declaration whatever the case of its encoding name:
        // the fragment is inserted in the middle of content.xml.
        $odt = preg_replace('/^\s*<\?xml\b[^>]*\?>\s*/i', '', $odt);
        // If you're using the ODT file as a template in a templating engine,
        // you can just set $this->contentXml to the output of xhtml2odt()
        // Here, we'll show how to replace a given string in the template, or
        // how to append text to the template.
        $keyword = isset($options["r"]) ? $options["r"] : "";
        $this->insertContent($odt, $keyword);
        // Add the missing styles (used in content.xml but not defined in
        // styles.xml or automatic styles)
        $this->addStyles();
    }

    /**
     * Puts the converted ODT XML in content.xml
     *
     * Every paragraph of the template whose whole text is $keyword is
     * replaced by $odt. When no keyword is given, or when no such paragraph
     * exists, $odt is appended at the end of the document body.
     *
     * @param string $odt     ODT XML fragment to insert
     * @param string $keyword literal text of the paragraph(s) to replace
     */
    protected function insertContent($odt, $keyword) {
        if ($keyword !== "" and $keyword !== null) {
            $pattern = "/<text:p[^>]*>".preg_quote($keyword, "/")
                ."<\/text:p>/";
            // split + implode instead of preg_replace(): $odt is inserted
            // verbatim, so "$1" or "\1" in the converted text is not taken
            // for a back-reference.
            $parts = preg_split($pattern, $this->contentXml);
            if ($parts !== false and count($parts) > 1) {
                $this->contentXml = implode($odt, $parts);
                return;
            }
        }
        $this->contentXml = str_replace("</office:text>",
            $odt."</office:text>", $this->contentXml);
    }

    /**
     * Clean up the HTML we get in input
     *
     * Because the stylesheets will only accept well-formed (and if possible
     * valid) XHTML. The input is run through the "tidy" extension when it is
     * loaded, then the &nbsp; entity is turned into a numeric reference.
     *
     * If you have XHTML *and* ODT mixed up in input, because you used
     * the ODT file as a template in your templating engine, then you
     * *can't* run it through "tidy". Or else you'd have to use the
     * input-xml option, and it does strange things like removing the
     * white space after links. I didn't find a way around this.
     *
     * @param string $xhtml the (X)HTML to clean up
     * @return string the cleaned-up XHTML
     */
    public function cleanupInput($xhtml) {
        // add namespace if you used the ODT file as a template
        //$xhtml = str_replace("<office:document-content", '<office:document-content xmlns="http://www.w3.org/1999/xhtml"', $xhtml);

        /* Won't work if you have ODT XML *and* XHTML as input */
        if (extension_loaded('tidy')) {
            $tidy_config = array(
                'output-xhtml' => true,
                'add-xml-decl' => false,
                'indent' => false,
                'tidy-mark' => false,
                //'input-encoding' => "latin1",
                'output-encoding' => "utf8",
                'doctype' => "auto",
                'wrap' => 0,
                'char-encoding' => "utf8",
            );
            $tidy = new tidy;
            $tidy->parseString($xhtml, $tidy_config, 'utf8');
            $tidy->cleanRepair();
            $xhtml = "$tidy";
        }

        // replace html codes with unicode
        // http://www.mail-archive.com/analog-help@lists.meer.net/msg03670.html
        // &nbsp; is not one of the predefined XML entities, so it is turned
        // into its numeric character reference.
        $xhtml = str_replace("&nbsp;", "&#160;", $xhtml);
        //$xhtml = html_entity_decode($xhtml, ENT_COMPAT, "UTF-8");

        return $xhtml;
    }

    /**
     * Convert from XHTML to ODT using the stylesheets
     *
     * @param string $xhtml XHTML to convert
     * @return string resulting ODT XML
     * @throws ODTException if the XHTML is not well-formed, if the
     *                      stylesheet cannot be loaded or if the
     *                      transformation fails
     */
    public function xhtml2odt($xhtml) {
        $xhtml = $this->cleanupInput($xhtml);
        $xhtml = $this->handleImages($xhtml);
        // run the stylesheets
        $xmldoc = $this->parseXml($xhtml, "the XHTML input");
        $proc = $this->loadStylesheet("xhtml2odt.xsl");
        foreach ($this->xslparams as $pkey => $pval) {
            $proc->setParameter("", $pkey, (string) $pval);
        }
        $output = $proc->transformToXML($xmldoc);
        if ($output === false or $output === null) {
            throw new ODTException('XSLT transformation failed');
        }
        return $output;
    }

    /**
     * Parses an XML string, turning parse failures into exceptions
     *
     * @param string $xml  the XML text
     * @param string $what description of the text, used in the error message
     * @return DOMDocument the parsed document
     * @throws ODTException if the text is empty or not well-formed
     */
    protected function parseXml($xml, $what) {
        $doc = new DOMDocument();
        // Collect libxml errors instead of printing warnings, so that the
        // first one can be reported in the exception.
        $previous = libxml_use_internal_errors(true);
        $loaded = false;
        if (is_string($xml) and $xml !== "") {
            $loaded = $doc->loadXML($xml);
        }
        $errors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if (!$loaded) {
            $detail = "empty document";
            if ($errors) {
                $detail = trim($errors[0]->message)
                    ." (line ".$errors[0]->line.")";
            }
            throw new ODTException("Could not parse $what: $detail");
        }
        return $doc;
    }

    /**
     * Loads one of the stylesheets shipped in the "xsl" directory located
     * next to this file
     *
     * @param string $name file name of the stylesheet
     * @return XSLTProcessor a processor with the stylesheet imported
     * @throws ODTException if the stylesheet cannot be loaded
     */
    protected function loadStylesheet($name) {
        $path = dirname(__FILE__)."/xsl/".$name;
        $xsldoc = new DOMDocument();
        if (!is_readable($path) or !$xsldoc->load($path)) {
            throw new ODTException("Could not load the stylesheet '$path'");
        }
        $proc = new XSLTProcessor();
        if ($proc->importStylesheet($xsldoc) === false) {
            throw new ODTException("Could not import the stylesheet '$path'");
        }
        return $proc;
    }

    /**
     * Handle images.
     *
     * Download and include them when possible. Local and remote images are
     * handled differently.
     *
     * @param string $xhtml XHTML to look for images in
     * @return string XHTML with normalized img tags
     */
    protected function handleImages($xhtml) {
        global $options;
        // Turn false absolute URLs into relative ones. Useful for a webapp.
        // Only the "host[:port]" part of the base URL is used, and nothing is
        // done without a base URL: an empty one would strip "http://" from
        // every image.
        $base = isset($options["u"]) ? $options["u"] : "";
        if (preg_match('#^[a-z][a-z0-9+.-]*://([^/]+)#i', $base, $site)) {
            $authority = $site[1];
        } else {
            $authority = preg_replace('#/.*$#', '', $base);
        }
        if ($authority !== "") {
            $xhtml = preg_replace(
                '#<img ([^>]*)src="https?://'.preg_quote($authority, '#')
                .'(?=/)#',
                '<img \1src="', $xhtml);
        }
        /* Since we're a command-line script, there is no notion of a "local
           image". Our handleLocalImg function will just convert the source
           to absolute URLs. See the top of the function for an example of
           what you could do in a webapp (2 lines !)
         */
        $xhtml = preg_replace_callback('#<img [^>]*src="([^"]+)"[^>]*>#',
            array($this, "handleLocalImg"), $xhtml);
        if ($this->get_remote_images) {
            // No closing ">" here: the callback result is followed by the
            // rest of the original tag.
            $xhtml = preg_replace_callback(
                '#<img [^>]*src="(https?://[^"]+)"[^>]*#',
                array($this, "handleRemoteImg"), $xhtml);
        }
        return $xhtml;
    }

    /**
     * Handling of local images (on this server)
     *
     * Must be called as a regexp callback. Outsources all the hard work to
     * the {@link handleImg} method.
     *
     * This implementation downloads the files that come from the same domain
     * as the XHTML document comes from, but server-based export plugins can
     * just retrieve it from the local disk, using either the
     * <samp>DOCUMENT_ROOT</samp> or any appropriate method (depending on the
     * web application you're writing an export plugin for).
     *
     * @param array $matches regexp matches (0: the img tag, 1: its src)
     * @return string regexp replacement
     * @throws ODTException if a local image exists but cannot be read
     */
    protected function handleLocalImg($matches) {
        global $options;
        $src = $matches[1];
        $verbose = isset($options["v"]);
        /* Example for a webapp:
            $file = $_SERVER["DOCUMENT_ROOT"].$src;
            return $this->handleImg($file, $matches);
           What follows is more complicated because we're a command-line script:
            - if the image is really local, include it
            - else, turn it into an absolute URL which will be downloaded later
         */
        // strpos() returns false when the needle is absent, and false == 0
        // is true: the position must be compared with ===.
        $is_file_url = (strpos($src, "file://") === 0);
        if (strpos($src, "://") !== false and !$is_file_url) {
            // This is an absolute link, don't touch it
            if ($verbose) {
                print "Local image: $src is an absolute link\n";
            }
            return $matches[0];
        }
        if ($is_file_url) {
            $file = substr($src, 7);
        } elseif (strpos($src, "/") === 0) {
            $file = $src;
        } else {
            // relative link
            $input = isset($options["i"]) ? $options["i"] : "";
            $file = dirname($input)."/".$src;
        }
        $real = realpath($file);
        if ($real !== false and is_file($real)) {
            if ($verbose) {
                print "Local image: $src is actually local !\n";
            }
            return $this->handleImg($real, $matches);
        }
        $base = isset($options["u"]) ? $options["u"] : "";
        if (!$base or $is_file_url) {
            // There's nothing we can do here
            if ($verbose) {
                print "Local image: $src not local, can't download\n";
            }
            return $matches[0];
        }
        if (function_exists("http_build_url")) {
            $newsrc = http_build_url($base, $src);
        } elseif (strpos($src, "/") === 0) {
            // Path starting with "/": resolve it against the site root
            $root = preg_replace('#^([a-z][a-z0-9+.-]*://[^/]+).*$#i', '\1',
                $base);
            $newsrc = $root.$src;
        } else {
            $newsrc = rtrim($base, "/")."/".$src;
        }
        if ($verbose) {
            print "Local image: $src -> $newsrc\n";
        }
        // Only rewrite the src attribute, not other attributes which could
        // contain the same text.
        return str_replace('src="'.$src.'"', 'src="'.$newsrc.'"',
            $matches[0]);
    }

    /**
     * Download remote images with cURL
     *
     * Must be called as a regexp callback. Outsources all the hard work to
     * the {@link handleImg} method. The tag is returned untouched when cURL
     * is missing or when the download fails.
     *
     * @param array $matches regexp matches (0: the img tag, 1: its URL)
     * @return string regexp replacement
     */
    protected function handleRemoteImg($matches) {
        global $options;
        if (!function_exists("curl_init")) {
            return $matches[0]; // abort
        }
        $url = $matches[1];
        if (isset($options["v"])) {
            print "Downloading image from: $url\n";
        }
        // Use your app's cache directory here instead of the system one:
        $tempfilename = tempnam(sys_get_temp_dir(), "xhtml2odt-");
        if ($tempfilename === false) {
            return $matches[0];
        }
        $this->tmpfiles[] = $tempfilename;
        $tempfile = fopen($tempfilename, "w");
        if ($tempfile === false) {
            return $matches[0];
        }
        $ch = curl_init();
        if ($ch === false) {
            fclose($tempfile);
            return $matches[0];
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_FILE, $tempfile);
        // Do not store the body of an HTTP error page as if it was an image
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        $result = curl_exec($ch);
        // Release both handles before looking at the result, so a failed
        // download does not leak them.
        curl_close($ch);
        fclose($tempfile);
        if ($result === false) {
            return $matches[0];
        }
        return $this->handleImg($tempfilename, $matches);
    }

    /**
     * Insertion of the image in the ODT file and the content.xml file
     *
     * The image is registered for inclusion in the "Pictures" directory of
     * the ODT file, and the tag's src, width and height are rewritten. The
     * size comes from the tag when both width and height are given in
     * pixels, else from the image file, else from the img_default_width and
     * img_default_height XSLT parameters.
     *
     * @param string $file the path to the image
     * @param array $matches regexp matches (0: the img tag, 1: its src)
     * @return string regexp replacement
     * @throws ODTException if the image is not readable
     */
    protected function handleImg($file, $matches) {
        if (!is_readable($file)) {
            throw new ODTException("Image $file is not readable or does "
                ."not exist");
        }
        $tag = $matches[0];
        $width = 0;
        $height = 0;
        if (preg_match('/\swidth="(\d+)(?:px)?"/', $tag, $found_width)
                and preg_match('/\sheight="(\d+)(?:px)?"/', $tag,
                               $found_height)) {
            // Size is specified in the HTML, keep it
            $width = (int) $found_width[1];
            $height = (int) $found_height[1];
        }
        // Remove any previous size specification
        $tag = preg_replace('/\s+width="[^"]*"/', '', $tag);
        $tag = preg_replace('/\s+height="[^"]*"/', '', $tag);
        if (!$width or !$height) {
            // Could not find or extract the wanted size, use the real size
            $size = @getimagesize($file);
            if ($size !== false) {
                $width = $size[0];
                $height = $size[1];
            }
        }
        if ($width and $height) {
            $width_attr = ($width * self::PIXEL_TO_CM)."cm";
            $height_attr = ($height * self::PIXEL_TO_CM)."cm";
        } else {
            // The defaults are lengths which already carry their unit
            // ("8cm"): they must not go through the pixel conversion.
            $width_attr = $this->defaultImgSize("img_default_width", "8cm");
            $height_attr = $this->defaultImgSize("img_default_height", "6cm");
        }
        $name = basename($file);
        $this->images[$file] = $name;
        // Point the tag to the embedded picture and give it its size
        return str_replace('src="'.$matches[1].'"',
            'src="Pictures/'.$name.'" width="'.$width_attr
            .'" height="'.$height_attr.'"', $tag);
    }

    /**
     * Returns a default image dimension from the XSLT parameters
     *
     * @param string $param    name of the XSLT parameter
     * @param string $fallback value used when the parameter is not set
     * @return string a length with its unit; "cm" is appended to a bare
     *                number
     */
    protected function defaultImgSize($param, $fallback) {
        $value = $fallback;
        if (isset($this->xslparams[$param])
                and $this->xslparams[$param] !== "") {
            $value = (string) $this->xslparams[$param];
        }
        if (is_numeric($value)) {
            $value .= "cm";
        }
        return $value;
    }

    /**
     * Inserts the generated ODT XML code into the content.xml and styles.xml
     * files
     */
    protected function _parse() {
        // automatic styles
        if ($this->autostyles) {
            $autostyles = implode("\n", $this->autostyles);
            if (strpos($this->contentXml, '<office:automatic-styles/>') !== false) {
                // The element is empty in the template: open it
                $this->contentXml = str_replace('<office:automatic-styles/>',
                    '<office:automatic-styles>'.$autostyles.'</office:automatic-styles>',
                    $this->contentXml);
            } else {
                $this->contentXml = str_replace('</office:automatic-styles>',
                    $autostyles.'</office:automatic-styles>', $this->contentXml);
            }
        }
        // regular styles
        if ($this->styles) {
            $styles = implode("\n", $this->styles);
            $this->stylesXml = str_replace('</office:styles>',
                $styles.'</office:styles>', $this->stylesXml);
        }
        // fonts
        if ($this->fonts) {
            $fonts = implode("\n", $this->fonts);
            $this->contentXml = str_replace('</office:font-face-decls>',
                $fonts.'</office:font-face-decls>', $this->contentXml);
        }
    }

    /**
     * Internal save
     *
     * Writes content.xml, styles.xml and the collected images in the
     * working copy of the template.
     *
     * @throws ODTException if the working copy cannot be opened or written
     */
    protected function _save() {
        if ($this->odtfile->open($this->odtfilepath, ZipArchive::CREATE) !== true) {
            throw new ODTException('Error during file export');
        }
        $this->_parse();
        if (! $this->odtfile->addFromString('content.xml', $this->contentXml)) {
            throw new ODTException('Error during file export');
        }
        if (! $this->odtfile->addFromString('styles.xml', $this->stylesXml)) {
            throw new ODTException('Error during file export');
        }
        foreach ($this->images as $imageKey => $imageValue) {
            if (! $this->odtfile->addFile($imageKey, 'Pictures/' . $imageValue)) {
                throw new ODTException("Could not add the image $imageKey");
            }
        }
        if (! $this->odtfile->close()) {
            throw new ODTException('Error during file export');
        }
    }

    /**
     * Exports the file as an HTTP attachment.
     *
     * If you're a web app, you'll probably want this.
     *
     * @param string $name name of the file to download (optional, a random
     *                     name is used when empty)
     * @throws ODTException if the file cannot be written or if the HTTP
     *                      headers were already sent
     */
    public function exportAsAttachedFile($name="") {
        $this->_save();
        if (headers_sent($filename, $linenum)) {
            throw new ODTException("headers already sent ($filename at $linenum)");
        }
        if ($name == "") {
            $name = md5(uniqid()) . ".odt";
        }
        // Quotes and line breaks would break the header value
        $name = str_replace(array('"', "\r", "\n"), "", $name);
        header('Content-type: application/vnd.oasis.opendocument.text');
        header('Content-Disposition: attachment; filename="'.$name.'"');
        readfile($this->odtfilepath);
    }

    /**
     * Saves the file to the disk
     *
     * Mainly useful for the command-line app, see {@link
     * exportAsAttachedFile} to have the browser download the file.
     *
     * @param string $name path to the file on the disk (optional, a random
     *                     name in the current directory is used when empty)
     * @throws ODTException if the file cannot be written
     */
    public function saveToFile($name="") {
        $this->_save();
        if ($name == "") {
            $name = md5(uniqid()) . ".odt";
        }
        if (!copy($this->odtfilepath, $name)) {
            throw new ODTException("Could not write the document to '$name'");
        }
    }

    /**
     * Adds all missing styles and fonts in the document
     *
     * Runs both content.xml and styles.xml through the styles.xsl
     * stylesheet. Nothing is modified when one of the two passes fails.
     *
     * @throws ODTException if a document is not well-formed, if the
     *                      stylesheet cannot be loaded or if a
     *                      transformation fails
     */
    protected function addStyles() {
        $contentxml = $this->parseXml($this->contentXml, "content.xml");
        $stylesxml = $this->parseXml($this->stylesXml, "styles.xml");
        $proc = $this->loadStylesheet("styles.xsl");
        $content = $proc->transformToXML($contentxml);
        $styles = $proc->transformToXML($stylesxml);
        if ($content === false or $content === null
                or $styles === false or $styles === null) {
            throw new ODTException('Adding of styles failed');
        }
        $this->contentXml = $content;
        $this->stylesXml = $styles;
    }

}

/**
 * Show the command-line help and terminate the script.
 *
 * The text is the usage line (with the script name taken from argv[0])
 * followed by the description of the optional switches.
 */
function usage() {
    $script = $GLOBALS["argv"][0];
    $help = array(
        sprintf("Usage: %s [options] -i input.html -o output.odt -t template.odt", $script),
        "Options:",
        "-u <URL> : the remote URL you downloaded the page from. This is required to include remote images.",
        "-r <KEYWORD> : a keyword in the template document to replace with the converted text.",
        "--top-header-level <LEVEL> : the maximum header level used in your HTML page (1 for <h1>, 2 for <h2> etc.).",
        "--img-default-width <SIZE> : the default width for images.",
        "--img-default-height <SIZE> : the default height for images.",
        "",
    );
    die(implode("\n", $help));
}

/**
 * Parse the command line options
 *
 * Prints the usage message and exits when help is requested or when one of
 * the required options (-i, -o, -t) is missing. Warnings about missing
 * optional features are printed on the standard output.
 *
 * @return array the options, with these keys always set:
 *               "u" (base URL "scheme://host[:port][/dir]" without trailing
 *               slash, or "" when unknown), "r" (keyword, "" when absent),
 *               "top-header-level" (integer, 1 by default),
 *               "img-default-width" and "img-default-height"
 */
function parseOpts() {
    $shortopts = "i:o:t:u:r:vh";
    $longopts = array(
        "help",
        "top-header-level:",
        "img-default-width:",
        "img-default-height:",
    );
    $options = getopt($shortopts, $longopts);
    if (!is_array($options)) {
        // getopt() failed: behave as if no option was given
        $options = array();
    }
    if (array_key_exists("h", $options) or
            array_key_exists("help", $options)) {
        usage();
    }
    foreach (array("i", "o", "t") as $reqopt) {
        if (!array_key_exists($reqopt, $options)) {
            print "Missing '-$reqopt' option.\n";
            usage();
        }
    }
    // When the input itself is a URL, it is the base URL of the images
    $input_url = @parse_url($options["i"]);
    if (is_array($input_url) and
            isset($input_url["scheme"], $input_url["host"])) {
        $options["u"] = $options["i"];
    }
    if (isset($options["u"])) {
        $input_url = @parse_url($options["u"]);
        if (is_array($input_url) and
                isset($input_url["scheme"], $input_url["host"])) {
            $dir = "";
            if (isset($input_url["path"])) {
                $path = $input_url["path"];
                if (substr($path, -1) == "/") {
                    // The URL already points to a directory: dirname()
                    // would drop its last component.
                    $dir = rtrim($path, "/");
                } else {
                    $dir = rtrim(str_replace("\\", "/", dirname($path)), "/");
                }
            }
            $options["u"] = sprintf("%s://%s%s%s",
                $input_url["scheme"], $input_url["host"],
                isset($input_url["port"]) ?
                    ":".$input_url["port"] : "",
                $dir);
        } else {
            print "Warning: the '-u' option is not a valid absolute URL, "
                ."the images in the page will not be included.\n";
            $options["u"] = "";
        }
    } else {
        print "Warning: you did not supply the '-u' option, "
            ."the images in the page will not be included.\n";
        $options["u"] = "";
    }
    if (!extension_loaded('curl')) {
        print "Warning: you did not install the 'curl' PHP extension, "
            ."the images in the page will not be included.\n";
    }
    if (!extension_loaded('tidy')) {
        print "Warning: you should install the 'tidy' PHP extension to ensure "
            ."a good conversion (or else your HTML must be valid already !)\n";
    }
    if (isset($options["top-header-level"])) {
        $level = $options["top-header-level"];
        if (is_array($level)) {
            // Option given several times: the last one wins
            $level = end($level);
        }
        $options["top-header-level"] = (int) $level;
    } else {
        $options["top-header-level"] = 1;
    }
    if (!isset($options["img-default-width"])) {
        $options["img-default-width"] = "8cm";
    }
    if (!isset($options["img-default-height"])) {
        $options["img-default-height"] = "6cm";
    }
    if (!isset($options["r"])) {
        $options["r"] = "";
    }
    return $options;
}

/**
 * This function runs the whole conversion process:
 * - read command line options
 * - read the input file
 * - create the {@link ODTFile} instance and set the stylesheet parameters
 * - run the {@link ODTFile::compile()} method
 * - save the resulting file with the {@link ODTFile::saveToFile()} method
 *
 * The options and the input document are stored in the global $options and
 * $html variables, which {@link ODTFile} reads. The script exits with
 * status 1 when the input cannot be read.
 */
function main() {
    global $html, $options;

    $options = parseOpts();

    $html = file_get_contents($options["i"]);
    if ($html === false) {
        // Stop here: converting "false" would only fail later with an
        // unrelated error message.
        file_put_contents("php://stderr",
            "Error: could not read the input '".$options["i"]."'\n");
        exit(1);
    }

    $odf = new ODTFile($options["t"]);

    $odf->xslparams["url"] = $options["u"]; // this would be your app's URL
    // the following setting depends on how <h> tags are used in your app
    $odf->xslparams["heading_minus_level"] = $options["top-header-level"];
    // set the following values from your config
    $odf->get_remote_images = ($options["u"] != "");
    $odf->xslparams["img_default_width"] = $options["img-default-width"];
    $odf->xslparams["img_default_height"] = $options["img-default-height"];

    $odf->compile();

    $odf->saveToFile($options["o"]);
    print "Wrote document to: ".$options["o"]."\n";
}

// Script entry point. A conversion failure is reported as a one-line error
// with a non-zero exit status instead of an uncaught-exception fatal error.
try {
    main();
} catch (ODTException $e) {
    file_put_contents("php://stderr", "Error: ".$e->getMessage()."\n");
    exit(1);
}