-
Notifications
You must be signed in to change notification settings - Fork 114
Expand file tree
/
Copy pathodt.php
More file actions
54 lines (48 loc) · 1.9 KB
/
odt.php
File metadata and controls
54 lines (48 loc) · 1.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
<?php
/**
* Using QueryPath to parse an Open Document Type (ODT) text file.
*
* The ODT format is a standard way of representing word processing documents.
* It is based on a combination of technologies, one of which is XML. Document
* text is stored in a structural (semantic) XML document, while other information,
* such as style sheets, is stored in auxiliary files.
*
* This example shows how ODT files can be opened and manipulated using QueryPath.
*
* The file used here was generated by OpenOffice 3.x. Other word processors are
* also capable of generating ODT files.
*
*
* @author M Butcher <matt@aleph-null.tv>
* @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license.
*/
/** Include main QP library. */
require_once '../src/QueryPath/QueryPath.php';
// If you have the Zip lib compiled in:
//$file = 'zip://openoffice.odt#content.xml';
// Example for systems w/o zip lib:
$file = 'tmp/content.xml';
$doc = qp($file);

print 'Contents:' . PHP_EOL;

// Show the "outline": every heading element (<text:h>), one per line,
// indented by a level derived from its style name.
foreach ($doc->find('text|h') as $header) {
    // Cast so explode() always receives a string, in case the heading
    // carries no text:style-name attribute.
    $style = (string) $header->attr('text:style-name');

    // The level is taken from the last "_"-separated segment of the style
    // name (presumably names look like "Heading_20_1" -- TODO confirm
    // against the generating word processor).
    // NOTE(review): ODF also defines a text:outline-level attribute on
    // <text:h>; it may be a more reliable source for the level -- confirm.
    $attr_parts = explode('_', $style);
    $suffix = array_pop($attr_parts);

    // str_repeat() needs a non-negative integer. A style name without a
    // numeric suffix (e.g. "P3") would otherwise raise a warning on older
    // PHP versions and a TypeError on PHP 8, so fall back to no indentation.
    $level = max(0, (int) $suffix);

    $out = str_repeat(' ', $level) . '- ' . $header->text();
    print $out . PHP_EOL;
}
// The selector below is a compound one: it narrows to the first
// <text:list text:style-name="L1"> match, then descends to the
// <text:p text:style-name="P1"> elements inside it. This is how the
// items of a list are reached in this ODT document.
$selector = 'text|list[text|style-name="L1"]:first text|p[text|style-name="P1"]';

echo PHP_EOL . 'Bullet List' . PHP_EOL;

// Restart from the top of the document so the search is not limited to
// whatever a previous query left selected.
$bulletItems = $doc->top()->find($selector);
foreach ($bulletItems as $item) {
    $line = ' * ' . $item->text() . PHP_EOL;
    echo $line;
}
echo PHP_EOL . 'Ordered List' . PHP_EOL;

// Same pattern as a styled list lookup: first <text:list> with style "L2",
// then the <text:p> elements with style "P2" inside it. Each match is
// printed with a running 1-based number.
$orderedSelector = 'text|list[text|style-name="L2"]:first text|p[text|style-name="P2"]';
$position = 0;
foreach ($doc->top()->find($orderedSelector) as $entry) {
    $position++;
    $line = ' ' . $position . '. ' . $entry->text() . PHP_EOL;
    echo $line;
}