1
/* POLE - Portable C++ library to access OLE Storage
2
   Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org>
3
4
   Redistribution and use in source and binary forms, with or without
5
   modification, are permitted provided that the following conditions
6
   are met:
7
   * Redistributions of source code must retain the above copyright notice,
8
     this list of conditions and the following disclaimer.
9
   * Redistributions in binary form must reproduce the above copyright notice,
10
     this list of conditions and the following disclaimer in the documentation
11
     and/or other materials provided with the distribution.
12
   * Neither the name of the authors nor the names of its contributors may be
13
     used to endorse or promote products derived from this software without
14
     specific prior written permission.
15
16
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26
   THE POSSIBILITY OF SUCH DAMAGE.
27
*/
28
29
#include <fstream>
30
#include <iostream>
31
#include <list>
32
#include <string>
33
#include <vector>
34
35
#include "pole.h"
36
37
#include <string.h>
38
39
// enable to activate debugging output
40
// #define POLE_DEBUG
41
42
namespace POLE
43
{
44
45
// Parsed form of the fixed-size header stored at the start of the file.
// load() fills the fields from a raw buffer, save() writes them back.
class Header
{
public:
    Header();

    // raw buffer <-> fields
    void load(const unsigned char* buffer);
    void save(unsigned char* buffer);

    // sanity check of the loaded values, and a dump to std::cout
    bool valid();
    void debug();

    unsigned char id[8];       // magic identifier (signature)
    unsigned b_shift;          // big block size is 1 << b_shift
    unsigned s_shift;          // small block size is 1 << s_shift
    unsigned num_bat;          // number of blocks allocated for the big bat
    unsigned dirent_start;     // first block holding directory info
    unsigned threshold;        // size at which storage switches from small to big (usually 4K)
    unsigned sbat_start;       // first block index of the small bat
    unsigned num_sbat;         // number of blocks allocated for the small bat
    unsigned mbat_start;       // first block of the meta bat
    unsigned num_mbat;         // number of blocks allocated for the meta bat
    unsigned long bb_blocks[109]; // big bat block indices kept inside the header
};
66
67
// Block allocation table: one slot per block, each slot holding either the
// index of the next block in a chain or one of the marker values below.
class AllocTable
{
public:
    // marker values a slot may hold instead of a block index
    static const unsigned Avail;
    static const unsigned Eof;
    static const unsigned Bat;
    static const unsigned MetaBat;

    unsigned blockSize;

    AllocTable();

    // table size
    void clear();
    unsigned long count();
    unsigned size();
    void resize(unsigned long newsize);
    void preserve(unsigned long n);
    unsigned unused();

    // slot and chain access
    unsigned long operator[](unsigned long index);
    void set(unsigned long index, unsigned long val);
    void setChain(std::vector<unsigned long>);
    std::vector<unsigned long> follow(unsigned long start);

    // raw buffer <-> table, and a dump to std::cout
    void load(const unsigned char* buffer, unsigned len);
    void save(unsigned char* buffer);
    void debug();

private:
    std::vector<unsigned long> data;

    // no copy or assign
    AllocTable(const AllocTable&);
    AllocTable& operator=(const AllocTable&);
};
94
95
// One record of the directory tree.
class DirEntry
{
public:
    bool valid;            // false if invalid (should be skipped)
    std::string name;      // the name, not in unicode anymore
    bool dir;              // true if directory
    unsigned long size;    // size (not valid if directory)
    unsigned long start;   // starting block
    unsigned prev;         // previous sibling
    unsigned next;         // next sibling
    unsigned child;        // first child

    // Give every member a defined value so that a default-constructed entry
    // never exposes indeterminate data. The entry starts out invalid, empty
    // and unlinked; 0xffffffff is the same value as DirTree::End.
    DirEntry()
        : valid(false), name(), dir(false), size(0), start(0),
          prev(0xffffffff), next(0xffffffff), child(0xffffffff)
    {
    }
};
107
108
class DirTree
109
{
110
public:
111
    static const unsigned End;
112
    DirTree();
113
    void clear();
114
    unsigned entryCount();
115
    DirEntry* entry(unsigned index);
116
    DirEntry* entry(const std::string& name, bool create = false);
117
    int indexOf(DirEntry* e);
118
    int parent(unsigned index);
119
    std::string fullName(unsigned index);
120
    std::vector<unsigned> children(unsigned index);
121
    void load(unsigned char* buffer, unsigned len);
122
    void save(unsigned char* buffer);
123
    unsigned size();
124
    void debug();
125
private:
126
    std::vector<DirEntry> entries;
127
    DirTree(const DirTree&);
128
    DirTree& operator=(const DirTree&);
129
};
130
131
class StorageIO
132
{
133
public:
134
    Storage* storage;         // owner
135
    std::string filename;     // filename
136
    std::fstream file;        // associated with above name
137
    int result;               // result of operation
138
    bool opened;              // true if file is opened
139
    unsigned long filesize;   // size of the file
140
141
    Header* header;           // storage header
142
    DirTree* dirtree;         // directory tree
143
    AllocTable* bbat;         // allocation table for big blocks
144
    AllocTable* sbat;         // allocation table for small blocks
145
146
    std::vector<unsigned long> sb_blocks; // blocks for "small" files
147
148
    std::list<Stream*> streams;
149
150
    StorageIO(Storage* storage, const char* filename);
151
    ~StorageIO();
152
153
    bool open();
154
    void close();
155
    void flush();
156
    void load();
157
    void create();
158
159
    unsigned long loadBigBlocks(std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen);
160
161
    unsigned long loadBigBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen);
162
163
    unsigned long loadSmallBlocks(std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen);
164
165
    unsigned long loadSmallBlock(unsigned long block, unsigned char* buffer, unsigned long maxlen);
166
167
    StreamIO* streamIO(const std::string& name);
168
169
private:
170
    // no copy or assign
171
    StorageIO(const StorageIO&);
172
    StorageIO& operator=(const StorageIO&);
173
174
};
175
176
class StreamIO
177
{
178
public:
179
    StorageIO* io;
180
    DirEntry* entry;
181
    std::string fullName;
182
    bool eof;
183
    bool fail;
184
185
    StreamIO(StorageIO* io, DirEntry* entry);
186
    ~StreamIO();
187
    unsigned long size();
188
    void seek(unsigned long pos);
189
    unsigned long tell();
190
    int getch();
191
    unsigned long read(unsigned char* data, unsigned long maxlen);
192
    unsigned long read(unsigned long pos, unsigned char* data, unsigned long maxlen);
193
194
195
private:
196
    std::vector<unsigned long> blocks;
197
198
    // no copy or assign
199
    StreamIO(const StreamIO&);
200
    StreamIO& operator=(const StreamIO&);
201
202
    // pointer for read
203
    unsigned long m_pos;
204
205
    // simple cache system to speed-up getch()
206
    unsigned char* cache_data;
207
    unsigned long cache_size;
208
    unsigned long cache_pos;
209
    void updateCache();
210
};
211
212
} // namespace POLE
213
214
using namespace POLE;
215
216
// Decode the little-endian 16-bit value stored in the two bytes at ptr.
static inline unsigned long readU16(const unsigned char* ptr)
{
    const unsigned long low = ptr[0];
    const unsigned long high = ptr[1];
    return low + (high << 8);
}
220
221
// Decode the little-endian 32-bit value stored in the four bytes at ptr.
//
// Each byte is widened to unsigned long before it is shifted. Shifting the
// promoted (signed) int instead would push the top byte into the sign bit,
// and the negative result would sign-extend when converted to a 64-bit
// unsigned long, turning e.g. 0xfffffffe into 0xfffffffffffffffe.
static inline unsigned long readU32(const unsigned char* ptr)
{
    return  (unsigned long)ptr[0]
         | ((unsigned long)ptr[1] << 8)
         | ((unsigned long)ptr[2] << 16)
         | ((unsigned long)ptr[3] << 24);
}
225
226
// Store the low 16 bits of data at ptr, least significant byte first.
static inline void writeU16(unsigned char* ptr, unsigned long data)
{
    for (unsigned k = 0; k < 2; k++)
        ptr[k] = (unsigned char)((data >> (8 * k)) & 0xff);
}
231
232
// Store the low 32 bits of data at ptr, least significant byte first.
static inline void writeU32(unsigned char* ptr, unsigned long data)
{
    for (unsigned k = 0; k < 4; k++)
        ptr[k] = (unsigned char)((data >> (8 * k)) & 0xff);
}
239
240
// Eight-byte signature: written at offset 0 by Header::save() and compared
// against the loaded header id when a file is opened.
static const unsigned char pole_magic[] = {
    0xd0, 0xcf, 0x11, 0xe0,
    0xa1, 0xb1, 0x1a, 0xe1
};
241
242
// =========== Header ==========
243
244
// Build the header of an empty storage: 512-byte big blocks (1 << 9),
// 64-byte small blocks (1 << 6), a 4096-byte threshold, no tables allocated.
Header::Header()
    : b_shift(9),
      s_shift(6),
      num_bat(0),
      dirent_start(0),
      threshold(4096),
      sbat_start(0),
      num_sbat(0),
      mbat_start(0),
      num_mbat(0)
{
    // signature
    memcpy(id, pole_magic, 8);

    // every header bat slot starts out unused
    for (unsigned long* slot = bb_blocks; slot != bb_blocks + 109; ++slot)
        *slot = AllocTable::Avail;
}
261
262
bool Header::valid()
263
{
264
    if (threshold != 4096) return false;
265
    if (num_bat == 0) return false;
266
    if ((num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false;
267
    if ((num_bat < 109) && (num_mbat != 0)) return false;
268
    if (s_shift > b_shift) return false;
269
    if (b_shift <= 6) return false;
270
    if (b_shift >= 31) return false;
271
272
    return true;
273
}
274
275
void Header::load(const unsigned char* buffer)
276
{
277
    b_shift      = readU16(buffer + 0x1e); // sector shift
278
    s_shift      = readU16(buffer + 0x20); // mini sector shift
279
    num_bat      = readU32(buffer + 0x2c); // number of fat sectors
280
    dirent_start = readU32(buffer + 0x30); // first directory sector location
281
    threshold    = readU32(buffer + 0x38); // transaction signature number
282
    sbat_start   = readU32(buffer + 0x3c); // mini stream cutoff size
283
    num_sbat     = readU32(buffer + 0x40); // first mini fat sector location
284
    mbat_start   = readU32(buffer + 0x44); // first mini difat sector location
285
    num_mbat     = readU32(buffer + 0x48); // number of difat sectors
286
287
    for (unsigned i = 0; i < 8; i++)
288
        id[i] = buffer[i];
289
    for (unsigned i = 0; i < 109; i++)
290
        bb_blocks[i] = readU32(buffer + 0x4C + i * 4);
291
}
292
293
void Header::save(unsigned char* buffer)
294
{
295
    memset(buffer, 0, 0x4c);
296
    memcpy(buffer, pole_magic, 8);          // ole signature
297
    writeU32(buffer + 8, 0);                // unknown
298
    writeU32(buffer + 12, 0);               // unknown
299
    writeU32(buffer + 16, 0);               // unknown
300
    writeU16(buffer + 24, 0x003e);          // revision ?
301
    writeU16(buffer + 26, 3);               // version ?
302
    writeU16(buffer + 28, 0xfffe);          // unknown
303
    writeU16(buffer + 0x1e, b_shift);
304
    writeU16(buffer + 0x20, s_shift);
305
    writeU32(buffer + 0x2c, num_bat);
306
    writeU32(buffer + 0x30, dirent_start);
307
    writeU32(buffer + 0x38, threshold);
308
    writeU32(buffer + 0x3c, sbat_start);
309
    writeU32(buffer + 0x40, num_sbat);
310
    writeU32(buffer + 0x44, mbat_start);
311
    writeU32(buffer + 0x48, num_mbat);
312
313
    for (unsigned i = 0; i < 109; i++)
314
        writeU32(buffer + 0x4C + i*4, bb_blocks[i]);
315
}
316
317
void Header::debug()
318
{
319
    std::cout << std::endl;
320
    std::cout << "b_shift " << b_shift << std::endl;
321
    std::cout << "s_shift " << s_shift << std::endl;
322
    std::cout << "num_bat " << num_bat << std::endl;
323
    std::cout << "dirent_start " << dirent_start << std::endl;
324
    std::cout << "threshold " << threshold << std::endl;
325
    std::cout << "sbat_start " << sbat_start << std::endl;
326
    std::cout << "num_sbat " << num_sbat << std::endl;
327
    std::cout << "mbat_start " << mbat_start << std::endl;
328
    std::cout << "num_mbat " << num_mbat << std::endl;
329
330
    unsigned s = (num_bat <= 109) ? num_bat : 109;
331
    std::cout << "bat blocks: ";
332
    for (unsigned i = 0; i < s; i++)
333
        std::cout << bb_blocks[i] << " ";
334
    std::cout << std::endl;
335
}
336
337
// =========== AllocTable ==========
338
339
// Marker values stored in table slots in place of a block index.
const unsigned AllocTable::MetaBat = 0xfffffffc;   // shown as "[metabat]" by debug()
const unsigned AllocTable::Bat     = 0xfffffffd;   // shown as "[bat]" by debug()
const unsigned AllocTable::Eof     = 0xfffffffe;   // last block of a chain
const unsigned AllocTable::Avail   = 0xffffffff;   // slot is free
343
344
// Start with a 4096-byte block size and 128 free slots.
AllocTable::AllocTable()
    : blockSize(4096)
{
    resize(128);
}
350
351
unsigned long AllocTable::count()
352
{
353
    return data.size();
354
}
355
356
void AllocTable::resize(unsigned long newsize)
357
{
358
    unsigned oldsize = data.size();
359
    data.resize(newsize);
360
    if (newsize > oldsize)
361
        for (unsigned i = oldsize; i < newsize; i++)
362
            data[i] = Avail;
363
}
364
365
// make sure there're still free blocks
366
void AllocTable::preserve(unsigned long n)
367
{
368
    std::vector<unsigned long> pre;
369
    for (unsigned i = 0; i < n; i++)
370
        pre.push_back(unused());
371
}
372
373
unsigned long AllocTable::operator[](unsigned long index)
374
{
375
    unsigned long result;
376
    result = data[index];
377
    return result;
378
}
379
380
void AllocTable::set(unsigned long index, unsigned long value)
381
{
382
    if (index >= count()) resize(index + 1);
383
    data[ index ] = value;
384
}
385
386
void AllocTable::setChain(std::vector<unsigned long> chain)
387
{
388
    if (chain.size()) {
389
        for (unsigned i = 0; i < chain.size() - 1; i++)
390
            set(chain[i], chain[i+1]);
391
        set(chain[ chain.size()-1 ], AllocTable::Eof);
392
    }
393
}
394
395
// follow
396
std::vector<unsigned long> AllocTable::follow(unsigned long start)
397
{
398
    std::vector<unsigned long> chain;
399
400
    if (start >= count()) return chain;
401
402
    unsigned long p = start;
403
    while (p < count()) {
404
        if (p == (unsigned long)Eof) break;
405
        if (p == (unsigned long)Bat) break;
406
        if (p == (unsigned long)MetaBat) break;
407
        if (p >= count()) break;
408
        chain.push_back(p);
409
        if (data[p] >= count()) break;
410
        p = data[ p ];
411
    }
412
413
    return chain;
414
}
415
416
unsigned AllocTable::unused()
417
{
418
    // find first available block
419
    for (unsigned i = 0; i < data.size(); i++)
420
        if (data[i] == Avail)
421
            return i;
422
423
    // completely full, so enlarge the table
424
    unsigned block = data.size();
425
    resize(data.size() + 10);
426
    return block;
427
}
428
429
void AllocTable::load(const unsigned char* buffer, unsigned len)
430
{
431
    resize(len / 4);
432
    for (unsigned i = 0; i < count(); i++)
433
        set(i, readU32(buffer + i*4));
434
}
435
436
// return space required to save this dirtree
437
unsigned AllocTable::size()
438
{
439
    return count() * 4;
440
}
441
442
void AllocTable::save(unsigned char* buffer)
443
{
444
    for (unsigned i = 0; i < count(); i++)
445
        writeU32(buffer + i*4, data[i]);
446
}
447
448
void AllocTable::debug()
449
{
450
    std::cout << "block size " << data.size() << std::endl;
451
    for (unsigned i = 0; i < data.size(); i++) {
452
        if (data[i] == Avail) continue;
453
        std::cout << i << ": ";
454
        if (data[i] == Eof) std::cout << "[eof]";
455
        else if (data[i] == Bat) std::cout << "[bat]";
456
        else if (data[i] == MetaBat) std::cout << "[metabat]";
457
        else std::cout << data[i];
458
        std::cout << std::endl;
459
    }
460
}
461
462
// =========== DirTree ==========
463
464
// Marker stored in prev/next/child (and the root's start) for "no entry".
const unsigned DirTree::End = 0xffffffff;
465
466
// A new tree holds just the root entry.
DirTree::DirTree()
{
    this->clear();
}
470
471
void DirTree::clear()
472
{
473
    // leave only root entry
474
    entries.resize(1);
475
    entries[0].valid = true;
476
    entries[0].name = "Root Entry";
477
    entries[0].dir = true;
478
    entries[0].size = 0;
479
    entries[0].start = End;
480
    entries[0].prev = End;
481
    entries[0].next = End;
482
    entries[0].child = End;
483
}
484
485
unsigned DirTree::entryCount()
486
{
487
    return entries.size();
488
}
489
490
// Entry at position index, or a null pointer when index is out of range.
DirEntry* DirTree::entry(unsigned index)
{
    return (index < entryCount()) ? &entries[ index ] : (DirEntry*) 0;
}
495
496
// Position of entry e inside this tree, or -1 when e does not belong to it.
int DirTree::indexOf(DirEntry* e)
{
    const unsigned total = entryCount();
    for (unsigned pos = 0; pos != total; ++pos) {
        if (&entries[pos] == e)
            return pos;
    }

    return -1;
}
503
504
int DirTree::parent(unsigned index)
505
{
506
    // brute-force, basically we iterate for each entries, find its children
507
    // and check if one of the children is 'index'
508
    for (unsigned j = 0; j < entryCount(); j++) {
509
        std::vector<unsigned> chi = children(j);
510
        for (unsigned i = 0; i < chi.size();i++)
511
            if (chi[i] == index)
512
                return j;
513
    }
514
515
    return -1;
516
}
517
518
std::string DirTree::fullName(unsigned index)
519
{
520
    // don't use root name ("Root Entry"), just give "/"
521
    if (index == 0) return "/";
522
523
    std::string result = entry(index)->name;
524
    result.insert(0,  "/");
525
    int p = parent(index);
526
    DirEntry * _entry = 0;
527
    while (p > 0) {
528
        _entry = entry(p);
529
        if (_entry->dir && _entry->valid) {
530
            result.insert(0,  _entry->name);
531
            result.insert(0,  "/");
532
        }
533
        --p;
534
        index = p;
535
        if (index <= 0) break;
536
    }
537
    return result;
538
}
539
540
// given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
541
// if not found and create is false, return 0
542
// if create is true, a new entry is returned
543
DirEntry* DirTree::entry(const std::string& name, bool create)
544
{
545
    if (!name.length()) return (DirEntry*)0;
546
547
    // quick check for "/" (that's root)
548
    if (name == "/") return entry(0);
549
550
    // split the names, e.g  "/ObjectPool/_1020961869" will become:
551
    // "ObjectPool" and "_1020961869"
552
    std::list<std::string> names;
553
    std::string::size_type start = 0, end = 0;
554
    if (name[0] == '/') start++;
555
    while (start < name.length()) {
556
        end = name.find_first_of('/', start);
557
        if (end == std::string::npos) end = name.length();
558
        names.push_back(name.substr(start, end - start));
559
        start = end + 1;
560
    }
561
562
    // start from root
563
    int index = 0 ;
564
565
    // trace one by one
566
    std::list<std::string>::iterator it;
567
568
    for (it = names.begin(); it != names.end(); ++it) {
569
        // find among the children of index
570
        std::vector<unsigned> chi = children(index);
571
        unsigned child = 0;
572
        for (unsigned i = 0; i < chi.size(); i++) {
573
            DirEntry* ce = entry(chi[i]);
574
            if (ce)
575
                if (ce->valid && (ce->name.length() > 1))
576
                    if (ce->name == *it)
577
                        child = chi[i];
578
        }
579
580
        // traverse to the child
581
        if (child > 0) index = child;
582
        else {
583
            // not found among children
584
            if (!create) return (DirEntry*)0;
585
586
            // create a new entry
587
            unsigned parent = index;
588
            entries.push_back(DirEntry());
589
            index = entryCount() - 1;
590
            DirEntry* e = entry(index);
591
            e->valid = true;
592
            e->name = *it;
593
            e->dir = false;
594
            e->size = 0;
595
            e->start = 0;
596
            e->child = End;
597
            e->prev = End;
598
            e->next = entry(parent)->child;
599
            entry(parent)->child = index;
600
        }
601
    }
602
603
    return entry(index);
604
}
605
606
// helper function: recursively find siblings of index
607
void dirtree_find_siblings(DirTree* dirtree, std::vector<unsigned>& result,
608
                           unsigned index)
609
{
610
    DirEntry* e = dirtree->entry(index);
611
    if (!e) return;
612
    if (!e->valid) return;
613
614
    // prevent infinite loop
615
    for (unsigned i = 0; i < result.size(); i++)
616
        if (result[i] == index) return;
617
618
    // add myself
619
    result.push_back(index);
620
621
    // visit previous sibling, don't go infinitely
622
    unsigned prev = e->prev;
623
    if ((prev > 0) && (prev < dirtree->entryCount())) {
624
        for (unsigned i = 0; i < result.size(); i++)
625
            if (result[i] == prev) prev = 0;
626
        if (prev) dirtree_find_siblings(dirtree, result, prev);
627
    }
628
629
    // visit next sibling, don't go infinitely
630
    unsigned next = e->next;
631
    if ((next > 0) && (next < dirtree->entryCount())) {
632
        for (unsigned i = 0; i < result.size(); i++)
633
            if (result[i] == next) next = 0;
634
        if (next) dirtree_find_siblings(dirtree, result, next);
635
    }
636
}
637
638
std::vector<unsigned> DirTree::children(unsigned index)
639
{
640
    std::vector<unsigned> result;
641
642
    DirEntry* e = entry(index);
643
    if (e) if (e->valid && e->child < entryCount())
644
            dirtree_find_siblings(this, result, e->child);
645
646
    return result;
647
}
648
649
void DirTree::load(unsigned char* buffer, unsigned size)
650
{
651
    entries.clear();
652
653
    for (unsigned i = 0; i < size / 128; i++) {
654
        unsigned p = i * 128;
655
656
        // would be < 32 if first char in the name isn't printable
657
        unsigned prefix = 32;
658
659
        // parse name of this entry, which stored as Unicode 16-bit
660
        std::string name;
661
        int name_len = readU16(buffer + 0x40 + p);
662
        if (name_len > 64) name_len = 64;
663
        for (int j = 0; (buffer[j+p]) && (j < name_len); j += 2)
664
            name.append(1, buffer[j+p]);
665
666
        // first char isn't printable ? remove it...
667
        if (buffer[p] < 32) {
668
            prefix = buffer[0];
669
            name.erase(0, 1);
670
        }
671
672
        // 2 = file (aka stream), 1 = directory (aka storage), 5 = root
673
        unsigned type = buffer[ 0x42 + p];
674
675
        DirEntry e;
676
        e.valid = true;
677
        e.name = name;
678
        e.start = readU32(buffer + 0x74 + p);
679
        e.size = readU32(buffer + 0x78 + p);
680
        e.prev = readU32(buffer + 0x44 + p);
681
        e.next = readU32(buffer + 0x48 + p);
682
        e.child = readU32(buffer + 0x4C + p);
683
        e.dir = (type != 2);
684
        
685
        // sanity checks
686
        if ((type != 2) && (type != 1) && (type != 5)) e.valid = false;
687
        if (name_len < 1) e.valid = false;
688
689
        // CLSID, contains a object class GUI if this entry is a storage or root
690
        // storage or all zero if not.
691
#ifdef POLE_DEBUG
692
        printf("DirTree::load name=%s type=%i prev=%i next=%i child=%i start=%i size=%i clsid=%i.%i.%i.%i\n",
693
               name.c_str(),type,e.prev,e.next,e.child,e.start,e.size,readU32(buffer+0x50+p),readU32(buffer+0x54+p),readU32(buffer+0x58+p),readU32(buffer+0x5C+p));
694
#endif
695
        entries.push_back(e);
696
    }
697
}
698
699
// return space required to save this dirtree
700
unsigned DirTree::size()
701
{
702
    return entryCount() * 128;
703
}
704
705
void DirTree::save(unsigned char* buffer)
706
{
707
    memset(buffer, 0, size());
708
709
    // root is fixed as "Root Entry"
710
    DirEntry* root = entry(0);
711
    std::string name = "Root Entry";
712
    for (unsigned j = 0; j < name.length(); j++)
713
        buffer[ j*2 ] = name[j];
714
    writeU16(buffer + 0x40, name.length()*2 + 2);
715
    writeU32(buffer + 0x74, 0xffffffff);
716
    writeU32(buffer + 0x78, 0);
717
    writeU32(buffer + 0x44, 0xffffffff);
718
    writeU32(buffer + 0x48, 0xffffffff);
719
    writeU32(buffer + 0x4c, root->child);
720
    buffer[ 0x42 ] = 5;
721
    buffer[ 0x43 ] = 1;
722
723
    for (unsigned i = 1; i < entryCount(); i++) {
724
        DirEntry* e = entry(i);
725
        if (!e) continue;
726
        if (e->dir) {
727
            e->start = 0xffffffff;
728
            e->size = 0;
729
        }
730
731
        // max length for name is 32 chars
732
        std::string name = e->name;
733
        if (name.length() > 32)
734
            name.erase(32, name.length());
735
736
        // write name as Unicode 16-bit
737
        for (unsigned j = 0; j < name.length(); j++)
738
            buffer[ i*128 + j*2 ] = name[j];
739
740
        writeU16(buffer + i*128 + 0x40, name.length()*2 + 2);
741
        writeU32(buffer + i*128 + 0x74, e->start);
742
        writeU32(buffer + i*128 + 0x78, e->size);
743
        writeU32(buffer + i*128 + 0x44, e->prev);
744
        writeU32(buffer + i*128 + 0x48, e->next);
745
        writeU32(buffer + i*128 + 0x4c, e->child);
746
        buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2;
747
        buffer[ i*128 + 0x43 ] = 1; // always black
748
    }
749
}
750
751
void DirTree::debug()
752
{
753
    for (unsigned i = 0; i < entryCount(); i++) {
754
        DirEntry* e = entry(i);
755
        if (!e) continue;
756
        std::cout << i << ": ";
757
        if (!e->valid) std::cout << "INVALID ";
758
        std::cout << e->name << " ";
759
        if (e->dir) std::cout << "(Dir) ";
760
        else std::cout << "(File) ";
761
        std::cout << e->size << " ";
762
        std::cout << "s:" << e->start << " ";
763
        std::cout << "(";
764
        if (e->child == End) std::cout << "-"; else std::cout << e->child;
765
        std::cout << " ";
766
        if (e->prev == End) std::cout << "-"; else std::cout << e->prev;
767
        std::cout << ":";
768
        if (e->next == End) std::cout << "-"; else std::cout << e->next;
769
        std::cout << ")";
770
        std::cout << std::endl;
771
    }
772
}
773
774
// =========== StorageIO ==========
775
776
// Set up an unopened storage for file fname, owned by st.
// Header, directory tree and both allocation tables are created with their
// defaults; the table block sizes are taken from the default header.
StorageIO::StorageIO(Storage* st, const char* fname)
    : storage(st),
      filename(fname),
      result(Storage::Ok),
      opened(false),
      filesize(0),
      header(new Header()),
      dirtree(new DirTree()),
      bbat(new AllocTable()),
      sbat(new AllocTable())
{
    bbat->blockSize = 1 << header->b_shift;
    sbat->blockSize = 1 << header->s_shift;
}
792
793
// Close the file if it is still open, then release the owned objects.
StorageIO::~StorageIO()
{
    // close() returns at once when the storage is not open
    close();

    delete sbat;
    delete bbat;
    delete dirtree;
    delete header;
}
801
802
bool StorageIO::open()
803
{
804
    // already opened ? close first
805
    if (opened) close();
806
807
    load();
808
809
    return result == Storage::Ok;
810
}
811
812
void StorageIO::load()
813
{
814
    unsigned char* buffer = 0;
815
    unsigned long buflen = 0;
816
    std::vector<unsigned long> blocks;
817
818
    // open the file, check for error
819
    result = Storage::OpenFailed;
820
    file.open(filename.c_str(), std::ios::binary | std::ios::in);
821
    if (!file.good()) return;
822
823
    // find size of input file
824
    file.seekg(0, std::ios::end);
825
    filesize = file.tellg();
826
827
    // load header
828
    buffer = new unsigned char[512];
829
    file.seekg(0);
830
    file.read((char*)buffer, 512);
831
    if (!file.good()) {
832
        delete[] buffer;
833
        return;
834
    }
835
    header->load(buffer);
836
    delete[] buffer;
837
838
    // check OLE magic id
839
    result = Storage::NotOLE;
840
    for (unsigned i = 0; i < 8; i++)
841
        if (header->id[i] != pole_magic[i])
842
            return;
843
844
    // sanity checks
845
    result = Storage::BadOLE;
846
    if (!header->valid()) return;
847
    if (header->threshold != 4096) return;
848
849
    // important block size
850
    bbat->blockSize = 1 << header->b_shift;
851
    sbat->blockSize = 1 << header->s_shift;
852
853
    // find blocks allocated to store big bat
854
    // the first 109 blocks are in header, the rest in meta bat
855
    blocks.clear();
856
    blocks.resize(header->num_bat);
857
    for (unsigned i = 0; i < 109; i++)
858
        if (i >= header->num_bat) break;
859
        else blocks[i] = header->bb_blocks[i];
860
    if ((header->num_bat > 109) && (header->num_mbat > 0)) {
861
        unsigned char* buffer2 = new unsigned char[ bbat->blockSize ];
862
        unsigned k = 109;
863
        unsigned mblock = header->mbat_start;
864
        for (unsigned r = 0; r < header->num_mbat; r++) {
865
            unsigned long rr = loadBigBlock(mblock, buffer2, bbat->blockSize);
866
            if (rr != bbat->blockSize) {
867
                delete[] buffer2;
868
                return;
869
            }
870
            for (unsigned s = 0; s < bbat->blockSize - 4; s += 4) {
871
                if (k >= header->num_bat) break;
872
                else  blocks[k++] = readU32(buffer2 + s);
873
            }
874
            mblock = readU32(buffer2 + bbat->blockSize - 4);
875
        }
876
        delete[] buffer2;
877
    }
878
879
    // load big bat
880
    buflen = blocks.size() * bbat->blockSize;
881
    if (buflen > 0) {
882
        buffer = new unsigned char[ buflen ];
883
        unsigned long r = loadBigBlocks(blocks, buffer, buflen);
884
        if (r != buflen) {
885
            delete[] buffer;
886
            return;
887
        }
888
        bbat->load(buffer, buflen);
889
        delete[] buffer;
890
    }
891
892
    // load small bat
893
    blocks.clear();
894
    blocks = bbat->follow(header->sbat_start);
895
    buflen = blocks.size() * bbat->blockSize;
896
    if (buflen > 0) {
897
        buffer = new unsigned char[ buflen ];
898
        unsigned long r = loadBigBlocks(blocks, buffer, buflen);
899
        if (r != buflen) {
900
            delete[] buffer;
901
            return;
902
        }
903
        sbat->load(buffer, buflen);
904
        delete[] buffer;
905
    }
906
907
    // load directory tree
908
    blocks.clear();
909
    blocks = bbat->follow(header->dirent_start);
910
    buflen = blocks.size() * bbat->blockSize;
911
    buffer = new unsigned char[ buflen ];
912
    unsigned long r = loadBigBlocks(blocks, buffer, buflen);
913
    if (r != buflen) {
914
        delete[] buffer;
915
        return;
916
    }
917
    dirtree->load(buffer, buflen);
918
    unsigned sb_start = readU32(buffer + 0x74);
919
    delete[] buffer;
920
921
    // fetch block chain as data for small-files
922
    sb_blocks = bbat->follow(sb_start);   // small files
923
924
    // for troubleshooting, just enable this block
925
#ifdef POLE_DEBUG
926
    header->debug();
927
    sbat->debug();
928
    bbat->debug();
929
    dirtree->debug();
930
#endif
931
932
    // so far so good
933
    result = Storage::Ok;
934
    opened = true;
935
}
936
937
void StorageIO::create()
938
{
939
    // std::cout << "Creating " << filename << std::endl;
940
941
    file.open(filename.c_str(), std::ios::out | std::ios::binary);
942
    if (!file.good()) {
943
        std::cerr << "Can't create " << filename << std::endl;
944
        result = Storage::OpenFailed;
945
        return;
946
    }
947
948
    // so far so good
949
    opened = true;
950
    result = Storage::Ok;
951
}
952
953
// Currently does nothing.
void StorageIO::flush()
{
    /* Layout used by the Microsoft implementation, for reference:
       - the directory entries occupy the last block(s)
       - the BATs come just before them
       - the Meta BATs come just before the BATs
    */
}
961
962
void StorageIO::close()
963
{
964
    if (!opened) return;
965
966
    file.close();
967
    opened = false;
968
969
    std::list<Stream*>::iterator it;
970
    for (it = streams.begin(); it != streams.end(); ++it)
971
        delete *it;
972
}
973
974
// Create a StreamIO for the entry called name.
// Returns a null pointer when name is empty, no such entry exists, or the
// entry is a directory. The caller receives a newly allocated object.
StreamIO* StorageIO::streamIO(const std::string& name)
{
    if (name.empty()) return (StreamIO*)0;

    // look the name up in the directory tree; only files qualify
    DirEntry* found = dirtree->entry(name);
    if (!found || found->dir) return (StreamIO*)0;

    StreamIO* stream = new StreamIO(this, found);
    stream->fullName = name;
    return stream;
}
991
992
unsigned long StorageIO::loadBigBlocks(std::vector<unsigned long> blocks,
993
                                       unsigned char* data, unsigned long maxlen)
994
{
995
    // sentinel
996
    if (!data) return 0;
997
    if (!file.good()) return 0;
998
    if (blocks.size() < 1) return 0;
999
    if (maxlen == 0) return 0;
1000
1001
    // read block one by one, seems fast enough
1002
    unsigned long bytes = 0;
1003
    for (unsigned long i = 0; (i < blocks.size()) && (bytes < maxlen); i++) {
1004
        unsigned long block = blocks[i];
1005
        unsigned long pos =  bbat->blockSize * (block + 1);
1006
        unsigned long p = (bbat->blockSize < maxlen - bytes) ? bbat->blockSize : maxlen - bytes;
1007
        if (pos + p > filesize) p = filesize - pos;
1008
        file.seekg(pos);
1009
        file.read((char*)data + bytes, p);
1010
        if (!file.good()) return 0;
1011
        bytes += p;
1012
    }
1013
1014
    return bytes;
1015
}
1016
1017
unsigned long StorageIO::loadBigBlock(unsigned long block,
1018
                                      unsigned char* data, unsigned long maxlen)
1019
{
1020
    // sentinel
1021
    if (!data) return 0;
1022
    if (!file.good()) return 0;
1023
1024
    // wraps call for loadBigBlocks
1025
    std::vector<unsigned long> blocks;
1026
    blocks.resize(1);
1027
    blocks[ 0 ] = block;
1028
1029
    return loadBigBlocks(blocks, data, maxlen);
1030
}
1031
1032
// return number of bytes which has been read
1033
unsigned long StorageIO::loadSmallBlocks(std::vector<unsigned long> blocks,
1034
        unsigned char* data, unsigned long maxlen)
1035
{
1036
    // sentinel
1037
    if (!data) return 0;
1038
    if (!file.good()) return 0;
1039
    if (blocks.size() < 1) return 0;
1040
    if (maxlen == 0) return 0;
1041
1042
    // our own local buffer
1043
    unsigned char* buf = new unsigned char[ bbat->blockSize ];
1044
1045
    // read small block one by one
1046
    unsigned long bytes = 0;
1047
    for (unsigned long i = 0; (i < blocks.size()) && (bytes < maxlen); i++) {
1048
        unsigned long block = blocks[i];
1049
1050
        // find where the small-block exactly is
1051
        unsigned long pos = block * sbat->blockSize;
1052
        unsigned long bbindex = pos / bbat->blockSize;
1053
        if (bbindex >= sb_blocks.size()) break;
1054
1055
        unsigned long r = loadBigBlock(sb_blocks[ bbindex ], buf, bbat->blockSize);
1056
        if (r != bbat->blockSize) {
1057
            delete[] buf;
1058
            return 0;
1059
        }
1060
1061
        // copy the data
1062
        unsigned offset = pos % bbat->blockSize;
1063
        unsigned long p = (maxlen - bytes < bbat->blockSize - offset) ? maxlen - bytes :  bbat->blockSize - offset;
1064
        p = (sbat->blockSize < p) ? sbat->blockSize : p;
1065
        memcpy(data + bytes, buf + offset, p);
1066
        bytes += p;
1067
    }
1068
1069
    delete[] buf;
1070
1071
    return bytes;
1072
}
1073
1074
unsigned long StorageIO::loadSmallBlock(unsigned long block,
1075
                                        unsigned char* data, unsigned long maxlen)
1076
{
1077
    // sentinel
1078
    if (!data) return 0;
1079
    if (!file.good()) return 0;
1080
1081
    // wraps call for loadSmallBlocks
1082
    std::vector<unsigned long> blocks;
1083
    blocks.resize(1);
1084
    blocks.assign(1, block);
1085
1086
    return loadSmallBlocks(blocks, data, maxlen);
1087
}
1088
1089
// =========== StreamIO ==========
1090
1091
// Binds the stream to storage `s` and directory entry `e`, resolves the
// entry's block chain, allocates the read cache and fills it once.
StreamIO::StreamIO(StorageIO* s, DirEntry* e)
{
    io = s;
    entry = e;

    m_pos = 0;
    eof = false;
    fail = false;

    // entries smaller than the header threshold are chained through the
    // small-block table, all others through the big-block table
    if (entry->size < io->header->threshold)
        blocks = io->sbat->follow(entry->start);
    else
        blocks = io->bbat->follow(entry->start);

    // prepare cache
    cache_size = 4096; // optimal ?
    cache_pos = 0;
    cache_data = new unsigned char[cache_size];
    updateCache();
}
1111
1112
// FIXME tell parent we're gone
1113
StreamIO::~StreamIO()
1114
{
1115
    delete[] cache_data;
1116
}
1117
1118
void StreamIO::seek(unsigned long pos)
1119
{
1120
    m_pos = pos;
1121
}
1122
1123
unsigned long StreamIO::tell()
1124
{
1125
    return m_pos;
1126
}
1127
1128
int StreamIO::getch()
1129
{
1130
    // past end-of-file ?
1131
    if (m_pos > entry->size) return -1;
1132
1133
    // need to update cache ?
1134
    if (!cache_size || (m_pos < cache_pos) ||
1135
            (m_pos >= cache_pos + cache_size))
1136
        updateCache();
1137
1138
    // something bad if we don't get good cache
1139
    if (!cache_size) return -1;
1140
1141
    int data = cache_data[m_pos - cache_pos];
1142
    m_pos++;
1143
1144
    return data;
1145
}
1146
1147
unsigned long StreamIO::read(unsigned long pos, unsigned char* data, unsigned long maxlen)
1148
{
1149
    // sanity checks
1150
    if (!data) return 0;
1151
    if (maxlen == 0) return 0;
1152
1153
    unsigned long totalbytes = 0;
1154
1155
    if (entry->size < io->header->threshold) {
1156
        // small file
1157
        unsigned long index = pos / io->sbat->blockSize;
1158
1159
        if (index >= blocks.size()) return 0;
1160
1161
        unsigned char* buf = new unsigned char[ io->sbat->blockSize ];
1162
        unsigned long offset = pos % io->sbat->blockSize;
1163
        while (totalbytes < maxlen) {
1164
            if (index >= blocks.size()) break;
1165
            io->loadSmallBlock(blocks[index], buf, io->bbat->blockSize);
1166
            unsigned long count = io->sbat->blockSize - offset;
1167
            if (count > maxlen - totalbytes) count = maxlen - totalbytes;
1168
            memcpy(data + totalbytes, buf + offset, count);
1169
            totalbytes += count;
1170
            offset = 0;
1171
            index++;
1172
        }
1173
        delete[] buf;
1174
1175
    } else {
1176
        // big file
1177
        unsigned long index = pos / io->bbat->blockSize;
1178
1179
        if (index >= blocks.size()) return 0;
1180
1181
        unsigned char* buf = new unsigned char[ io->bbat->blockSize ];
1182
        unsigned long offset = pos % io->bbat->blockSize;
1183
        while (totalbytes < maxlen) {
1184
            if (index >= blocks.size()) break;
1185
            unsigned long r = io->loadBigBlock(blocks[index], buf, io->bbat->blockSize);
1186
            if (r != io->bbat->blockSize) {
1187
                delete [] buf;
1188
                return 0;
1189
            }
1190
            unsigned long count = io->bbat->blockSize - offset;
1191
            if (count > maxlen - totalbytes) count = maxlen - totalbytes;
1192
            memcpy(data + totalbytes, buf + offset, count);
1193
            totalbytes += count;
1194
            index++;
1195
            offset = 0;
1196
        }
1197
        delete [] buf;
1198
1199
    }
1200
1201
    return totalbytes;
1202
}
1203
1204
unsigned long StreamIO::read(unsigned char* data, unsigned long maxlen)
1205
{
1206
    unsigned long bytes = read(tell(), data, maxlen);
1207
    m_pos += bytes;
1208
    return bytes;
1209
}
1210
1211
void StreamIO::updateCache()
1212
{
1213
    // sanity check
1214
    if (!cache_data) return;
1215
1216
    cache_pos = m_pos - (m_pos % cache_size);
1217
    unsigned long bytes = cache_size;
1218
    if (cache_pos + bytes > entry->size) bytes = entry->size - cache_pos;
1219
    cache_size = read(cache_pos, cache_data, bytes);
1220
}
1221
1222
1223
// =========== Storage ==========
1224
1225
// Creates the StorageIO object that backs this storage for `filename`.
Storage::Storage(const char* filename)
    : io(new StorageIO(this, filename))
{
}
1229
1230
// Destroys the StorageIO object created by the constructor.
Storage::~Storage()
{
    delete io;
}
1234
1235
int Storage::result()
1236
{
1237
    return io->result;
1238
}
1239
1240
bool Storage::open()
1241
{
1242
    return io->open();
1243
}
1244
1245
void Storage::close()
1246
{
1247
    io->close();
1248
}
1249
1250
std::list<std::string> Storage::entries(const std::string& path)
1251
{
1252
    std::list<std::string> result;
1253
    DirTree* dt = io->dirtree;
1254
    DirEntry* e = dt->entry(path, false);
1255
    if (e) {
1256
        if (e->dir) {
1257
            unsigned parent = dt->indexOf(e);
1258
            std::vector<unsigned> children = dt->children(parent);
1259
            for (unsigned i = 0; i < children.size(); i++)
1260
                result.push_back(dt->entry(children[i])->name);
1261
        }
1262
    }
1263
    return result;
1264
}
1265
1266
bool Storage::isDirectory(const std::string& name)
1267
{
1268
    DirEntry* e = io->dirtree->entry(name, false);
1269
    return e ? e->dir : false;
1270
}
1271
1272
// =========== Stream ==========
1273
1274
// Asks the storage's StorageIO for a StreamIO named `name`. `io` is left
// null when StorageIO::streamIO() returns a null pointer.
Stream::Stream(Storage* storage, const std::string& name)
    : io(storage->io->streamIO(name))
{
}
1278
1279
// FIXME tell parent we're gone
1280
Stream::~Stream()
1281
{
1282
    delete io;
1283
}
1284
1285
std::string Stream::fullName()
1286
{
1287
    return io ? io->fullName : std::string();
1288
}
1289
1290
unsigned long Stream::tell()
1291
{
1292
    return io ? io->tell() : 0;
1293
}
1294
1295
void Stream::seek(unsigned long newpos)
1296
{
1297
    if (io) io->seek(newpos);
1298
}
1299
1300
unsigned long Stream::size()
1301
{
1302
    return io ? io->entry->size : 0;
1303
}
1304
1305
int Stream::getch()
1306
{
1307
    return io ? io->getch() : 0;
1308
}
1309
1310
unsigned long Stream::read(unsigned char* data, unsigned long maxlen)
1311
{
1312
    return io ? io->read(data, maxlen) : 0;
1313
}
1314
1315
bool Stream::eof()
1316
{
1317
    return io ? io->eof : false;
1318
}
1319
1320
bool Stream::fail()
1321
{
1322
    return io ? io->fail : true;
1323
}