/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
/* libwpd
 * Version: MPL 2.0 / LGPLv2.1+
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Major Contributor(s):
 * Copyright (C) 2004 Marc Maurer (uwog@uwog.net)
 * Copyright (C) 2004 Fridrich Strba (fridrich.strba@bluewin.ch)
 *
 * For minor contributions see the git repository.
 *
 * Alternatively, the contents of this file may be used under the terms
 * of the GNU Lesser General Public License Version 2.1 or later
 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
 * applicable instead of those above.
 *
 * For further information visit http://libwpd.sourceforge.net
 */

/* "This product is not manufactured, approved, or supported by
 * Corel Corporation or Corel Corporation Limited."
 */
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
#include "WP3Parser.h"
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
#include <memory>
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
#include "WPXHeader.h"
|
|
Packit |
443c7d |
#include "WP3Part.h"
|
|
Packit |
443c7d |
#include "WP3ContentListener.h"
|
|
Packit |
443c7d |
#include "WP3StylesListener.h"
|
|
Packit |
443c7d |
#include "WP3ResourceFork.h"
|
|
Packit |
443c7d |
#include "libwpd_internal.h"
|
|
Packit |
443c7d |
#include "WPXTable.h"
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
// Constructs a WP3 parser. The input stream, header and encryption objects
// are handed unchanged to the WPXParser base class; nothing else is set up here.
WP3Parser::WP3Parser(librevenge::RVNGInputStream *input, WPXHeader *header, WPXEncryption *encryption)
	: WPXParser(input, header, encryption)
{
}
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
// Nothing to release in this class itself; defined out of line in this
// translation unit, exactly like the empty body it replaces.
WP3Parser::~WP3Parser() = default;
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
// Builds the resource fork reader for this document.
//
// Returns a freshly allocated WP3ResourceFork (allocated with new and returned
// as a raw pointer), or nullptr when there is no header or the header's
// document offset is 0x10 or less.
WP3ResourceFork *WP3Parser::getResourceFork(librevenge::RVNGInputStream *input, WPXEncryption *encryption)
{
	// Certain WP2 documents actually don't contain resource fork, so check for its existence
	const bool hasResourceFork = getHeader() && getHeader()->getDocumentOffset() > 0x10;
	if (hasResourceFork)
		return new WP3ResourceFork(input, encryption);

	WPD_DEBUG_MSG(("WP3Parser: Document does not contain resource fork\n"));
	return nullptr;
}
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
// Runs one complete pass over the document body for the given listener:
// startDocument(), seek to the document offset stored in the header,
// parseDocument(), endDocument().
//
// NOTE(review): getHeader() is dereferenced without a null check here, whereas
// getResourceFork() tolerates a missing header -- confirm callers guarantee one.
void WP3Parser::parse(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP3Listener *listener)
{
	listener->startDocument();

	// position the stream on the first byte of the document body
	const auto bodyOffset = getHeader()->getDocumentOffset();
	input->seek(bodyOffset, librevenge::RVNG_SEEK_SET);
	WPD_DEBUG_MSG(("WordPerfect: Starting document body parse (position = %ld)\n",(long)input->tell()));

	parseDocument(input, encryption, listener);

	listener->endDocument();
}
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
// parseDocument: parses a document body (may call itself recursively, on other streams, or itself)
|
|
Packit |
443c7d |
void WP3Parser::parseDocument(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP3Listener *listener)
|
|
Packit |
443c7d |
{
|
|
Packit |
443c7d |
while (!input->isEnd())
|
|
Packit |
443c7d |
{
|
|
Packit |
443c7d |
unsigned char readVal;
|
|
Packit |
443c7d |
readVal = readU8(input, encryption);
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
if (readVal == 0 || readVal == 0x7F || readVal == 0xFF)
|
|
Packit |
443c7d |
{
|
|
Packit |
443c7d |
// FIXME: VERIFY: is this IF clause correct? (0xFF seems to be OK at least)
|
|
Packit |
443c7d |
// do nothing: this token is meaningless and is likely just corruption
|
|
Packit |
443c7d |
}
|
|
Packit |
443c7d |
else if (readVal >= (unsigned char)0x01 && readVal <= (unsigned char)0x1F)
|
|
Packit |
443c7d |
{
|
|
Packit |
443c7d |
// control characters ?
|
|
Packit |
443c7d |
}
|
|
Packit |
443c7d |
else if (readVal >= (unsigned char)0x20 && readVal <= (unsigned char)0x7E)
|
|
Packit |
443c7d |
{
|
|
Packit |
443c7d |
listener->insertCharacter(readVal);
|
|
Packit |
443c7d |
}
|
|
Packit |
443c7d |
else
|
|
Packit |
443c7d |
{
|
|
Packit |
443c7d |
std::unique_ptr<WP3Part> part(WP3Part::constructPart(input, encryption, readVal));
|
|
Packit |
443c7d |
if (part)
|
|
Packit |
443c7d |
part->parse(listener);
|
|
Packit |
443c7d |
}
|
|
Packit |
443c7d |
}
|
|
Packit |
443c7d |
}
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
// Parses the whole document in two passes and emits it through textInterface.
//
// Pass 1 feeds a WP3StylesListener, which fills pageList, tableList and
// subDocuments. Consecutive equal page spans are then merged. Pass 2 feeds a
// WP3ContentListener that sends the document body to textInterface.
//
// The resource fork and the collected sub-documents are owned by RAII objects
// declared before the listeners, so they are released after the listeners are
// destroyed and regardless of which exception type propagates.
//
// Throws: a FileException raised while parsing is logged and re-thrown.
void WP3Parser::parse(librevenge::RVNGTextInterface *textInterface)
{
	// Deletes every pointer stored in the referenced vector when the scope is left.
	struct SubDocumentsOwner
	{
		explicit SubDocumentsOwner(std::vector<WP3SubDocument *> &docs) : m_docs(docs) {}
		SubDocumentsOwner(const SubDocumentsOwner &) = delete;
		SubDocumentsOwner &operator=(const SubDocumentsOwner &) = delete;
		~SubDocumentsOwner()
		{
			for (auto *doc : m_docs)
				delete doc; // deleting a null pointer is a no-op
			m_docs.clear();
		}

		std::vector<WP3SubDocument *> &m_docs;
	};

	librevenge::RVNGInputStream *input = getInput();
	WPXEncryption *encryption = getEncryption();
	std::list<WPXPageSpan> pageList;
	WPXTableList tableList;
	std::vector<WP3SubDocument *> subDocuments;
	const SubDocumentsOwner subDocumentsOwner(subDocuments);

	try
	{
		// may be null: getResourceFork() returns nullptr when there is no resource fork
		const std::unique_ptr<WP3ResourceFork> resourceFork(getResourceFork(input, encryption));

		// do a "first-pass" parse of the document
		// gather table border information, page properties (per-page)
		WP3StylesListener stylesListener(pageList, tableList, subDocuments);
		stylesListener.setResourceFork(resourceFork.get());
		parse(input, encryption, &stylesListener);

		// postprocess the pageList == remove duplicate page spans due to the page breaks:
		// a span equal to its predecessor is folded into it by summing the span counts
		auto previousPage = pageList.begin();
		for (auto page = pageList.begin(); page != pageList.end(); /* advanced in the body */)
		{
			if ((page != previousPage) && (*previousPage == *page))
			{
				previousPage->setPageSpan(previousPage->getPageSpan() + page->getPageSpan());
				page = pageList.erase(page);
			}
			else
			{
				previousPage = page;
				++page;
			}
		}

		// second pass: here is where we actually send the messages to the target app
		// that are necessary to emit the body of the target document
		WP3ContentListener listener(pageList, subDocuments, textInterface); // FIXME: SHOULD BE CONTENT_LISTENER, AND SHOULD BE PASSED TABLE DATA!
		listener.setResourceFork(resourceFork.get());
		parse(input, encryption, &listener);
	}
	catch (const FileException &)
	{
		WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely."));

		// cleanup is handled by resourceFork / subDocumentsOwner during unwinding
		throw;
	}
}
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
// Parses the parser's input stream as a sub-document, in two passes, and emits
// it through textInterface.
//
// Pass 1 feeds a WP3StylesListener; the stream is then rewound to offset 0 and
// pass 2 feeds a WP3ContentListener. Both passes call parseDocument() with a
// null encryption object, bracketed by startSubDocument()/endSubDocument().
//
// The collected sub-documents are owned by an RAII object declared before the
// listeners, so they are released after the listeners are destroyed and
// regardless of which exception type propagates.
//
// Throws: a FileException raised while parsing is logged and re-thrown.
void WP3Parser::parseSubDocument(librevenge::RVNGTextInterface *textInterface)
{
	// Deletes every pointer stored in the referenced vector when the scope is left.
	struct SubDocumentsOwner
	{
		explicit SubDocumentsOwner(std::vector<WP3SubDocument *> &docs) : m_docs(docs) {}
		SubDocumentsOwner(const SubDocumentsOwner &) = delete;
		SubDocumentsOwner &operator=(const SubDocumentsOwner &) = delete;
		~SubDocumentsOwner()
		{
			for (auto *doc : m_docs)
				delete doc; // deleting a null pointer is a no-op
			m_docs.clear();
		}

		std::vector<WP3SubDocument *> &m_docs;
	};

	std::list<WPXPageSpan> pageList;
	WPXTableList tableList;
	std::vector<WP3SubDocument *> subDocuments;
	const SubDocumentsOwner subDocumentsOwner(subDocuments);

	librevenge::RVNGInputStream *input = getInput();

	try
	{
		// first pass: collect page and table information
		WP3StylesListener stylesListener(pageList, tableList, subDocuments);
		stylesListener.startSubDocument();
		parseDocument(input, nullptr, &stylesListener);
		stylesListener.endSubDocument();

		// rewind so the second pass reads the same data again
		input->seek(0, librevenge::RVNG_SEEK_SET);

		// second pass: emit the content
		WP3ContentListener listener(pageList, subDocuments, textInterface);
		listener.startSubDocument();
		parseDocument(input, nullptr, &listener);
		listener.endSubDocument();
	}
	catch (const FileException &)
	{
		WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely."));

		// cleanup is handled by subDocumentsOwner during unwinding
		throw;
	}
}
|
|
Packit |
443c7d |
|
|
Packit |
443c7d |
/* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */
|