/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
/* libwpd
 * Version: MPL 2.0 / LGPLv2.1+
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Major Contributor(s):
 * Copyright (C) 2002 William Lachance (wrlach@gmail.com)
 * Copyright (C) 2002 Marc Maurer (uwog@uwog.net)
 *
 * For minor contributions see the git repository.
 *
 * Alternatively, the contents of this file may be used under the terms
 * of the GNU Lesser General Public License Version 2.1 or later
 * (LGPLv2.1+), in which case the provisions of the LGPLv2.1+ are
 * applicable instead of those above.
 *
 * For further information visit http://libwpd.sourceforge.net
 */

/* "This product is not manufactured, approved, or supported by
 * Corel Corporation or Corel Corporation Limited."
 */

#include <memory>

#include "WP6StylesListener.h"
#include "WP6ContentListener.h"
#include "WP6Parser.h"
#include "WPXHeader.h"
#include "WP6Header.h"
#include "WP60Header.h"
#include "WP61Header.h"
#include "WP6PrefixData.h"
#include "WP6Part.h"
#include "libwpd_internal.h"
#include "WP6DefaultInitialFontPacket.h"
#include "WPXTable.h"

// Creates a WordPerfect 6 parser. The input stream, header and encryption
// handles are passed straight through to WPXParser; nothing else is set up here.
WP6Parser::WP6Parser(librevenge::RVNGInputStream *input, WPXHeader *header, WPXEncryption *encryption)
	: WPXParser(input, header, encryption)
{
}

// Nothing to release explicitly in this class.
WP6Parser::~WP6Parser() = default;

// Builds the prefix data of the document from the given stream.
//
// The number of prefix indices is taken from the parser's header, which is
// treated as a WP6Header. The returned object is allocated with new and is
// handed to the caller, who is responsible for deleting it.
//
// Returns nullptr when construction fails with anything other than a
// FileException (the error is deliberately ignored so that parsing can go on
// without prefix data). A FileException is logged and propagated to the caller.
WP6PrefixData *WP6Parser::getPrefixData(librevenge::RVNGInputStream *input, WPXEncryption *encryption)
{
	try
	{
		return new WP6PrefixData(input, encryption, (static_cast<WP6Header *>(getHeader())->getNumPrefixIndices()));
	}
	catch (const FileException &)
	{
		WPD_DEBUG_MSG(("WordPerfect: Prefix Data most likely corrupted.\n"));
		// TODO: Try to check packet after packet so that we try to recover at least the beginning if the corruption is not at
		//       the beginning.
		throw; // propagate the exception that is already in flight
	}
	catch (...)
	{
		WPD_DEBUG_MSG(("WordPerfect: Prefix Data most likely corrupted. Trying to ignore.\n"));
		// TODO: Try to check packet after packet so that we try to recover at least the beginning if the corruption is not at
		//       the beginning.
		return nullptr;
	}
}

// Runs one complete pass over the document body on behalf of the given
// listener: announces the start of the document, moves the stream to the
// document offset recorded in the header, parses the body and finally
// announces the end of the document.
void WP6Parser::parse(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP6Listener *listener)
{
	listener->startDocument();

	// position the stream where the header says the body begins
	const auto bodyOffset = getHeader()->getDocumentOffset();
	input->seek(bodyOffset, librevenge::RVNG_SEEK_SET);

	WPD_DEBUG_MSG(("WordPerfect: Starting document body parse (position = %ld)\n", static_cast<long>(input->tell())));

	parseDocument(input, encryption, listener);

	listener->endDocument();
}

// Character values for the 32 single-byte extended international character
// tokens; the table is indexed with (token value - 1). The numbers coincide
// with the Latin-1 / Unicode code points of the characters named alongside.
static const unsigned short extendedInternationalCharacterMap[] =
{
	0x00E5, // a with ring above, lower case
	0x00C5, // A with ring above, upper case
	0x00E6, // ae ligature, lower case
	0x00C6, // AE ligature, upper case
	0x00E4, // a with diaeresis, lower case
	0x00C4, // A with diaeresis, upper case
	0x00E1, // a with acute, lower case
	0x00E0, // a with grave, lower case
	0x00E2, // a with circumflex, lower case
	0x00E3, // a with tilde, lower case
	0x00C3, // A with tilde, upper case
	0x00E7, // c with cedilla, lower case
	0x00C7, // C with cedilla, upper case
	0x00EB, // e with diaeresis, lower case
	0x00E9, // e with acute, lower case
	0x00C9, // E with acute, upper case
	0x00E8, // e with grave, lower case
	0x00EA, // e with circumflex, lower case
	0x00ED, // i with acute, lower case
	0x00F1, // n with tilde, lower case
	0x00D1, // N with tilde, upper case
	0x00F8, // o with stroke, lower case
	0x00D8, // O with stroke, upper case
	0x00F5, // o with tilde, lower case
	0x00D5, // O with tilde, upper case
	0x00F6, // o with diaeresis, lower case
	0x00D6, // O with diaeresis, upper case
	0x00FC, // u with diaeresis, lower case
	0x00DC, // U with diaeresis, upper case
	0x00FA, // u with acute, lower case
	0x00F9, // u with grave, lower case
	0x00DF  // sharp s (double s)
};

// parseDocument: consumes the stream one leading byte at a time until it is
// exhausted and dispatches on that byte:
//   0x00        - skipped
//   0x01..0x20  - translated through extendedInternationalCharacterMap and inserted
//   0x21..0x7F  - inserted unchanged as a plain ASCII character
//   otherwise   - handed to WP6Part::constructPart; a non-null part parses
//                 itself against the listener
void WP6Parser::parseDocument(librevenge::RVNGInputStream *input, WPXEncryption *encryption, WP6Listener *listener)
{
	while (!input->isEnd())
	{
		const unsigned char token = readU8(input, encryption);

		// this token is meaningless and is likely just corruption
		if (token == 0x00)
			continue;

		// extended international character: the table is indexed from zero
		if (token <= 0x20)
		{
			listener->insertCharacter(extendedInternationalCharacterMap[token - 1]);
			continue;
		}

		// normal ASCII characters
		if (token <= 0x7F)
		{
			listener->insertCharacter(static_cast<unsigned>(token));
			continue;
		}

		// everything else introduces a part, which may be absent
		std::unique_ptr<WP6Part> part(WP6Part::constructPart(input, encryption, token));
		if (part)
			part->parse(listener);
	}
}

// Lets the first prefix data packet of the requested type (if there is one)
// parse itself against the listener. Does nothing without prefix data.
void WP6Parser::parsePacket(WP6PrefixData *prefixData, int type, WP6Listener *listener)
{
	if (prefixData == nullptr)
		return;

	const auto packetRange = prefixData->getPrefixDataPacketsOfType(type);

	// an empty range means no packet of this type is stored
	if (packetRange.first == packetRange.second)
		return;

	packetRange.first->second->parse(listener);
}

// Lets every prefix data packet of the requested type parse itself against
// the listener, in the order the range yields them. Does nothing without
// prefix data.
void WP6Parser::parsePackets(WP6PrefixData *prefixData, int type, WP6Listener *listener)
{
	if (prefixData == nullptr)
		return;

	const auto packetRange = prefixData->getPrefixDataPacketsOfType(type);

	auto current = packetRange.first;
	while (current != packetRange.second)
	{
		current->second->parse(listener);
		++current;
	}
}

// WP6Parser::parse() reads and parses a WordPerfect document in two passes over
// the same input: a styles pass that collects page spans and table
// information, followed by a content pass that sends the document to the
// given document interface.
void WP6Parser::parse(librevenge::RVNGTextInterface *documentInterface)
{
	std::list<WPXPageSpan> pageList;
	WPXTableList tableList;

	librevenge::RVNGInputStream *const input = getInput();
	WPXEncryption *const encryption = getEncryption();

	// owned here for the whole parse; may be null if the prefix data was ignored
	const std::unique_ptr<WP6PrefixData> prefixData(getPrefixData(input, encryption));
	WP6PrefixData *const prefixPackets = prefixData.get();

	// first pass: gather table border information and per-page properties
	WP6StylesListener stylesListener(pageList, tableList);
	stylesListener.setPrefixData(prefixPackets);
	parse(input, encryption, &stylesListener);

	// collapse runs of equal neighbouring page spans (produced by page breaks)
	// into the first span of the run, accumulating the span counts
	auto kept = pageList.begin();
	auto candidate = pageList.begin();
	while (candidate != pageList.end())
	{
		if (candidate == kept || !(*kept == *candidate))
		{
			// first element, or a span that differs: it starts a new run
			kept = candidate;
			++candidate;
			continue;
		}

		kept->setPageSpan(kept->getPageSpan() + candidate->getPageSpan());
		candidate = pageList.erase(candidate);
	}

	// second pass: emit the body of the target document through the interface
	WP6ContentListener listener(pageList, tableList, documentInterface);
	listener.setPrefixData(prefixPackets);

	// the initial prefix packets parse themselves before the body is walked
	parsePacket(prefixPackets, WP6_INDEX_HEADER_EXTENDED_DOCUMENT_SUMMARY, &listener);
	parsePacket(prefixPackets, WP6_INDEX_HEADER_INITIAL_FONT, &listener);
	parsePackets(prefixPackets, WP6_INDEX_HEADER_OUTLINE_STYLE, &listener);

	parse(input, encryption, &listener);
}

// Parses the parser's input as a sub-document and sends it to the given
// document interface.
//
// The body is walked twice without encryption (nullptr is passed): first with
// a styles listener that fills the page and table lists, then, after
// rewinding the stream to offset 0, with a content listener that uses them.
//
// A FileException raised during either pass is logged and propagated to the
// caller.
void WP6Parser::parseSubDocument(librevenge::RVNGTextInterface *documentInterface)
{
	std::list<WPXPageSpan> pageList;
	WPXTableList tableList;

	librevenge::RVNGInputStream *input = getInput();

	try
	{
		// first pass: collect page and table information
		WP6StylesListener stylesListener(pageList, tableList);
		stylesListener.startSubDocument();
		parseDocument(input, nullptr, &stylesListener);
		stylesListener.endSubDocument();

		// rewind so the content pass reads the stream from the start
		input->seek(0, librevenge::RVNG_SEEK_SET);

		// second pass: emit the content
		WP6ContentListener listener(pageList, tableList, documentInterface);
		listener.startSubDocument();
		parseDocument(input, nullptr, &listener);
		listener.endSubDocument();
	}
	catch (const FileException &)
	{
		WPD_DEBUG_MSG(("WordPerfect: File Exception. Parse terminated prematurely."));
		throw; // propagate the exception that is already in flight
	}
}
/* vim:set shiftwidth=4 softtabstop=4 noexpandtab: */