Blob Blame History Raw
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * This file is part of the libmspub project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#include "MSPUBParser97.h"

#include <algorithm>
#include <limits>
#include <map>
#include <memory>

#include "MSPUBCollector.h"
#include "MSPUBTypes.h"
#include "libmspub_utils.h"

namespace libmspub
{

// Builds a parser on top of MSPUBParser2k. The banner flag starts out false
// (it is updated in parseDocument), and the collector is asked to use its
// encoding heuristic.
MSPUBParser97::MSPUBParser97(librevenge::RVNGInputStream *input, MSPUBCollector *collector)
  : MSPUBParser2k(input, collector)
  , m_isBanner(false)
{
  m_collector->useEncodingHeuristic();
}

// Returns the text marker value used by this parser, which is always 0x0000.
unsigned short MSPUBParser97::getTextMarker() const
{
  const unsigned short marker = 0x0000;
  return marker;
}

// Returns the text id offset used by this parser, which is always 0x46.
unsigned MSPUBParser97::getTextIdOffset() const
{
  const unsigned textIdOffset = 0x46;
  return textIdOffset;
}

bool MSPUBParser97::parse()
{
  std::unique_ptr<librevenge::RVNGInputStream> contents(m_input->getSubStreamByName("Contents"));
  if (!contents)
  {
    MSPUB_DEBUG_MSG(("Couldn't get contents stream.\n"));
    return false;
  }
  if (!parseContents(contents.get()))
  {
    MSPUB_DEBUG_MSG(("Couldn't parse contents stream.\n"));
    return false;
  }
  return m_collector->go();
}

// Reads the page setup from the document chunk, if one is known.
//
// Starting 0x12 bytes into the document chunk this reads a U16 coordinate
// system mark followed by the U32 width and U32 height, which are passed to
// the collector as EMU values. A mark of 0x0007 sets the banner flag.
//
// Returns true if a document chunk index was available, false otherwise.
bool MSPUBParser97::parseDocument(librevenge::RVNGInputStream *input)
{
  const bool haveDocumentChunk = bool(m_documentChunkIndex);
  if (!haveDocumentChunk)
  {
    return false;
  }

  input->seek(m_contentChunks[m_documentChunkIndex.get()].offset + 0x12, librevenge::RVNG_SEEK_SET);

  const unsigned short mark = readU16(input);
  m_isBanner = (mark == 0x0007);

  // Both dimensions are read before either is handed to the collector.
  const unsigned pageWidth = readU32(input);
  const unsigned pageHeight = readU32(input);
  m_collector->setWidthInEmu(pageWidth);
  m_collector->setHeightInEmu(pageHeight);
  return true;
}

// Reads the document text from the contents stream and passes it to the
// collector as one text string per shape.
//
// The U32 stored at 0x12 gives the position of a header. 14 bytes into that
// header come the text start and end offsets (U32 each) followed by four U16
// property indices. The raw characters are split into shapes (ended by 0x0C),
// paragraphs (ended by 0x0D 0x0A) and character-style spans (from
// getSpansInfo). Each shape's paragraphs are registered with the collector
// under the shape's ordinal number.
//
// All three cursors (shape, paragraph, span) index into textInfo.m_chars.
// A paragraph or span that reaches past the end of the enclosing unit is cut
// at that end and resumed in the next unit.
void MSPUBParser97::parseContentsTextIfNecessary(librevenge::RVNGInputStream *input)
{
  input->seek(0x12, librevenge::RVNG_SEEK_SET);
  input->seek(readU32(input), librevenge::RVNG_SEEK_SET);
  input->seek(14, librevenge::RVNG_SEEK_CUR);
  unsigned textStart = readU32(input);
  unsigned textEnd = readU32(input);
  unsigned prop1Index = readU16(input);
  unsigned prop2Index = readU16(input);
  unsigned prop3Index = readU16(input);
  unsigned prop3End = readU16(input);
  std::vector<SpanInfo97> spanInfos = getSpansInfo(input, prop1Index,
                                                   prop2Index, prop3Index, prop3End);
  input->seek(textStart, librevenge::RVNG_SEEK_SET);
  // getTextInfo clamps the length, so a bogus textEnd < textStart is harmless.
  TextInfo97 textInfo = getTextInfo(input, textEnd - textStart);
  unsigned iParaEnd = 0, iSpanEnd = 0;
  unsigned currentParaIndex = 0;
  unsigned currentSpanIndex = 0;
  for (unsigned iShapeEnd = 0; iShapeEnd < textInfo.m_shapeEnds.size(); ++iShapeEnd)
  {
    unsigned shapeEnd = std::min<unsigned>(textInfo.m_shapeEnds[iShapeEnd], textInfo.m_chars.size());
    std::vector<TextParagraph> shapeParas;
    while (currentParaIndex < shapeEnd)
    {
      // Without any more recorded paragraph ends, the rest of the shape is
      // treated as one paragraph.
      unsigned paraEnd = iParaEnd < textInfo.m_paragraphEnds.size() ?
                         textInfo.m_paragraphEnds[iParaEnd++] : shapeEnd;
      if (paraEnd > shapeEnd)
      {
        // The paragraph continues in the next shape: un-consume its end.
        --iParaEnd;
        paraEnd = shapeEnd;
      }
      std::vector<TextSpan> paraSpans;
      while (currentSpanIndex < paraEnd)
      {
        // Without any more span records, the rest of the paragraph gets a
        // default character style.
        const SpanInfo97 &spanInfo = iSpanEnd < spanInfos.size() ?
                                     spanInfos[iSpanEnd++] :
                                     SpanInfo97(paraEnd, CharacterStyle());
        unsigned spanEnd = spanInfo.m_spanEnd;
        if (spanEnd < currentSpanIndex)
        {
          // A span end behind the current position would move the cursor
          // backwards and emit already processed characters a second time
          // (and make the size computation below wrap around). Skip it.
          // This can only happen for a record taken from spanInfos, so
          // iSpanEnd has advanced and the loop still terminates.
          continue;
        }
        if (spanEnd > paraEnd)
        {
          // The span continues in the next paragraph: un-consume its end.
          --iSpanEnd;
          spanEnd = paraEnd;
        }
        const CharacterStyle &spanStyle = spanInfo.m_style;
        std::vector<unsigned char> spanChars;
        spanChars.reserve(std::min(spanEnd - currentSpanIndex, m_length));
        for (unsigned i = currentSpanIndex; i < spanEnd; ++i)
        {
          unsigned char ch = textInfo.m_chars[i];
          if (ch == 0xB) // A vertical tab is emitted as a line break.
          {
            spanChars.push_back('\n');
          }
          else if (ch == 0x0D)
          {
            if (i + 1 < spanEnd && textInfo.m_chars[i + 1] == 0x0A)
            {
              ++i; // ignore the 0x0D and advance past the 0x0A
            }
          }
          else if (ch == 0x0C)
          {
            // ignore the 0x0C
          }
          else
          {
            spanChars.push_back(ch);
          }
        }
        paraSpans.push_back(TextSpan(spanChars, spanStyle));
        currentSpanIndex = spanEnd;
      }
      shapeParas.push_back(TextParagraph(paraSpans, ParagraphStyle()));
      currentParaIndex = paraEnd;
    }
    m_collector->addTextString(shapeParas, iShapeEnd);
  }
}

// Collects the character-style spans stored in the 0x200-byte property blocks
// numbered prop1Index up to (but not including) prop2Index.
//
// Layout of one block, as read here:
//   - the last byte (offset 0x1FF) holds the number of entries N;
//   - the block begins with N + 1 U32 values: a start offset followed by N
//     span ends;
//   - then come N one-byte style indices, one per span;
//   - after optional zero padding, length-prefixed style records fill the
//     space up to 0x1FF. A record is identified by the position of its length
//     byte inside the block divided by two.
//
// Returns one SpanInfo97 per entry, in block order. A style index that does
// not match any record yields a default CharacterStyle. The prop3 arguments
// are unused.
std::vector<MSPUBParser97::SpanInfo97> MSPUBParser97::getSpansInfo(
  librevenge::RVNGInputStream *input,
  unsigned prop1Index, unsigned prop2Index, unsigned /* prop3Index */,
  unsigned /* prop3End */)
{
  std::vector<SpanInfo97> ret;
  for (unsigned i = prop1Index; i < prop2Index; ++i)
  {
    const unsigned offset = i * 0x200;
    input->seek(offset + 0x1FF, librevenge::RVNG_SEEK_SET);
    const unsigned numEntries = readU8(input);
    input->seek(offset, librevenge::RVNG_SEEK_SET);
    // Skip the first thing; it is not an end
    // NOTE(review): span ends are made relative to the first offset stored in
    // *this* block. Confirm that this is what the consumer expects when a
    // document has more than one block.
    const unsigned start = readU32(input);

    // The span ends and style indices belong to this block only. Keeping
    // them in a list shared by all blocks would make later blocks read too
    // many index bytes and append the spans of earlier blocks again.
    std::vector<unsigned> spanEnds;
    spanEnds.reserve(numEntries);
    for (unsigned j = 0; j < numEntries; ++j)
    {
      spanEnds.push_back(readU32(input) - start);
    }
    std::vector<unsigned char> spanStyleIndices;
    spanStyleIndices.reserve(numEntries);
    for (unsigned j = 0; j < numEntries; ++j)
    {
      spanStyleIndices.push_back(readU8(input));
    }

    // Skip the zero padding in front of the style records, then step back
    // onto the byte that ended the scan.
    while (stillReading(input, offset + 0x200) && readU8(input) == 0)
    {
      ;
    }
    input->seek(-1, librevenge::RVNG_SEEK_CUR);

    std::map<unsigned char, CharacterStyle> stylesByIndex;
    while (stillReading(input, offset + 0x1FF))
    {
      unsigned length = readU8(input);
      unsigned nextOffset = input->tell() + length;
      // The record is keyed by the position of its length byte in the block.
      auto index = static_cast<unsigned char>((input->tell() - 1 - offset) / 2);
      stylesByIndex[index] = readCharacterStyle(input, length);
      // Always continue at the declared end of the record, regardless of how
      // much of it was actually consumed.
      input->seek(nextOffset, librevenge::RVNG_SEEK_SET);
    }

    for (unsigned j = 0; j < spanEnds.size(); ++j)
    {
      ret.push_back(SpanInfo97(spanEnds[j], stylesByIndex[spanStyleIndices[j]]));
    }
  }
  return ret;
}

// Decodes a character style record of `length` bytes starting at the current
// stream position.
//
// Only the fields that fit into `length` are read; every field is located by
// an absolute seek relative to the start of the record:
//   0x0  flags byte (bit 0: bold, bit 1: italic)
//   0x2  font index (U8)
//   0x4  text size variation in half points (S16, or S8 if only 5 bytes)
//   0x8  flags byte (bit 0: single underline)
//   0xC  colour entry (U32), mapped through getColorIndexByQuillEntry
// The text size is 10pt plus half the variation.
CharacterStyle MSPUBParser97::readCharacterStyle(
  librevenge::RVNGInputStream *input, unsigned length)
{
  CharacterStyle result;

  const unsigned recordStart = input->tell();
  int halfPointDelta = 0;

  if (length >= 1)
  {
    const unsigned char flags = readU8(input);
    result.bold = flags & 0x1;
    result.italic = flags & 0x2;
  }
  if (length >= 3)
  {
    input->seek(recordStart + 0x2, librevenge::RVNG_SEEK_SET);
    result.fontIndex = readU8(input);
  }
  if (length >= 9)
  {
    input->seek(recordStart + 0x8, librevenge::RVNG_SEEK_SET);
    const unsigned char underlineFlags = readU8(input);
    if (underlineFlags & 0x1)
    {
      result.underline = Underline::Single;
    }
  }
  if (length >= 5)
  {
    input->seek(recordStart + 0x4, librevenge::RVNG_SEEK_SET);
    if (length >= 6)
    {
      halfPointDelta = readS16(input);
    }
    else
    {
      halfPointDelta = readS8(input);
    }
  }
  if (length >= 16)
  {
    input->seek(recordStart + 0xC, librevenge::RVNG_SEEK_SET);
    const unsigned quillColor = readU32(input);
    result.colorIndex = getColorIndexByQuillEntry(quillColor);
  }
  result.textSizeInPt = 10 + static_cast<double>(halfPointDelta) / 2;

  return result;
}

// Reads up to `length` bytes of text from the current stream position and
// records where paragraphs and shapes end.
//
// The requested length is capped at m_length. A paragraph end is recorded
// after each 0x0D 0x0A pair and a shape end after each 0x0C byte; both are
// stored as the number of characters read so far.
MSPUBParser97::TextInfo97 MSPUBParser97::getTextInfo(librevenge::RVNGInputStream *input, unsigned length)
{
  const unsigned cappedLength = std::min(length, m_length); // sanity check
  std::vector<unsigned char> text;
  text.reserve(cappedLength);
  std::vector<unsigned> paraEnds;
  std::vector<unsigned> shapeEnds;
  const unsigned textBegin = input->tell();
  unsigned char previous = '\0';
  while (stillReading(input, textBegin + cappedLength))
  {
    const unsigned char current = readU8(input);
    text.push_back(current);
    const bool endsParagraph = (previous == 0xD) && (current == 0xA);
    if (endsParagraph)
    {
      paraEnds.push_back(text.size());
    }
    else if (current == 0xC)
    {
      shapeEnds.push_back(text.size());
    }
    previous = current;
  }
  return TextInfo97(text, paraEnds, shapeEnds);
}

// Shifts a coordinate by a fixed origin offset: 120 inches for banners,
// 25 inches otherwise, expressed in EMUs. The result saturates at the
// smallest int instead of wrapping around.
int MSPUBParser97::translateCoordinateIfNecessary(int coordinate) const
{
  const int shift = (m_isBanner ? 120 : 25) * EMUS_IN_INCH;
  const int lowest = std::numeric_limits<int>::min();
  // Subtracting would underflow for anything below lowest + shift.
  return (lowest + shift > coordinate) ? lowest : coordinate - shift;
}

// Returns the first line offset used by this parser, which is always 0x22.
unsigned MSPUBParser97::getFirstLineOffset() const
{
  const unsigned firstLineOffset = 0x22;
  return firstLineOffset;
}

// Returns the second line offset used by this parser, which is always 0x2D.
unsigned MSPUBParser97::getSecondLineOffset() const
{
  const unsigned secondLineOffset = 0x2D;
  return secondLineOffset;
}

// Returns the shape fill type offset used by this parser, which is always 0x20.
unsigned MSPUBParser97::getShapeFillTypeOffset() const
{
  const unsigned fillTypeOffset = 0x20;
  return fillTypeOffset;
}

// Returns the shape fill colour offset used by this parser, which is always 0x18.
unsigned MSPUBParser97::getShapeFillColorOffset() const
{
  const unsigned fillColorOffset = 0x18;
  return fillColorOffset;
}

}

/* vim:set shiftwidth=2 softtabstop=2 expandtab: */