/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
 * This file is part of the libmspub project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#include "MSPUBParser97.h"

#include <algorithm>
#include <limits>
#include <map>
#include <memory>
#include <utility>
#include <vector>

#include "MSPUBCollector.h"
#include "MSPUBTypes.h"
#include "libmspub_utils.h"

namespace libmspub
{

/** Construct the parser on top of MSPUBParser2k and ask the collector to
  * use its encoding heuristic.
  */
MSPUBParser97::MSPUBParser97(librevenge::RVNGInputStream *input, MSPUBCollector *collector)
  : MSPUBParser2k(input, collector), m_isBanner(false)
{
  m_collector->useEncodingHeuristic();
}

/** Text marker value used by this parser. */
unsigned short MSPUBParser97::getTextMarker() const
{
  return 0x0000;
}

/** Offset of the text id used by this parser. */
unsigned MSPUBParser97::getTextIdOffset() const
{
  return 0x46;
}

/** Parse the "Contents" sub-stream and hand the result to the collector.
  *
  * @return false if the sub-stream is missing or cannot be parsed,
  *         otherwise whatever MSPUBCollector::go() returns.
  */
bool MSPUBParser97::parse()
{
  std::unique_ptr<librevenge::RVNGInputStream> contents(m_input->getSubStreamByName("Contents"));
  if (!contents)
  {
    MSPUB_DEBUG_MSG(("Couldn't get contents stream.\n"));
    return false;
  }
  if (!parseContents(contents.get()))
  {
    MSPUB_DEBUG_MSG(("Couldn't parse contents stream.\n"));
    return false;
  }
  return m_collector->go();
}

/** Read the page dimensions from the document chunk.
  *
  * Seeks to 0x12 bytes past the document chunk's offset, reads a 16-bit
  * mark (the value 0x0007 sets m_isBanner) followed by a 32-bit width and
  * a 32-bit height, which are forwarded to the collector.
  *
  * @return true if a document chunk index is known, false otherwise.
  */
bool MSPUBParser97::parseDocument(librevenge::RVNGInputStream *input)
{
  if (bool(m_documentChunkIndex))
  {
    input->seek(m_contentChunks[m_documentChunkIndex.get()].offset + 0x12, librevenge::RVNG_SEEK_SET);
    unsigned short coordinateSystemMark = readU16(input);
    m_isBanner = coordinateSystemMark == 0x0007;
    unsigned width = readU32(input);
    unsigned height = readU32(input);
    m_collector->setWidthInEmu(width);
    m_collector->setHeightInEmu(height);
    return true;
  }
  return false;
}

/** Read the text of the contents stream and pass it, shape by shape, to
  * the collector.
  *
  * The 32-bit value at 0x12 is followed as an offset; 14 bytes after it
  * come the text start and end offsets and four 16-bit property indices.
  * The text is split into shapes (m_shapeEnds), each shape into paragraphs
  * (m_paragraphEnds) and each paragraph into spans (getSpansInfo()). A
  * paragraph or span record that reaches past the enclosing unit is cut at
  * the boundary and reused for the next unit.
  */
void MSPUBParser97::parseContentsTextIfNecessary(librevenge::RVNGInputStream *input)
{
  input->seek(0x12, librevenge::RVNG_SEEK_SET);
  input->seek(readU32(input), librevenge::RVNG_SEEK_SET);
  input->seek(14, librevenge::RVNG_SEEK_CUR);
  unsigned textStart = readU32(input);
  unsigned textEnd = readU32(input);
  unsigned prop1Index = readU16(input);
  unsigned prop2Index = readU16(input);
  unsigned prop3Index = readU16(input);
  unsigned prop3End = readU16(input);
  std::vector<SpanInfo97> spanInfos = getSpansInfo(input, prop1Index,
                                                   prop2Index, prop3Index, prop3End);
  input->seek(textStart, librevenge::RVNG_SEEK_SET);
  // getTextInfo() clamps the length, so a textEnd before textStart cannot
  // cause an oversized read.
  TextInfo97 textInfo = getTextInfo(input, textEnd - textStart);

  // Cursors into the paragraph-end and span-end tables, and the character
  // positions reached so far. All four persist across shapes.
  unsigned iParaEnd = 0, iSpanEnd = 0;
  unsigned currentParaIndex = 0;
  unsigned currentSpanIndex = 0;
  for (unsigned iShapeEnd = 0; iShapeEnd < textInfo.m_shapeEnds.size(); ++iShapeEnd)
  {
    unsigned shapeEnd = std::min<unsigned>(textInfo.m_shapeEnds[iShapeEnd], textInfo.m_chars.size());
    std::vector<TextParagraph> shapeParas;
    while (currentParaIndex < shapeEnd)
    {
      // Without a paragraph record left, the rest of the shape is one paragraph.
      unsigned paraEnd = iParaEnd < textInfo.m_paragraphEnds.size() ?
                         textInfo.m_paragraphEnds[iParaEnd++] : shapeEnd;
      if (paraEnd > shapeEnd)
      {
        // The paragraph continues in the next shape: revisit the record there.
        --iParaEnd;
        paraEnd = shapeEnd;
      }
      std::vector<TextSpan> paraSpans;
      while (currentSpanIndex < paraEnd)
      {
        // Without a span record left, the rest of the paragraph gets a
        // default-styled span.
        const SpanInfo97 &spanInfo = iSpanEnd < spanInfos.size() ?
                                     spanInfos[iSpanEnd++] : SpanInfo97(paraEnd, CharacterStyle());
        unsigned spanEnd = spanInfo.m_spanEnd;
        if (spanEnd > paraEnd)
        {
          // The span continues in the next paragraph: revisit the record there.
          --iSpanEnd;
          spanEnd = paraEnd;
        }
        else if (spanEnd < currentSpanIndex)
        {
          // A span end before the cursor would underflow the unsigned
          // subtraction below and move the cursor backwards, emitting text
          // twice. Treat such a record as an empty span instead.
          spanEnd = currentSpanIndex;
        }
        const CharacterStyle &spanStyle = spanInfo.m_style;
        std::vector<unsigned char> spanChars;
        spanChars.reserve(std::min<unsigned>(spanEnd - currentSpanIndex, m_length));
        for (unsigned i = currentSpanIndex; i < spanEnd; ++i)
        {
          unsigned char ch = textInfo.m_chars[i];
          if (ch == 0xB)
          {
            // A vertical tab is emitted as a newline.
            spanChars.push_back('\n');
          }
          else if (ch == 0x0D)
          {
            // The 0x0D itself is dropped; a 0x0A directly after it inside
            // this span is skipped as well.
            if (i + 1 < spanEnd && textInfo.m_chars[i + 1] == 0x0A)
            {
              ++i;
            }
          }
          else if (ch == 0x0C)
          {
            // ignore the 0x0C
          }
          else
          {
            spanChars.push_back(ch);
          }
        }
        paraSpans.push_back(TextSpan(spanChars, spanStyle));
        currentSpanIndex = spanEnd;
      }
      shapeParas.push_back(TextParagraph(paraSpans, ParagraphStyle()));
      currentParaIndex = paraEnd;
    }
    m_collector->addTextString(shapeParas, iShapeEnd);
  }
}

/** Read the span ends and character styles stored in the 0x200-byte pages
  * prop1Index .. prop2Index - 1.
  *
  * Page layout as read here: a 32-bit start value, then one 32-bit end per
  * entry (stored relative to that start), then one style-index byte per
  * span, zero padding, and length-prefixed character style records. The
  * last byte of the page (offset 0x1FF) holds the entry count.
  *
  * @return one SpanInfo97 per span end; spans without a style index get a
  *         default-constructed CharacterStyle.
  */
std::vector<MSPUBParser97::SpanInfo97> MSPUBParser97::getSpansInfo(
  librevenge::RVNGInputStream *input,
  unsigned prop1Index, unsigned prop2Index,
  unsigned /* prop3Index */, unsigned /* prop3End */)
{
  // NOTE(review): spanEnds lives outside the page loop, so from the second
  // page on both the style-index read count and the final push loop also
  // cover the entries of earlier pages. Confirm whether that is intended.
  std::vector<unsigned> spanEnds;
  std::vector<SpanInfo97> ret;
  for (unsigned i = prop1Index; i < prop2Index; ++i)
  {
    unsigned offset = i * 0x200;
    input->seek(offset + 0x1FF, librevenge::RVNG_SEEK_SET);
    unsigned numEntries = readU8(input);
    input->seek(offset, librevenge::RVNG_SEEK_SET);
    // Skip the first thing; it is not an end
    unsigned start = readU32(input);
    for (unsigned j = 0; j < numEntries; ++j)
    {
      spanEnds.push_back(readU32(input) - start);
    }
    std::vector<unsigned char> spanStyleIndices;
    for (unsigned j = 0; j < spanEnds.size(); ++j)
    {
      spanStyleIndices.push_back(readU8(input));
    }
    // Skip the zero padding, then step back onto the byte that ended the run.
    while (stillReading(input, offset + 0x200) && readU8(input) == 0)
    {
      ;
    }
    input->seek(-1, librevenge::RVNG_SEEK_CUR);
    std::map<unsigned char, CharacterStyle> stylesByIndex;
    while (stillReading(input, offset + 0x1FF))
    {
      unsigned length = readU8(input);
      unsigned nextOffset = input->tell() + length;
      // A style is keyed by half the in-page offset of its length byte.
      auto index = static_cast<unsigned char>((input->tell() - 1 - offset) / 2);
      stylesByIndex[index] = readCharacterStyle(input, length);
      input->seek(nextOffset, librevenge::RVNG_SEEK_SET);
    }
    for (unsigned j = 0; j < spanEnds.size(); ++j)
    {
      ret.push_back(SpanInfo97(spanEnds[j], j < spanStyleIndices.size() ?
                               stylesByIndex[spanStyleIndices[j]] : CharacterStyle()));
    }
  }
  return ret;
}

/** Decode one character style record of @p length bytes starting at the
  * current stream position.
  *
  * Fields are read only when the record is long enough to contain them:
  * bold/italic flags at +0, font index at +2, text size variation at +4
  * (16-bit if at least 6 bytes are present, else 8-bit), underline flag
  * at +8 and colour at +0xC. The text size is 10pt plus half the variation.
  */
CharacterStyle MSPUBParser97::readCharacterStyle(
  librevenge::RVNGInputStream *input, unsigned length)
{
  CharacterStyle style;

  unsigned begin = input->tell();
  int textSizeVariationFromDefault = 0;

  if (length >= 1)
  {
    unsigned char biFlags = readU8(input);
    style.bold = biFlags & 0x1;
    style.italic = biFlags & 0x2;
  }
  if (length >= 3)
  {
    input->seek(begin + 0x2, librevenge::RVNG_SEEK_SET);
    style.fontIndex = readU8(input);
  }
  if (length >= 9)
  {
    input->seek(begin + 0x8, librevenge::RVNG_SEEK_SET);
    if (readU8(input) & 0x1)
      style.underline = Underline::Single;
  }
  if (length >= 5)
  {
    input->seek(begin + 0x4, librevenge::RVNG_SEEK_SET);
    textSizeVariationFromDefault =
      length >= 6 ? readS16(input) : readS8(input);
  }
  if (length >= 16)
  {
    input->seek(begin + 0xC, librevenge::RVNG_SEEK_SET);
    style.colorIndex = getColorIndexByQuillEntry(readU32(input));
  }
  style.textSizeInPt = 10 +
                       static_cast<double>(textSizeVariationFromDefault) / 2;

  return style;
}

/** Read up to @p length bytes of text from the current stream position
  * and locate the paragraph and shape boundaries in it.
  *
  * A paragraph end is recorded after each 0x0D 0x0A pair and a shape end
  * after each 0x0C; both are stored as the character count reached once
  * the terminator has been read.
  */
MSPUBParser97::TextInfo97 MSPUBParser97::getTextInfo(librevenge::RVNGInputStream *input, unsigned length)
{
  length = std::min(length, m_length); // sanity check
  std::vector<unsigned char> chars;
  chars.reserve(length);
  std::vector<unsigned> paragraphEnds;
  std::vector<unsigned> shapeEnds;
  unsigned start = input->tell();
  unsigned char last = '\0';
  while (stillReading(input, start + length))
  {
    chars.push_back(readU8(input));
    if (last == 0xD && chars.back() == 0xA)
    {
      paragraphEnds.push_back(chars.size());
    }
    else if (chars.back() == 0xC)
    {
      shapeEnds.push_back(chars.size());
    }
    last = chars.back();
  }
  return TextInfo97(chars, paragraphEnds, shapeEnds);
}

/** Shift @p coordinate by (m_isBanner ? 120 : 25) * EMUS_IN_INCH.
  *
  * The result saturates at the smallest int instead of overflowing.
  */
int MSPUBParser97::translateCoordinateIfNecessary(int coordinate) const
{
  const int offset = (m_isBanner ? 120 : 25) * EMUS_IN_INCH;
  if (std::numeric_limits<int>::min() + offset > coordinate)
    return std::numeric_limits<int>::min();
  else
    return coordinate - offset;
}

/** Offset of the first line record used by this parser. */
unsigned MSPUBParser97::getFirstLineOffset() const
{
  return 0x22;
}

/** Offset of the second line record used by this parser. */
unsigned MSPUBParser97::getSecondLineOffset() const
{
  return 0x2D;
}

/** Offset of the shape fill type used by this parser. */
unsigned MSPUBParser97::getShapeFillTypeOffset() const
{
  return 0x20;
}

/** Offset of the shape fill colour used by this parser. */
unsigned MSPUBParser97::getShapeFillColorOffset() const
{
  return 0x18;
}

}

/* vim:set shiftwidth=2 softtabstop=2 expandtab: */