/* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */
/* libmwaw
* Version: MPL 2.0 / LGPLv2+
*
* The contents of this file are subject to the Mozilla Public License Version
* 2.0 (the "License"); you may not use this file except in compliance with
* the License or as specified alternatively below. You may obtain a copy of
* the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* Major Contributor(s):
* Copyright (C) 2002 William Lachance (wrlach@gmail.com)
* Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
* Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
* Copyright (C) 2006, 2007 Andrew Ziem
* Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
*
*
* All Rights Reserved.
*
* For minor contributions see the git repository.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
* in which case the provisions of the LGPLv2+ are applicable
* instead of those above.
*/
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <limits>
#include <map>
#include <sstream>
#include <librevenge/librevenge.h>
#include "MWAWTextListener.hxx"
#include "MWAWFont.hxx"
#include "MWAWFontConverter.hxx"
#include "MWAWHeader.hxx"
#include "MWAWParagraph.hxx"
#include "MWAWPictData.hxx"
#include "MWAWPosition.hxx"
#include "MWAWPrinter.hxx"
#include "MWAWRSRCParser.hxx"
#include "MWAWSubDocument.hxx"
#include "libmwaw_internal.hxx"
#include "MacDocParser.hxx"
/** Internal: the structures of a MacDocParser */
namespace MacDocParserInternal
{
////////////////////////////////////////
//! Internal: the index data of a MacDocParser
struct Index {
  //! constructor: empty entry and box, level/child count/page set to 0, no extra data
  Index()
    : m_entry()
    , m_level(0)
    , m_numChild(0)
    , m_page(0)
    , m_box()
    , m_extra("")
  {
  }
  //! operator<<: prints the non-zero level/page/child count, the box (always) and the extra data
  friend std::ostream &operator<<(std::ostream &out, Index const &idx)
  {
    if (idx.m_level != 0) {
      out << "level=" << idx.m_level << ",";
    }
    if (idx.m_page != 0) {
      out << "page=" << idx.m_page << ",";
    }
    out << "box=" << idx.m_box << ",";
    if (idx.m_numChild != 0) {
      out << "numChild=" << idx.m_numChild << ",";
    }
    out << idx.m_extra;
    return out;
  }
  //! the text entry (position of the index label in the resource stream)
  MWAWEntry m_entry;
  //! the entry level (computed by MacDocParser::updateIndex)
  int m_level;
  //! the number of children
  int m_numChild;
  //! the page
  int m_page;
  //! the bounding box
  MWAWBox2i m_box;
  //! extra data (debugging information)
  std::string m_extra;
};
////////////////////////////////////////
//! Internal: the state of a MacDocParser
struct State {
  //! constructor: empty maps/list, page counters set to 0
  State()
    : m_idPictureMap()
    , m_indexList()
    , m_idFontMap()
    , m_actPage(0)
    , m_numPages(0)
  {
  }
  //! the picture map: resource id -> picture entry
  std::map<int,MWAWEntry> m_idPictureMap;
  //! the index list
  std::vector<Index> m_indexList;
  //! a map: index level id -> font
  std::map<int, MWAWFont> m_idFontMap;
  //! the actual page
  int m_actPage;
  //! the number of pages of the final document
  int m_numPages;
};
}
////////////////////////////////////////////////////////////
// constructor/destructor, ...
////////////////////////////////////////////////////////////
// Builds the parser on top of MWAWTextParser; the local set-up is delegated to init().
MacDocParser::MacDocParser(MWAWInputStreamPtr const &input, MWAWRSRCParserPtr const &rsrcParser, MWAWHeader *header)
  : MWAWTextParser(input, rsrcParser, header), m_state()
{
  init();
}
// Nothing to release by hand: the members clean up after themselves.
MacDocParser::~MacDocParser() = default;
// Resets the text listener, installs a fresh internal state and sets the page margins.
void MacDocParser::init()
{
  resetTextListener();
  m_state.reset(new MacDocParserInternal::State());
  // (almost) no margins, i.e. the document is a set of pictures, one for each page
  getPageSpan().setMargins(0.01);
}
// Returns the input stream of the resource parser (the parser is not checked for null here).
MWAWInputStreamPtr MacDocParser::rsrcInput()
{
  auto const parser = getRSRCParser();
  return parser->getInput();
}
// Returns the debug file of the resource parser (the parser is not checked for null here).
libmwaw::DebugFile &MacDocParser::rsrcAscii()
{
  auto const parser = getRSRCParser();
  return parser->ascii();
}
////////////////////////////////////////////////////////////
// new page
////////////////////////////////////////////////////////////
void MacDocParser::newPage(int number)
{
if (number <= m_state->m_actPage || number > m_state->m_numPages)
return;
while (m_state->m_actPage < number) {
m_state->m_actPage++;
if (!getTextListener() || m_state->m_actPage == 1)
continue;
getTextListener()->insertBreak(MWAWTextListener::PageBreak);
}
}
////////////////////////////////////////////////////////////
// the parser
////////////////////////////////////////////////////////////
// Main entry point: checks the header, reads the resource zones and, if they are valid,
// creates the document and sends its contents to docInterface.
// Throws libmwaw::ParseException if the inputs are missing, the header check fails,
// no zone can be created or any exception is raised while parsing.
void MacDocParser::parse(librevenge::RVNGTextInterface *docInterface)
{
  bool const canParse = getInput().get() && getRSRCParser() && checkHeader(nullptr);
  if (!canParse) throw(libmwaw::ParseException());
  bool success = false;
  try {
    checkHeader(nullptr);
    if (createZones()) {
      success = true;
      createDocument(docInterface);
      sendContents();
    }
    ascii().reset();
  }
  catch (...) {
    MWAW_DEBUG_MSG(("MacDocParser::parse: exception catched when parsing\n"));
    success = false;
  }
  // the listener is always released, even after a failure
  resetTextListener();
  if (!success) throw(libmwaw::ParseException());
}
////////////////////////////////////////////////////////////
// create the document
////////////////////////////////////////////////////////////
// Creates the text listener and starts the document.
// Does nothing if documentInterface is null or if a listener already exists.
// The page count is the number of picture zones, plus one if an index was read.
void MacDocParser::createDocument(librevenge::RVNGTextInterface *documentInterface)
{
  if (!documentInterface) return;
  if (getTextListener()) {
    MWAW_DEBUG_MSG(("MacDocParser::createDocument: listener already exist\n"));
    return;
  }
  // update the page
  m_state->m_actPage = 0;
  // create the page list
  auto numPages = int(m_state->m_idPictureMap.size());
  if (!m_state->m_indexList.empty())
    numPages++;
  // store the page count: newPage() ignores any page number greater than
  // m_numPages, so leaving it at 0 would suppress every page break
  m_state->m_numPages = numPages;
  MWAWPageSpan ps(getPageSpan());
  ps.setPageSpan(numPages+1);
  std::vector<MWAWPageSpan> pageList(1,ps);
  //
  MWAWTextListenerPtr listen(new MWAWTextListener(*getParserState(), pageList, documentInterface));
  setTextListener(listen);
  listen->startDocument();
}
////////////////////////////////////////////////////////////
//
// Intermediate level
//
////////////////////////////////////////////////////////////
bool MacDocParser::createZones()
{
MWAWRSRCParserPtr rsrcParser = getRSRCParser();
auto &entryMap = rsrcParser->getEntriesMap();
// the index font zone: 1001, ...
auto it = entryMap.lower_bound("MDLv");
while (it != entryMap.end()) {
if (it->first != "MDLv")
break;
MWAWEntry const &entry = it++->second;
readFont(entry);
}
// index zone: 1 MDIx
it = entryMap.lower_bound("MDIx");
while (it != entryMap.end()) {
if (it->first != "MDIx")
break;
MWAWEntry const &entry = it++->second;
readIndex(entry);
}
// bookmark zone: Mdbk 1, ...
it = entryMap.lower_bound("MDbk");
while (it != entryMap.end()) {
if (it->first != "MDbk")
break;
MWAWEntry const &entry = it++->second;
readBookmark(entry);
}
// doc preference MDop:128 crypted ?
// the picture zone: 1, ...
bool pageSizeSet=false;
it = entryMap.lower_bound("MDpg");
while (it != entryMap.end()) {
if (it->first != "MDpg")
break;
MWAWEntry const &entry = it++->second;
m_state->m_idPictureMap[entry.id()]=entry;
if (!pageSizeSet) {
// as we do not read MDop, use picture to find page size
librevenge::RVNGBinaryData data;
if (!getRSRCParser()->parsePICT(entry, data))
continue;
MWAWInputStreamPtr pictInput=MWAWInputStream::get(data, false);
if (!pictInput)
continue;
MWAWBox2f box;
auto res = MWAWPictData::check(pictInput,static_cast<int>(data.size()), box);
if (res != MWAWPict::MWAW_R_BAD && box.size()[0]>0 && box.size()[1]>0) {
pageSizeSet=true;
getPageSpan().setFormWidth(double(box.size()[0])/72.);
getPageSpan().setFormLength(double(box.size()[1])/72.);
}
}
}
// windows pos? 128
it = entryMap.lower_bound("MDwp");
while (it != entryMap.end()) {
if (it->first != "MDwp")
break;
MWAWEntry const &entry = it++->second;
readWP(entry);
}
#ifdef DEBUG_WITH_FILES
// the file zone: 1, ...
it = entryMap.lower_bound("MDfi");
while (it != entryMap.end()) {
if (it->first != "MDfi")
break;
MWAWEntry const &entry = it++->second;
readFile(entry);
}
// get rid of the default application resource
libmwaw::DebugFile &ascFile = rsrcAscii();
static char const *appliRsrc[]= {
"ALRT","BNDL","CNTL","CURS","CDEF", "CODE","DLOG","DLGX","DITL","FREF",
"ICON","ICN#","MENU","MBAR","MDEF", "SIZE","TMPL","WIND",
"acur","cicn","crsr","dctb","icl4", "icl8","ics4","ics8","ics#","ictb",
"mstr","snd ",
"DATA", "MDsr" /* srd: version string */
};
for (int r=0; r < 18+12+2; ++r) {
it = entryMap.lower_bound(appliRsrc[r]);
while (it != entryMap.end()) {
if (it->first != appliRsrc[r])
break;
MWAWEntry const &entry = it++->second;
if (entry.isParsed()) continue;
entry.setParsed(true);
ascFile.skipZone(entry.begin()-4,entry.end()-1);
}
}
#endif
return !m_state->m_idPictureMap.empty();
}
// Sends the index (if any) and then each picture, asking for a new page after each of them.
// Returns false if no listener is available.
bool MacDocParser::sendContents()
{
  MWAWTextListenerPtr listener=getTextListener();
  if (!listener) {
    MWAW_DEBUG_MSG(("MacDocParser::sendContents: can not find the listener\n"));
    return false;
  }
  int page=0;
  bool const indexSent=sendIndex();
  if (indexSent) {
    ++page;
    newPage(page);
  }
  // go back to a default paragraph before sending the pictures
  listener->setParagraph(MWAWParagraph());
  for (auto &idPict : m_state->m_idPictureMap) {
    sendPicture(idPict.second);
    ++page;
    newPage(page);
  }
  return true;
}
////////////////////////////////////////////////////////////
//
// Low level
//
////////////////////////////////////////////////////////////
// font
// Reads a font zone: a length-prefixed font name (padded with one byte when the length
// is even), the font size, a style flag and a 3x16 bits colour. The font is stored in
// the state font map with the key entry.id()-999.
// Returns false if the entry is too short for the data it announces.
bool MacDocParser::readFont(MWAWEntry const &entry)
{
  if (entry.length()<12) {
    MWAW_DEBUG_MSG(("MacDocParser::readFont: the entry seems bad\n"));
    return false;
  }
  entry.setParsed(true);
  MWAWInputStreamPtr input = rsrcInput();
  input->seek(entry.begin(), librevenge::RVNG_SEEK_SET);
  libmwaw::DebugFile &ascFile = rsrcAscii();
  libmwaw::DebugStream f;
  auto const nameLen=static_cast<int>(input->readULong(1));
  // length byte + name + optional padding byte + 10 bytes (size, flag, colour)
  long const neededLength=1+long(nameLen)+1-(nameLen%2)+10;
  if (nameLen<0 || neededLength>entry.length()) {
    f << "Entries(Font):###fSz=" << nameLen;
    ascFile.addPos(entry.begin()-4);
    ascFile.addNote(f.str().c_str());
    return false;
  }
  MWAWFont font;
  std::string fontName;
  for (int i=0; i<nameLen; ++i)
    fontName.push_back(char(input->readLong(1)));
  font.setId(getParserState()->m_fontConverter->getId(fontName));
  if ((nameLen%2)==0) // skip the padding byte
    input->seek(1, librevenge::RVNG_SEEK_CUR);
  font.setSize(float(input->readULong(2)));
  auto const styleBits = static_cast<int>(input->readULong(2));
  uint32_t fontFlags=0;
  if (styleBits&0x1) fontFlags |= MWAWFont::boldBit;
  if (styleBits&0x2) fontFlags |= MWAWFont::italicBit;
  if (styleBits&0x4) font.setUnderlineStyle(MWAWFont::Line::Simple);
  if (styleBits&0x8) fontFlags |= MWAWFont::embossBit;
  if (styleBits&0x10) fontFlags |= MWAWFont::shadowBit;
  if (styleBits&0x20) font.setDeltaLetterSpacing(-1);
  if (styleBits&0x40) font.setDeltaLetterSpacing(1);
  if (styleBits&0x80) f << "#flag0[0x80],";
  font.setFlags(fontFlags);
  // each colour component is stored on 16 bits, only the high byte is kept
  unsigned char rgb[3];
  for (int i=0; i<3; ++i)
    rgb[i]=static_cast<unsigned char>(input->readULong(2)>>8);
  font.setColor(MWAWColor(rgb[0],rgb[1],rgb[2]));
  font.m_extra = f.str();
  f.str("");
  f << "Entries(Font)[" << entry.id() << "]:"
    << font.getDebugString(getParserState()->m_fontConverter);
  m_state->m_idFontMap[entry.id()-999]=font;
  ascFile.addPos(entry.begin()-4);
  ascFile.addNote(f.str().c_str());
  return true;
}
//
// index functions
//
// Reads the index zone: a list of records, each made of 20 bytes of data (page, box,
// number of children and unknown values) followed by a zero-terminated label.
// Reading stops at the first record with page<=0, at the first unterminated label or
// when fewer than 22 bytes remain; valid records are appended to the state index list.
// Returns false only if the entry is shorter than 4 bytes.
bool MacDocParser::readIndex(MWAWEntry const &entry)
{
  if (entry.length()<4) {
    MWAW_DEBUG_MSG(("MacDocParser::readIndex: the entry seems bad\n"));
    return false;
  }
  if (entry.id()!=1) {
    MWAW_DEBUG_MSG(("MacDocParser::readIndex: the entry id seems bad\n"));
  }
  entry.setParsed(true);
  MWAWInputStreamPtr input = rsrcInput();
  input->seek(entry.begin(), librevenge::RVNG_SEEK_SET);
  libmwaw::DebugFile &ascFile = rsrcAscii();
  ascFile.addPos(entry.begin()-4);
  ascFile.addNote("Entries(Index)");
  libmwaw::DebugStream f;
  while (!input->isEnd()) {
    long const recordPos=input->tell();
    if (recordPos+21>=entry.end())
      break;
    f.str("");
    MacDocParserInternal::Index index;
    auto unknown=static_cast<int>(input->readLong(2));
    if (unknown) f << "#f0=" << unknown << ",";
    index.m_page=static_cast<int>(input->readLong(2));
    if (index.m_page<=0) {
      // not a record: rewind and stop
      input->seek(recordPos, librevenge::RVNG_SEEK_SET);
      break;
    }
    // the four box values are read in order and swapped by pairs when building the box
    int dim[4];
    for (int i=0; i<4; ++i)
      dim[i]=static_cast<int>(input->readLong(2));
    index.m_box=MWAWBox2i(MWAWVec2i(dim[1],dim[0]),MWAWVec2i(dim[3],dim[2]));
    unknown=static_cast<int>(input->readLong(2));
    if (unknown) f << "#f1=" << unknown << ",";
    index.m_numChild=static_cast<int>(input->readLong(2));
    for (int i=0; i<2; ++i) {
      unknown=static_cast<int>(input->readLong(2));
      if (unknown) f << "#f" << i+2 << "=" << unknown << ",";
    }
    index.m_extra=f.str();
    f.str("");
    f << "Index:" << index;
    index.m_entry.setBegin(input->tell());
    // read the label up to its terminating zero
    std::string label;
    bool terminated=false;
    while (!input->isEnd() && input->tell()<entry.end()) {
      auto const c=char(input->readLong(1));
      if (c==0) {
        terminated=true;
        break;
      }
      label+=c;
    }
    if (!terminated) {
      input->seek(recordPos, librevenge::RVNG_SEEK_SET);
      break;
    }
    // the text entry excludes the terminating zero
    index.m_entry.setEnd(input->tell()-1);
    m_state->m_indexList.push_back(index);
    f << label;
    ascFile.addPos(recordPos);
    ascFile.addNote(f.str().c_str());
  }
  f.str("");
  f << "Index[end]:";
  long const lastPos=input->tell();
  // the reader expects to stop 4 bytes before the end of the entry
  if (lastPos!=entry.end()-4) {
    MWAW_DEBUG_MSG(("MacDocParser::readIndex: problem reading end\n"));
    f << "###";
  }
  ascFile.addPos(lastPos);
  ascFile.addNote(f.str().c_str());
  return true;
}
// Sets the level of the index actIndex to actLevel and, recursively, the level of its
// children (stored just after it) to actLevel+1, ...
// Returns the position of the first index which follows the processed sub-tree,
// or -1 if an index position is out of range.
int MacDocParser::updateIndex(int actIndex, int actLevel)
{
  auto &indexList=m_state->m_indexList;
  if (actIndex < 0 || actIndex >= static_cast<int>(indexList.size())) {
    MWAW_DEBUG_MSG(("MacDocParser::updateIndex: the actual index seems bad\n"));
    return -1;
  }
  auto &current = indexList[size_t(actIndex)];
  current.m_level=actLevel;
  int next=actIndex+1;
  // stop as soon as a child can not be updated
  for (int child=0; child < current.m_numChild && next!=-1; ++child)
    next=updateIndex(next, actLevel+1);
  return next;
}
bool MacDocParser::sendIndex()
{
MWAWTextListenerPtr listener=getTextListener();
if (!listener) {
MWAW_DEBUG_MSG(("MacDocParser::sendIndex: can not find the listener\n"));
return false;
}
if (m_state->m_indexList.empty())
return false;
int id=0;
auto numIndex=size_t(m_state->m_indexList.size());
do
id=updateIndex(id,1);
while (id>0 && id < static_cast<int>(numIndex));
listener->setFont(MWAWFont(3,12,MWAWFont::boldBit));
MWAWParagraph para;
para.m_justify = MWAWParagraph::JustificationCenter;
listener->setParagraph(para);
listener->insertUnicodeString(librevenge::RVNGString("Index"));
listener->insertEOL();
listener->insertEOL();
MWAWInputStreamPtr input = rsrcInput();
para=MWAWParagraph();
double w = getPageWidth();
MWAWTabStop tab;
tab.m_alignment = MWAWTabStop::RIGHT;
tab.m_leaderCharacter='.';
tab.m_position = w-0.3;
para.m_tabs->push_back(tab);
#ifdef DEBUG
int n=0;
#endif
for (auto const &index : m_state->m_indexList) {
#ifdef DEBUG
++n;
#endif
if (!index.m_entry.valid() || index.m_level<=0)
continue;
para.m_margins[1]=0.5*double(index.m_level);
listener->setParagraph(para);
if (m_state->m_idFontMap.find(index.m_level)!=m_state->m_idFontMap.end())
listener->setFont(m_state->m_idFontMap.find(index.m_level)->second);
else {
MWAW_DEBUG_MSG(("MacDocParser::sendIndex: can not find font for index %d\n", int(n-1)));
listener->setFont(MWAWFont());
}
input->seek(index.m_entry.begin(), librevenge::RVNG_SEEK_SET);
for (long c=0; c < index.m_entry.length(); ++c) {
auto ch=static_cast<unsigned char>(input->readULong(1));
if (ch==9)
listener->insertCharacter(' ');
else
listener->insertCharacter(ch);
}
if (index.m_page>0) {
std::stringstream s;
s << index.m_page;
listener->setFont(MWAWFont());
listener->insertTab();
listener->insertUnicodeString(librevenge::RVNGString(s.str().c_str()));
}
listener->insertEOL();
}
return true;
}
// picture
// Reads the PICT stored in entry and inserts it in the listener as a character-anchored
// picture whose size is the picture bounding box (in points).
// Returns false if there is no listener or if the picture can not be read/recognized.
bool MacDocParser::sendPicture(MWAWEntry const &entry)
{
  MWAWTextListenerPtr listener=getTextListener();
  if (!listener) {
    MWAW_DEBUG_MSG(("MacDocParser::sendPicture: can not find the listener\n"));
    return false;
  }
  librevenge::RVNGBinaryData data;
  if (!getRSRCParser()->parsePICT(entry, data))
    return false;
  entry.setParsed(true);
  auto const dataSz=int(data.size());
  if (dataSz==0)
    return false;
  MWAWInputStreamPtr pictInput=MWAWInputStream::get(data, false);
  if (!pictInput) {
    MWAW_DEBUG_MSG(("MacDocParser::sendPicture: oops can not find an input\n"));
    return false;
  }
  MWAWBox2f box;
  if (MWAWPictData::check(pictInput, dataSz,box) == MWAWPict::MWAW_R_BAD) {
    MWAW_DEBUG_MSG(("MacDocParser::sendPicture: can not find the picture\n"));
    return false;
  }
  // rewind before decoding the picture
  pictInput->seek(0,librevenge::RVNG_SEEK_SET);
  std::shared_ptr<MWAWPict> thePict(MWAWPictData::get(pictInput, dataSz));
  if (!thePict)
    return true;
  MWAWPosition pictPos(MWAWVec2f(0,0),box.size(), librevenge::RVNG_POINT);
  pictPos.setRelativePosition(MWAWPosition::Char);
  MWAWEmbeddedObject picture;
  if (thePict->getBinary(picture))
    listener->insertPicture(pictPos, picture);
  return true;
}
// file: unknown format: 0002 0000 0000 00 + FileInfo + DataFrk + RSRCFork ?
// Marks the file zone as parsed; when DEBUG_WITH_FILES is defined, also dumps its
// content in a file named FILE1, FILE2, ... Always returns true.
bool MacDocParser::readFile(MWAWEntry const &entry)
{
  entry.setParsed(true);
#ifdef DEBUG_WITH_FILES
  MWAWInputStreamPtr input = rsrcInput();
  input->seek(entry.begin(), librevenge::RVNG_SEEK_SET);
  librevenge::RVNGBinaryData data;
  input->readDataBlock(entry.length(), data);
  libmwaw::DebugFile &ascFile = rsrcAscii();
  // counter used to give a different name to each dumped file
  static int volatile fileName = 0;
  libmwaw::DebugStream f;
  f << "FILE" << ++fileName;
  libmwaw::Debug::dumpFile(data, f.str().c_str());
  ascFile.addPos(entry.begin()-4);
  ascFile.addNote(f.str().c_str());
  ascFile.skipZone(entry.begin(),entry.end()-1);
#endif
  return true;
}
// bookmark. note the name is stored as resource name
// Reads a bookmark zone: two 4-byte values, logged as a page and a probable y position
// (the bookmark name is stored as the resource name). The data are only sent to the
// debug file. Returns false if the entry length is not 8.
bool MacDocParser::readBookmark(MWAWEntry const &entry)
{
  if (entry.length()!=8) {
    // the message must name this function (it previously named readWP)
    MWAW_DEBUG_MSG(("MacDocParser::readBookmark: the entry seems bad\n"));
    return false;
  }
  entry.setParsed(true);
  MWAWInputStreamPtr input = rsrcInput();
  input->seek(entry.begin(), librevenge::RVNG_SEEK_SET);
  libmwaw::DebugFile &ascFile = rsrcAscii();
  libmwaw::DebugStream f;
  f << "Entries(BookMark)[" << entry.id() << "]:";
  long val=input->readLong(4);
  if (val) f << "page=" << val << ",";
  val=input->readLong(4);
  if (val) f << "yPos?=" << val << ",";
  ascFile.addPos(entry.begin()-4);
  ascFile.addNote(f.str().c_str());
  return true;
}
// unknown related to window position?
// Reads a MDwp zone (unknown, maybe related to the window position): two 2-byte values
// which are only sent to the debug file. Returns false if the entry length is not 4.
bool MacDocParser::readWP(MWAWEntry const &entry)
{
  if (entry.length()!=4) {
    MWAW_DEBUG_MSG(("MacDocParser::readWP: the entry seems bad\n"));
    return false;
  }
  entry.setParsed(true);
  MWAWInputStreamPtr input = rsrcInput();
  input->seek(entry.begin(), librevenge::RVNG_SEEK_SET);
  libmwaw::DebugFile &ascFile = rsrcAscii();
  libmwaw::DebugStream f;
  f << "Entries(WP)[" << entry.id() << "]:";
  // f0=0|a6|c6, f1=0|1 show index ?
  for (int i=0; i < 2; ++i) {
    long const val=input->readLong(2);
    if (val==0)
      continue;
    f << "f" << i << "=" << val << ",";
  }
  ascFile.addPos(entry.begin()-4);
  ascFile.addNote(f.str().c_str());
  return true;
}
////////////////////////////////////////////////////////////
// read the header
////////////////////////////////////////////////////////////
// Resets the internal state and checks that the file can be a MacDoc document: an input
// and a resource parser must exist and, in strict mode, the resource map must contain
// at least one MDpg (picture) zone. If header is not null, it is filled on success.
bool MacDocParser::checkHeader(MWAWHeader *header, bool strict)
{
  *m_state = MacDocParserInternal::State();
  MWAWInputStreamPtr input = getInput();
  if (!input || !getRSRCParser())
    return false;
  /** a data fork is not expected: the contents are read from the resource fork */
  if (input->hasDataFork()) {
    MWAW_DEBUG_MSG(("MacDocParser::checkHeader: find a datafork, odd!!!\n"));
  }
  if (strict) {
    // check if at least one picture zone exists
    auto &entryMap = getRSRCParser()->getEntriesMap();
    bool const hasPicture = entryMap.find("MDpg") != entryMap.end();
    if (!hasPicture)
      return false;
  }
  if (header)
    header->reset(MWAWDocument::MWAW_T_MACDOC, version());
  return true;
}
// vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab: