Blob Blame History Raw
/* -*- Mode: C++; c-default-style: "k&r"; indent-tabs-mode: nil; tab-width: 2; c-basic-offset: 2 -*- */

/* libmwaw
* Version: MPL 2.0 / LGPLv2+
*
* The contents of this file are subject to the Mozilla Public License Version
* 2.0 (the "License"); you may not use this file except in compliance with
* the License or as specified alternatively below. You may obtain a copy of
* the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* Major Contributor(s):
* Copyright (C) 2002 William Lachance (wrlach@gmail.com)
* Copyright (C) 2002,2004 Marc Maurer (uwog@uwog.net)
* Copyright (C) 2004-2006 Fridrich Strba (fridrich.strba@bluewin.ch)
* Copyright (C) 2006, 2007 Andrew Ziem
* Copyright (C) 2011, 2012 Alonso Laurent (alonso@loria.fr)
*
*
* All Rights Reserved.
*
* For minor contributions see the git repository.
*
* Alternatively, the contents of this file may be used under the terms of
* the GNU Lesser General Public License Version 2 or later (the "LGPLv2+"),
* in which case the provisions of the LGPLv2+ are applicable
* instead of those above.
*/

#ifndef MS_WKS4_TEXT
#  define MS_WKS4_TEXT

#include <memory>
#include <string>
#include <vector>

#include "MWAWEntry.hxx"
#include "MWAWDebug.hxx"
#include "MWAWInputStream.hxx"

/** Forward declarations of the internal helper types of MsWks4Text.
 *
 * Only the names are needed in this header: Paragraph is passed by const
 * reference to MsWks4Text::setProperty and State is held through the shared
 * pointer MsWks4Text::m_state. The definitions are not part of this header.
 */
namespace MsWks4TextInternal
{
struct Font;
struct Paragraph;
struct State;
}

// Forward declarations: MsWksDocument is only used by reference in
// MsWks4Text (constructor argument and m_document member) and MsWks4Zone
// is only named in a friend declaration, so no full definition is needed here.
class MsWksDocument;
class MsWks4Zone;

/** The class which parses text zones in a mac MS Works document v4
 *
 * This class must be associated with a MsWks4Zone. It reads the entries:
 * - TEXT : the text strings
 * - FONT : the fonts name
 * - FDPC, BTEC : the fonts properties
 * - FDPP, BTEP : the paragraph properties
 * - FTNT : the footnote definition
 * - PGD : the page break (only parsed)
 * - TOKN : the field properties (pagenumber, date, ...)
 */
class MsWks4Text
{
  friend class MsWks4Zone;
protected:
  struct DataFOD;
  /** callback when a new attribute is found in an FDPP/FDPC entry
   *
   * \param input defines the input
   * \param endPos the last position in the input
   * \return true and filled id if this attribute can be parsed
   * \note mess can be filled to add a message in debugFile */
  typedef bool (MsWks4Text::* FDPParser)(MWAWInputStreamPtr &input, long endPos,
                                         int &id, std::string &mess);
public:
  //! constructor
  explicit MsWks4Text(MsWksDocument &document);

  //! destructor
  ~MsWks4Text();

  //! sets the default font
  void setDefault(MWAWFont &font);

  //! returns the number of pages
  int numPages() const;

  //! sends the data which have not been sent: actually do nothing
  void flushExtra(MWAWInputStreamPtr /*input*/) {}

protected:
  /** finds and parses all structures which correspond to the text
   *
   * More precisely the TEXT, FONT, FDPC/FDPP, BTEC/BTEP, FTNT, PGD, TOKN entries */
  bool readStructures(MWAWInputStreamPtr input, bool mainOle);

  //! reads a text section and send it to the listener
  bool readText(MWAWInputStreamPtr input, MWAWEntry const &entry, bool mainOle);

  //! sends the text which corresponds to footnote \a id to the listner
  bool readFootNote(MWAWInputStreamPtr input, int id);

  //----------------------------------------
  // PLC parsing, setting
  //----------------------------------------
  /** definition of the plc data parser (low level)
   *
   * \param endPos the end of the properties' definition,
   * \param bot defined the begin of the text zone corresponding to these properties
   * \param eot defined the end of the text zone corresponding to these properties
   * \param id the number of this properties
   * \param mess a string which can be filled to indicate unparsed data */
  typedef bool (MsWks4Text::* DataParser)
  (MWAWInputStreamPtr input, long endPos,  long bot, long eot, int id, std::string &mess);

  /** reads a PLC (Pointer List Composant ?) in zone entry
   *
   * \param input the file's input
   * \param entry the zone which contains the plc
   * \param textPtrs lists of offset in text zones where properties changes
   * \param listValues lists of properties values (filled only if values are simple types: int, ..)
   * \param parser the parser to use to read the values */
  bool readPLC(MWAWInputStreamPtr input, MWAWEntry const &entry,
               std::vector<long> &textPtrs, std::vector<long> &listValues,
               DataParser parser = &MsWks4Text::defDataParser);
  /** reads a PLC (Pointer List Composant ?) in zone entry
   *
   * \param input the file's input
   * \param entry the zone which contains the plc
   * \param textPtrs lists of offset in text zones where properties changes
   * \param listValues lists of properties values (filled only if values are simple types: int, ..)
   */
  bool readSimplePLC(MWAWInputStreamPtr &input, MWAWEntry const &entry,
                     std::vector<long> &textPtrs,
                     std::vector<long> &listValues)
  {
    return readPLC(input, entry, textPtrs, listValues);
  }

  //! the default parser (does nothing)
  bool defDataParser(MWAWInputStreamPtr input, long endPos,
                     long bot, long eot, int id, std::string &mess);

  //! reads the font names entry : FONT
  bool readFontNames(MWAWInputStreamPtr input, MWAWEntry const &entry);

  //! reads a font properties
  bool readFont(MWAWInputStreamPtr &input, long endPos,
                int &id, std::string &mess);

  /** sends a paragraph properties to the listener */
  void setProperty(MsWks4TextInternal::Paragraph const &tabs);
  //! reads a paragraph properties
  bool readParagraph(MWAWInputStreamPtr &input, long endPos,
                     int &id, std::string &mess);

  //! parses the footnote position : FTNT
  bool ftntDataParser(MWAWInputStreamPtr input, long endPos,
                      long bot, long eot, int id, std::string &mess);

  //! parses the object position : EOBJ
  bool eobjDataParser(MWAWInputStreamPtr input, long endPos,
                      long bot, long eot, int id, std::string &mess);

  /** parses the field properties entries : TOKN.
   *
   * \note the read data are not used to create the document */
  bool toknDataParser(MWAWInputStreamPtr input, long endPos,
                      long bot, long eot, int id, std::string &mess);

  /** parses the pagebreak positin entries : PGD
   *
   * \note the read data are not used to create the document */
  bool pgdDataParser(MWAWInputStreamPtr input, long endPos,
                     long, long, int id, std::string &mess);

  //! sends to the listener the text which corresponds to noteId
  void flushNote(int noteId);

protected:
  /** function which takes two sorted list of attribute (by text position).
      \return a list of attribute */
  std::vector<DataFOD> mergeSortedLists
  (std::vector<DataFOD> const &lst1, std::vector<DataFOD> const &lst2) const;

  /** parses a FDPP or a FDPC entry (which contains a list of ATTR_TEXT/ATTR_PARAG
   * with their definition ) and adds found data in listFODs */
  bool readFDP(MWAWInputStreamPtr &input, MWAWEntry const &entry,
               std::vector<DataFOD> &fods, FDPParser parser);

  /** Fills the vector of (FDPCs/FDPPs) paragraph/characters strutures
   *
   * Uses the entry BTEC/BTEP : the normal ways, and calls readSimplePLC on each entry to check that the parsing is correct
   * \param input the file input
   * \param which set to 0 for paragraphs structures and to 1 for characters structures
   */
  bool findFDPStructures(MWAWInputStreamPtr &input, int which);
  /** Fills the vector of (FDPCs/FDPPs) paragraph/characters strutures,
   * a function to call when the normal ways fails
   *
   * Uses all entries FDPCs/FDPPs and calls readSimplePLC on each entry to check that the parsing is correct.
   * \param input the file input
   * \param which set to 0 for paragraphs structures and to 1 for characters structures
   */
  bool findFDPStructuresByHand(MWAWInputStreamPtr &input, int which);

protected:
  //! structure which retrieves data information which correspond to a text position
  struct DataFOD {
    /** different type which can be associated to a text position
     *
     * ATTR_TEXT: all text attributes (font, size, ...)
     * ATTR_PARAG: all paragraph attributes (margin, tabs, ...)
     * ATTR_PLC: other attribute (note, fields ... )
     */
    enum Type { ATTR_TEXT, ATTR_PARAG, ATTR_PLC, ATTR_UNKN };

    //! the constructor
    DataFOD()
      : m_type(ATTR_UNKN)
      , m_pos(-1)
      , m_defPos(0)
      , m_id(-1)
    {
    }

    //! the type of the attribute
    Type m_type;
    //! the offset position of the text modified by this attribute
    long m_pos;
    //! the offset position of the definition of the attribute in the file
    long m_defPos;
    //! an identificator (which must be unique by category)
    int m_id;
  };

private:
  MsWks4Text(MsWks4Text const &orig) = delete;
  MsWks4Text &operator=(MsWks4Text const &orig) = delete;
protected:
  //! the main parser;
  MWAWParser *m_mainParser;
  //! the parser state
  MWAWParserStatePtr m_parserState;

  //! the main document
  MsWksDocument &m_document;

  //! an entry which corresponds to the complete text zone
  MWAWEntry m_textPositions;

  //! the internal state
  mutable std::shared_ptr<MsWks4TextInternal::State> m_state;

  //! the list of a FOD
  std::vector<DataFOD> m_FODsList;

  //! the list of FDPC entries
  std::vector<MWAWEntry const *> m_FDPCs;
  //! the list of FDPP entries
  std::vector<MWAWEntry const *> m_FDPPs;
};

#endif
// vim: set filetype=cpp tabstop=2 shiftwidth=2 cindent autoindent smartindent noexpandtab: