Blob Blame History Raw
// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.

#ifndef ContentToken_INCLUDED
#define ContentToken_INCLUDED 1
#ifdef __GNUG__
#pragma interface
#endif

#include "Owner.h"
#include "Text.h"
#include "Vector.h"
#include "NCVector.h"
#include "Boolean.h"

#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif

class LeafContentToken;

struct SP_API Transition {
  ~Transition();
  enum { invalidIndex = -1 };
  // When performing this transition, reset all andState with index >= this.
  unsigned clearAndStateStartIndex;
  // This transition is possible only if all AND groups whose AND depth
  // is >= this (and contains the LeafContentToken that this transition is
  // from) have had all their non-nullable members matched.
  unsigned andDepth;
  // If this is 1, then this transition requires that the AND group
  // whose AND depth is andDepth - 1 have a non-nullable member unmatched,
  // and thus this transition is not ambiguous with a transition whose
  // AND depth is < andDepth.
  PackedBoolean isolated;
  // Index in andState that must be clear for this transition to be
  // allowed.
  unsigned requireClear;
  // Index in andState that is to be set after performing this transition.
  unsigned toSet;
};

class SP_API FirstSet {
public:
  FirstSet();
  void init(LeafContentToken *);
  void append(const FirstSet &);
  size_t size() const;
  LeafContentToken *token(size_t i) const;
  size_t requiredIndex() const;
  void setNotRequired();
private:
  Vector<LeafContentToken *> v_;
  // index of contextually required token or -1 if none
  size_t requiredIndex_;
};

class SP_API LastSet : public Vector<LeafContentToken *> {
public:
  LastSet() { }
  LastSet(size_t n) : Vector<LeafContentToken *>(n) { }
  void append(const LastSet &);
};

class ElementType;
class AndModelGroup;
struct GroupInfo;

struct SP_API ContentModelAmbiguity {
  inline ~ContentModelAmbiguity() {}
  const LeafContentToken *from;
  const LeafContentToken *to1;
  const LeafContentToken *to2;
  unsigned andDepth;
};

class ModelGroup;

class SP_API ContentToken {
public:
  enum OccurrenceIndicator { none = 0, opt = 01, plus = 02, rep = 03 };
  ContentToken(OccurrenceIndicator);
  virtual ~ContentToken();
  OccurrenceIndicator occurrenceIndicator() const;
  Boolean inherentlyOptional() const;
  static unsigned andDepth(const AndModelGroup *);
  static unsigned andIndex(const AndModelGroup *);
  void analyze(GroupInfo &, const AndModelGroup *, unsigned,
	       FirstSet &, LastSet &);
  static void addTransitions(const LastSet &from,
			     const FirstSet &to,
			     Boolean maybeRequired,
			     unsigned andClearIndex,
			     unsigned andDepth,
			     Boolean isolated = 0,
			     unsigned requireClear
			       = (unsigned)Transition::invalidIndex,
			     unsigned toSet
			       = (unsigned)Transition::invalidIndex);
  virtual void finish(Vector<unsigned> &minAndDepth,
		      Vector<size_t> &elementTransition,
		      Vector<ContentModelAmbiguity> &,
		      Boolean &pcdataUnreachable) = 0;
  virtual unsigned long grpgtcnt() const;
  virtual void setOrGroupMember();
  unsigned andGroupIndex() const;
  virtual const ModelGroup *asModelGroup() const;
  virtual const LeafContentToken *asLeafContentToken() const;
protected:
  PackedBoolean inherentlyOptional_;
private:
  ContentToken(const ContentToken &); // undefined
  void operator=(const ContentToken &);	// undefined
  virtual void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
			FirstSet &, LastSet &) = 0;
  OccurrenceIndicator occurrenceIndicator_;
};

class SP_API ModelGroup : public ContentToken {
public:
  enum Connector { andConnector, orConnector, seqConnector };
  ModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
  inline virtual ~ModelGroup() {};
  virtual Connector connector() const = 0;
  unsigned nMembers() const;
  void finish(Vector<unsigned> &minAndDepth,
	      Vector<size_t> &elementTransition,
	      Vector<ContentModelAmbiguity> &,
	      Boolean &pcdataUnreachable);
  ContentToken &member(unsigned i);
  const ContentToken &member(unsigned i) const;
  unsigned long grpgtcnt() const;
  const ModelGroup *asModelGroup() const;
protected:
  void setOrGroup();
private:
  ModelGroup(const ModelGroup &); // undefined
  void operator=(const ModelGroup &); // undefined
  NCVector<Owner<ContentToken> > members_;
};

class AndModelGroup : public ModelGroup {
public:
  AndModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
  Connector connector() const;
  unsigned andDepth() const;
  unsigned andIndex() const;
  unsigned andGroupIndex() const;
  const AndModelGroup *andAncestor() const;
private:
  AndModelGroup(const AndModelGroup &);	// undefined
  void operator=(const AndModelGroup &); // undefined
  unsigned andDepth_;		// number of and groups that contain this
  unsigned andIndex_;
  unsigned andGroupIndex_;
  const AndModelGroup *andAncestor_;
  void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
		FirstSet &, LastSet &);
};

class OrModelGroup : public ModelGroup {
public:
  OrModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
  Connector connector() const;
private:
  OrModelGroup(const OrModelGroup &); // undefined
  void operator=(const OrModelGroup &);	// undefined
  void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
		FirstSet &, LastSet &);
};

class SeqModelGroup : public ModelGroup {
public:
  SeqModelGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
  Connector connector() const;
private:
  SeqModelGroup(const SeqModelGroup &);	// undefined
  void operator=(const SeqModelGroup &); // undefined
  void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
		FirstSet &, LastSet &);
};

class AndState;

class SP_API AndInfo {
public:
  AndInfo() { }
  inline ~AndInfo() { }
  const AndModelGroup *andAncestor;
  unsigned andGroupIndex;
  Vector<Transition> follow;
private:
  AndInfo(const AndInfo &);	// undefined
  void operator=(const AndInfo &); // undefined
};

// A LeafContentToken is not quite the same as a primitive content token.
// A data tag group is a primitive content token but not a LeafContentToken.

class SP_API LeafContentToken : public ContentToken {
public:
  LeafContentToken(const ElementType *, OccurrenceIndicator);
  inline virtual ~LeafContentToken() {};
  unsigned index() const;
  unsigned typeIndex() const;
  const ElementType *elementType() const;
  virtual Boolean isInitial() const;
  void addTransitions(const FirstSet &to,
		      Boolean maybeRequired,
		      unsigned andClearIndex,
		      unsigned andDepth,
		      Boolean isolated,
		      unsigned requireClear,
		      unsigned toSet);
  void setFinal();
  void finish(Vector<unsigned> &minAndDepth,
	      Vector<size_t> &elementTransition,
	      Vector<ContentModelAmbiguity> &,
	      Boolean &pcdataUnreachable);
  Boolean isFinal() const;
  Boolean tryTransition(const ElementType *, AndState &,
			unsigned &minAndDepth,
			const LeafContentToken *&newpos) const;
  Boolean tryTransitionPcdata(AndState &, unsigned &minAndDepth,
			      const LeafContentToken *&newpos) const;
  void possibleTransitions(const AndState &, unsigned minAndDepth, Vector<const ElementType *> &) const;
  const LeafContentToken *impliedStartTag(const AndState &andpos,
					  unsigned minAndDepth) const;
  const LeafContentToken *transitionToken(const ElementType *to,
					  const AndState &andState,
					  unsigned minAndDepth) const;
  void doRequiredTransition(AndState &andState,
			    unsigned &minAndDepth,
			    const LeafContentToken *&newpos) const;
  unsigned computeMinAndDepth(const AndState&) const;
  Boolean orGroupMember() const;
  void setOrGroupMember();
  const AndModelGroup *andAncestor() const;
  unsigned andDepth() const;
  const LeafContentToken *asLeafContentToken() const;
protected:
  void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
		FirstSet &, LastSet &);
  const ElementType *element_;
private:
  LeafContentToken(const LeafContentToken &); // undefined
  void operator=(const LeafContentToken &);   // undefined
  void andFinish(Vector<unsigned> &minAndDepth,
		 Vector<size_t> &elementTransition,
		 Vector<ContentModelAmbiguity> &,
		 Boolean &pcdataUnreachable);
  unsigned computeMinAndDepth1(const AndState&) const;
  unsigned leafIndex_;
  unsigned typeIndex_;
  Vector<LeafContentToken *> follow_;
  PackedBoolean isFinal_;
  PackedBoolean orGroupMember_;
  // 0 none, 1 yes - simple, 2 - compled
  char pcdataTransitionType_;
  const LeafContentToken *simplePcdataTransition_;
  size_t requiredIndex_;
  Owner<AndInfo> andInfo_;
};

class PcdataToken : public LeafContentToken {
public:
  PcdataToken();
  void analyze1(GroupInfo &, const AndModelGroup *, unsigned,
		FirstSet &, LastSet &);
private:
  PcdataToken(const PcdataToken &); // undefined
  void operator=(const PcdataToken &); // undefined
};

class InitialPseudoToken : public LeafContentToken {
public:
  InitialPseudoToken();
  Boolean isInitial() const;
private:
  InitialPseudoToken(const InitialPseudoToken &); // undefined
  void operator=(const InitialPseudoToken &); // undefined
};

class ElementToken : public LeafContentToken {
public:
  ElementToken(const ElementType *, OccurrenceIndicator);
private:
  ElementToken(const ElementToken &); // undefined
  void operator=(const ElementToken &); // undefined
};

class DataTagGroup : public SeqModelGroup {
public:
  // first content token is a DataTagElementToken, second is PcdataToken
  DataTagGroup(NCVector<Owner<ContentToken> > &, OccurrenceIndicator);
private:
  DataTagGroup(const DataTagGroup &); // undefined
  void operator=(const DataTagGroup &); // undefined
};

class DataTagElementToken : public ElementToken {
public:
  DataTagElementToken(const ElementType *, Vector<Text> &templates);
  DataTagElementToken(const ElementType *, Vector<Text> &templates,
		      Text &paddingTemplate);
private:
  DataTagElementToken(const DataTagElementToken &); // undefined
  void operator=(const DataTagElementToken &); // undefined
  Vector<Text> templates_;
  Boolean havePaddingTemplate_;
  Text paddingTemplate_;
};

class SP_API CompiledModelGroup {
public:
  CompiledModelGroup(Owner<ModelGroup> &);
  inline ~CompiledModelGroup() {};
  void compile(size_t nElementTypeIndex,
	       Vector<ContentModelAmbiguity> &,
	       Boolean &pcdataUnreachable);
  CompiledModelGroup *copy() const;
  const LeafContentToken *initial() const;
  unsigned andStateSize() const;
  Boolean containsPcdata() const;
  const ModelGroup *modelGroup() const;
private:
  CompiledModelGroup(const CompiledModelGroup &); // undefined
  void operator=(const CompiledModelGroup &);	  // undefined
  Owner<ModelGroup> modelGroup_;
  Owner<LeafContentToken> initial_;
  unsigned andStateSize_;
  Boolean containsPcdata_;
};

class SP_API AndState {
public:
  AndState(unsigned);
  Boolean isClear(unsigned) const;
  void clearFrom(unsigned);
  void set(unsigned);
  Boolean operator==(const AndState &) const;
  Boolean operator!=(const AndState &) const;
private:
  void clearFrom1(unsigned);
  unsigned clearFrom_;
  Vector<PackedBoolean> v_;
};

class SP_API MatchState {
public:
  MatchState();
  MatchState(const CompiledModelGroup *); // may be 0
  Boolean tryTransition(const ElementType *);
  Boolean tryTransitionPcdata();
  void possibleTransitions(Vector<const ElementType *> &) const;
  Boolean isFinished() const;
  const LeafContentToken *impliedStartTag() const;
  const LeafContentToken *invalidExclusion(const ElementType *) const;
  void doRequiredTransition();
  const LeafContentToken *currentPosition() const;
  Boolean operator==(const MatchState &) const;
  Boolean operator!=(const MatchState &) const;
private:
  const LeafContentToken *pos_;
  AndState andState_;
  unsigned minAndDepth_;
};

inline
ContentToken::OccurrenceIndicator ContentToken::occurrenceIndicator() const
{
  return occurrenceIndicator_;
}

inline
unsigned LeafContentToken::index() const
{
  return leafIndex_;
}

inline
unsigned LeafContentToken::typeIndex() const
{
  return typeIndex_;
}

inline
Boolean ContentToken::inherentlyOptional() const
{
  return inherentlyOptional_;
}

inline
const ElementType *LeafContentToken::elementType() const
{
  return element_;
}

inline
unsigned AndModelGroup::andDepth() const
{
  return andDepth_;
}

inline
unsigned AndModelGroup::andIndex() const
{
  return andIndex_;
}

inline
unsigned ModelGroup::nMembers() const
{
  return members_.size();
}

inline
unsigned ContentToken::andDepth(const AndModelGroup *andAncestor)
{
  return andAncestor ? andAncestor->andDepth() + 1 : 0;
}

inline
unsigned ContentToken::andIndex(const AndModelGroup *andAncestor)
{
  return (andAncestor
	  ? andAncestor->andIndex() + andAncestor->nMembers()
	  : 0);
}

inline
ContentToken &ModelGroup::member(unsigned i)
{
  return *members_[i];
}

inline
const ContentToken &ModelGroup::member(unsigned i) const
{
  return *members_[i];
}

inline
void LeafContentToken::setFinal()
{
  isFinal_ = 1;
}

inline
Boolean LeafContentToken::isFinal() const
{
  return isFinal_;
}

inline
Boolean LeafContentToken::orGroupMember() const
{
  return orGroupMember_;
}

inline
unsigned CompiledModelGroup::andStateSize() const
{
  return andStateSize_;
}

inline
Boolean CompiledModelGroup::containsPcdata() const
{
  return containsPcdata_;
}

inline
const AndModelGroup *AndModelGroup::andAncestor() const
{
  return andAncestor_;
}

inline
unsigned AndModelGroup::andGroupIndex() const
{
  return andGroupIndex_;
}

inline
const LeafContentToken *CompiledModelGroup::initial() const
{
  return initial_.pointer();
}

inline
const ModelGroup *CompiledModelGroup::modelGroup() const
{
  return modelGroup_.pointer();
}

inline
const AndModelGroup *LeafContentToken::andAncestor() const
{
  return andInfo_ ? andInfo_->andAncestor : 0;
}

inline
unsigned LeafContentToken::andDepth() const
{
  return andInfo_ ? ContentToken::andDepth(andInfo_->andAncestor) : 0;
}

inline
unsigned LeafContentToken::computeMinAndDepth(const AndState &andState) const
{
  return andInfo_ ? computeMinAndDepth1(andState) : 0;
}

inline
Boolean LeafContentToken::tryTransitionPcdata(AndState &andState,
					      unsigned &minAndDepth,
					      const LeafContentToken *&newpos)
     const
{
  if (pcdataTransitionType_ == 1) {
    newpos = simplePcdataTransition_;
    return 1;
  }
  else if (pcdataTransitionType_ == 0)
    return 0;
  else
    return tryTransition(0, andState, minAndDepth, newpos);
}

inline
Boolean MatchState::tryTransition(const ElementType *to)
{
  return pos_->tryTransition(to, andState_, minAndDepth_, pos_);
}

inline
Boolean MatchState::tryTransitionPcdata()
{
  return pos_->tryTransitionPcdata(andState_, minAndDepth_, pos_);
}

inline
void MatchState::possibleTransitions(Vector<const ElementType *> &v) const
{
  pos_->possibleTransitions(andState_, minAndDepth_, v);
}

inline
Boolean MatchState::isFinished() const
{
  return pos_->isFinal() && minAndDepth_ == 0;
}

inline
const LeafContentToken *
MatchState::impliedStartTag() const
{
  return pos_->impliedStartTag(andState_, minAndDepth_);
}

inline
void MatchState::doRequiredTransition()
{
  pos_->doRequiredTransition(andState_, minAndDepth_, pos_);
}

inline
const LeafContentToken *MatchState::currentPosition() const
{
  return pos_;
}

inline
Boolean MatchState::operator!=(const MatchState &state) const
{
  return !(*this == state);
}

inline
Boolean AndState::isClear(unsigned i) const
{
  return v_[i] == 0;
}

inline
void AndState::set(unsigned i)
{
  v_[i] = 1;
  if (i >= clearFrom_)
    clearFrom_ = i + 1;
}

inline
void AndState::clearFrom(unsigned i)
{
  if (i < clearFrom_)
    clearFrom1(i);
}

inline
Boolean AndState::operator!=(const AndState &state) const
{
  return !(*this == state);
}


inline
size_t FirstSet::size() const
{
  return v_.size();
}

inline
LeafContentToken *FirstSet::token(size_t i) const
{
  return v_[i];
}

inline
size_t FirstSet::requiredIndex() const
{
  return requiredIndex_;
}

inline
void FirstSet::setNotRequired()
{
  requiredIndex_ = size_t(-1);
}

#ifdef SP_NAMESPACE
}
#endif

#endif /* not ContentToken_INCLUDED */