Blob Blame History Raw
// Copyright (c) 1994 James Clark
// See the file COPYING for copying permission.

#ifdef __GNUG__
#pragma implementation
#endif
#include "splib.h"
#include "types.h"
#include "macros.h"
#include "StringOf.h"
#include "Trie.h"
#include "TrieBuilder.h"
#include "Priority.h"
#include <stdlib.h>

#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif

Trie::~Trie()
{
  if (next_)
    delete [] next_;
}

Trie::Trie(const Trie &t)
: nCodes_(t.nCodes_),
  token_(t.token_),
  tokenLength_(t.tokenLength_),
  priority_(t.priority_),
  blank_(t.blank_)
{
  if (t.next_) {
    next_ = new Trie[nCodes_];
    for (int i = 0; i < nCodes_; i++)
      next_[i] = t.next_[i];
  }
  else
    next_ = 0;
}

Trie &Trie::operator=(const Trie &t)
{
  if (next_)
    delete [] next_;
  nCodes_ = t.nCodes_;
  token_ = t.token_;
  tokenLength_ = t.tokenLength_;
  priority_ = t.priority_;
  blank_ = t.blank_;
  if (t.next_) {
    next_ = new Trie[nCodes_];
    for (int i = 0; i < nCodes_; i++)
      next_[i] = t.next_[i];
  }
  else
    next_ = 0;
  return *this;
}

TrieBuilder::TrieBuilder(int nCodes)
: nCodes_(nCodes), root_(new Trie)
{
  root_->token_ = 0;
  root_->tokenLength_ = 0;
  root_->priority_ = Priority::data;
  root_->nCodes_ = nCodes;
}

void TrieBuilder::recognize(const String<EquivCode> &chars,
			    Token t,
			    Priority::Type priority,
			    TokenVector &ambiguities)
{
  setToken(extendTrie(root_.pointer(), chars), chars.size(), t, priority,
	   ambiguities);
}

void TrieBuilder::recognize(const String<EquivCode> &chars,
			    const String<EquivCode> &set,
			    Token t,
			    Priority::Type priority,
			    TokenVector &ambiguities)
{
  Trie *trie = extendTrie(root_.pointer(), chars);

  for (size_t i = 0; i < set.size(); i++)
    setToken(forceNext(trie, set[i]), chars.size() + 1, t, priority,
	     ambiguities);
}

void TrieBuilder::recognizeB(const String<EquivCode> &chars,
			     int bSequenceLength,
			     size_t maxBlankSequence,
			     const String<EquivCode> &blankCodes,
			     const String<EquivCode> &chars2,
			     Token token,
			     TokenVector &ambiguities)
{
  doB(extendTrie(root_.pointer(), chars),
      chars.size(),
      bSequenceLength,
      maxBlankSequence,
      blankCodes,
      chars2,
      token,
      Priority::blank(bSequenceLength),
      ambiguities);
}

void TrieBuilder::recognizeEE(EquivCode code, Token t)
{
  Trie *trie = forceNext(root_.pointer(), code);
  trie->tokenLength_ = 0;	// it has length 0 in the buffer
  trie->token_ = t;
  trie->priority_ = Priority::data;
}

void TrieBuilder::doB(Trie *trie,
		      int tokenLength,
		      int minBLength,
		      size_t maxLength,
		      const String<EquivCode> &blankCodes,
		      const String<EquivCode> &chars2,
		      Token token,
		      Priority::Type pri,
		      TokenVector &ambiguities)
{
  if (minBLength == 0 && trie->next_ == 0) {
    if (!trie->blank_) {
      BlankTrie *b = new BlankTrie;
      trie->blank_ = b;
      b->maxBlanksToScan_ = maxLength;
      b->additionalLength_ = tokenLength;
      b->codeIsBlank_.assign(nCodes_, 0);
      for (size_t i = 0; i < blankCodes.size(); i++)
	b->codeIsBlank_[blankCodes[i]] = 1;
      b->tokenLength_ = 0;
      b->token_ = 0;
      b->priority_ = Priority::data;
      b->nCodes_ = nCodes_;
    }
    else {
      // A B sequence is not allowed to be adjacent to a character
      // that can occur in a blank sequence, so maxLength will be
      // the same at a node, no matter how we got there.
      ASSERT(trie->blank_->maxBlanksToScan_ == maxLength);
      ASSERT(trie->blank_->additionalLength_ == tokenLength);
    }
    if (chars2.size() == 0)
      setToken(trie, tokenLength, token, pri, ambiguities);
    else
      setToken(extendTrie(trie->blank_.pointer(), chars2),
	       chars2.size(),
	       token,
	       pri,
	       ambiguities);
  }
  else {
    if (minBLength == 0)
      setToken(extendTrie(trie, chars2), tokenLength + chars2.size(),
	       token, pri, ambiguities);
    for (size_t i = 0; i < blankCodes.size(); i++)
      doB(forceNext(trie, blankCodes[i]),
	  tokenLength + 1,
	  minBLength == 0 ? 0 : minBLength - 1,
	  maxLength - 1,
	  blankCodes,
	  chars2,
	  token,
	  pri,
	  ambiguities);
  }
}

Trie *TrieBuilder::extendTrie(Trie *trie, const String<EquivCode> &s)
{
  for (size_t i = 0; i < s.size(); i++)
    trie = forceNext(trie, s[i]);
  return trie;
}

void TrieBuilder::setToken(Trie *trie,
			   int tokenLength,
			   Token token,
			   Priority::Type pri,
			   TokenVector &ambiguities)
{
  if (tokenLength > trie->tokenLength_
      || (tokenLength == trie->tokenLength_
	  && pri > trie->priority_)) {
    trie->tokenLength_ = tokenLength;
    trie->token_ = token;
    trie->priority_ = pri;
  }
  else if (trie->tokenLength_ == tokenLength
	   && trie->priority_ == pri
	   && trie->token_ != token
	   && trie->token_ != 0) {
    ambiguities.push_back(Token(trie->token_));
    ambiguities.push_back(token);
  }
  if (trie->hasNext()) {
    for (int i = 0; i < nCodes_; i++)
      setToken(&trie->next_[i], tokenLength, token, pri, ambiguities);
  }
}

void TrieBuilder::copyInto(Trie *into, const Trie *from, int additionalLength)
{
  if (from->token_ != 0) {
    TokenVector ambiguities;
    setToken(into, from->tokenLength_ + additionalLength, from->token_,
	     from->priority_, ambiguities);
    ASSERT(ambiguities.size() == 0);
  }
  if (from->hasNext())
    for (int i = 0; i < nCodes_; i++)
      copyInto(forceNext(into, i), &from->next_[i], additionalLength);
}

Trie *TrieBuilder::forceNext(Trie *trie, EquivCode c)
{
  if (!trie->hasNext()) {
    trie->next_ = new Trie[nCodes_];
    if (trie->blank_) {
      trie->blank_->additionalLength_ += 1;
      trie->blank_->maxBlanksToScan_ -= 1;
    }
    Owner<BlankTrie> blankOwner(trie->blank_.extract());
    const BlankTrie *b = blankOwner.pointer();
    for (int i = 0; i < nCodes_; i++) {
      Trie *p = &trie->next_[i];
      if (b && b->codeIsBlank(i))
	trie->next_[i].blank_ = (blankOwner
				 ? blankOwner.extract()
				 : new BlankTrie(*b));
      p->token_ = trie->token_;
      p->tokenLength_ = trie->tokenLength_;
      p->priority_ = trie->priority_;
      p->nCodes_ = nCodes_;
    }
    if (b)
      // -1 because 1 was added above
      copyInto(trie, b, b->additionalLength_ - 1);
  }
  return &trie->next_[c];
}

#ifdef SP_NAMESPACE
}
#endif