Blob Blame History Raw
// Copyright (c) 1994, 1995 James Clark
// See the file COPYING for copying permission.

#include "splib.h"
#include "Parser.h"
#include "MessageArg.h"
#include "token.h"
#include "macros.h"
#include "ParserMessages.h"

#ifdef SP_NAMESPACE
namespace SP_NAMESPACE {
#endif

Boolean Parser::parseAttributeSpec(Mode mode,
				   AttributeList &atts,
				   Boolean &netEnabling,
				   Ptr<AttributeDefinitionList> &newAttDef)

{
  unsigned specLength = 0;
  AttributeParameter::Type curParm;

  if (!parseAttributeParameter(mode, 0, curParm, netEnabling))
    return 0;
  while (curParm != AttributeParameter::end) {
    switch (curParm) {
    case AttributeParameter::name:
      {
	Text text;
	text.addChars(currentInput()->currentTokenStart(),
		      currentInput()->currentTokenLength(),
		      currentLocation());
	size_t nameMarkupIndex;
	if (currentMarkup())
	  nameMarkupIndex = currentMarkup()->size() - 1;
	text.subst(*syntax().generalSubstTable(), syntax().space());
	if (!parseAttributeParameter(mode == piPasMode ? asMode : mode, 1, curParm, netEnabling))
	  return 0;
	if (curParm == AttributeParameter::vi) {
	  specLength += text.size() + syntax().normsep();
	  if (!parseAttributeValueSpec(mode == piPasMode ? asMode : mode, text.string(), atts,	
				       specLength, newAttDef))
	    return 0;
	  // setup for next attribute
	  if (!parseAttributeParameter(mode, 0, curParm, netEnabling))
	    return 0;
	}
	else {
	  if (currentMarkup())
	    currentMarkup()->changeToAttributeValue(nameMarkupIndex);
	  if (!handleAttributeNameToken(text, atts, specLength))
	    return 0;
	}
      }
      break;
    case AttributeParameter::nameToken:
      {
	Text text;
	text.addChars(currentInput()->currentTokenStart(),
		      currentInput()->currentTokenLength(),
		      currentLocation());
	text.subst(*syntax().generalSubstTable(), syntax().space());
	if (!handleAttributeNameToken(text, atts, specLength))
	  return 0;
	if (!parseAttributeParameter(mode, 0, curParm, netEnabling))
	  return 0;
      }
      break;
    case AttributeParameter::recoverUnquoted:
      {
	if (!atts.recoverUnquoted(currentToken(), currentLocation(), *this)) {
	  // Don't treat it as an unquoted attribute value.
	  currentInput()->endToken(1);
	  if (!atts.handleAsUnterminated(*this))
	    message(ParserMessages::attributeSpecCharacter,
		    StringMessageArg(currentToken()));
	  return 0;
	}
	if (!parseAttributeParameter(mode, 0, curParm, netEnabling))
	  return 0;
      }
      break;
    default:
      CANNOT_HAPPEN();
    }
  }
  atts.finish(*this);
  if (specLength > syntax().attsplen())
    message(ParserMessages::attsplen,
	    NumberMessageArg(syntax().attsplen()),
	    NumberMessageArg(specLength));
  return 1;
}

Boolean Parser::handleAttributeNameToken(Text &text,
					 AttributeList &atts,
					 unsigned &specLength)
{
  unsigned index;
  if (!atts.tokenIndex(text.string(), index)) {
    if (atts.handleAsUnterminated(*this))
      return 0;
    atts.noteInvalidSpec();
    message(ParserMessages::noSuchAttributeToken,
	    StringMessageArg(text.string()));
  }
  else if (sd().www() && !atts.tokenIndexUnique(text.string(), index)) {
    atts.noteInvalidSpec();
    message(ParserMessages::attributeTokenNotUnique,
	    StringMessageArg(text.string()));
  }
  else {
    if (!sd().attributeOmitName())
      message(ParserMessages::attributeNameShorttag);
    else if (options().warnMissingAttributeName)
      message(ParserMessages::missingAttributeName);
    atts.setSpec(index, *this);
    atts.setValueToken(index, text, *this, specLength);
  }
  return 1;
}

Boolean Parser::parseAttributeValueSpec(Mode mode,
					const StringC &name,
					AttributeList &atts,
					unsigned &specLength,
					Ptr<AttributeDefinitionList> &newAttDef)
{
  Markup *markup = currentMarkup();
  Token token = getToken(mode);
  if (token == tokenS) {
    if (markup) {
      do {
	markup->addS(currentChar());
	token = getToken(mode);
      } while (token == tokenS);
    }
    else {
      do {
	token = getToken(mode);
      } while (token == tokenS);
    }
  }
  unsigned index;
  if (!atts.attributeIndex(name, index)) {
    if (newAttDef.isNull())
      newAttDef = new AttributeDefinitionList(atts.def());
    AttributeDefinition *newDef = 0;
    if (!inInstance()) {
      // We are parsing a data attribute specification
      Ptr<Notation> notation;
      Dtd::NotationIter notationIter(currentDtdNonConst().notationIter());
      for (;;) {
        notation = notationIter.next();
        if (notation.isNull()
            || atts.def() == notation->attributeDef())
	  break;
      }
      ASSERT(!notation.isNull());
      if (!notation->defined()) {
        Notation *nt =
          lookupCreateNotation(syntax().rniReservedName(Syntax::rIMPLICIT));
        ConstPtr<AttributeDefinitionList> common = nt->attributeDef();
        if (!common.isNull() && common->attributeIndex(name, index)) {
          newDef = common->def(index)->copy();
          newDef->setSpecified(1);
        }
      }
      if (!newDef) {
        Notation *nt =
          lookupCreateNotation(syntax().rniReservedName(Syntax::rALL));
        ConstPtr<AttributeDefinitionList> common = nt->attributeDef();
        if (!common.isNull() && common->attributeIndex(name, index)) {
          newDef = common->def(index)->copy();
          newDef->setSpecified(0);
        }
      }
    }
    if (!newDef) {
      if (!implydefAttlist())
        message(ParserMessages::noSuchAttribute, StringMessageArg(name));
      newDef = new ImpliedAttributeDefinition(name,
                                              new CdataDeclaredValue);
    }
    newAttDef->append(newDef);
    atts.changeDef(newAttDef);
    index = atts.size() - 1;
  }
  atts.setSpec(index, *this);
  Text text;
  switch (token) {
  case tokenUnrecognized:
    if (reportNonSgmlCharacter())
      return 0;
    // fall through
  case tokenEtago:
  case tokenStago:
  case tokenNestc:
    message(ParserMessages::unquotedAttributeValue);
    extendUnquotedAttributeValue();
    if (markup)
      markup->addAttributeValue(currentInput());
    text.addChars(currentInput()->currentTokenStart(),
		  currentInput()->currentTokenLength(),
		  currentLocation());
    break;
  case tokenEe:
    if (mode != piPasMode) {
      message(ParserMessages::attributeSpecEntityEnd);
      return 0;
    }
  case tokenTagc:
  case tokenDsc:
  case tokenVi:
    message(ParserMessages::attributeValueExpected);
    return 0;
  case tokenNameStart:
  case tokenDigit:
  case tokenLcUcNmchar:
    if (!sd().attributeValueNotLiteral())
      message(ParserMessages::attributeValueShorttag);
    else if (options().warnAttributeValueNotLiteral)
      message(ParserMessages::attributeValueNotLiteral);
    extendNameToken(syntax().litlen() >= syntax().normsep()
		    ? syntax().litlen() - syntax().normsep()
		    : 0,
		    ParserMessages::attributeValueLength);
    if (markup)
      markup->addAttributeValue(currentInput());
    text.addChars(currentInput()->currentTokenStart(),
		  currentInput()->currentTokenLength(),
		  currentLocation());
    break;
  case tokenLit:
  case tokenLita:
    Boolean lita;
    lita = (token == tokenLita);
    if (!(atts.tokenized(index)
	  ? parseTokenizedAttributeValueLiteral(lita, text)
	  : parseAttributeValueLiteral(lita, text)))
      return 0;
    if (markup)
      markup->addLiteral(text);
    break;
  default:
      CANNOT_HAPPEN();
  }
  return atts.setValue(index, text, *this, specLength);
}


Boolean Parser::parseAttributeParameter(Mode mode,
					Boolean allowVi,
					AttributeParameter::Type &result,
					Boolean &netEnabling)
{
  Token token = getToken(mode);
  Markup *markup = currentMarkup();
  if (mode == piPasMode) {
    for (;;) {
      switch (token) {
      case tokenCom:
        if (!parseComment(comMode))
	  return 0;
	if (options().warnPsComment)
	  message(ParserMessages::psComment);
	// fall through
      case tokenS:
        token = getToken(mode);
	continue;
      default:
        break;
      }
      break;
    }
  }
  else if (markup) {
    while (token == tokenS) {
      markup->addS(currentChar());
      token = getToken(mode);
    }
  }
  else {
    while (token == tokenS)
      token = getToken(mode);
  }
  switch (token) {
  case tokenUnrecognized:
    if (reportNonSgmlCharacter())
      return 0;
    extendUnquotedAttributeValue();
    result = AttributeParameter::recoverUnquoted;
    break;
  case tokenEe:
    if (mode != piPasMode) {
      message(ParserMessages::attributeSpecEntityEnd);
      return 0;
    }
    result = AttributeParameter::end;
    break;
  case tokenEtago:
  case tokenStago:
    if (!sd().startTagUnclosed())
      message(ParserMessages::unclosedStartTagShorttag);
    result = AttributeParameter::end;
    currentInput()->ungetToken();
    netEnabling = 0;
    break;
  case tokenNestc:
    if (markup)
      markup->addDelim(Syntax::dNESTC);
    switch (sd().startTagNetEnable()) {
    case Sd::netEnableNo:
      message(ParserMessages::netEnablingStartTagShorttag);
      break;
    case Sd::netEnableImmednet:
      if (getToken(econnetMode) != tokenNet) 
	message(ParserMessages::nestcWithoutNet);
      currentInput()->ungetToken();
      break;
    case Sd::netEnableAll:
      break;
    }
    netEnabling = 1;
    result = AttributeParameter::end;
    break;
  case tokenTagc:
    if (markup)
      markup->addDelim(Syntax::dTAGC);
    netEnabling = 0;
    result = AttributeParameter::end;
    break;
  case tokenDsc:
    if (markup)
      markup->addDelim(Syntax::dDSC);
    result = AttributeParameter::end;
    break;
  case tokenNameStart:
    extendNameToken(syntax().namelen(), ParserMessages::nameTokenLength);
    if (markup)
      markup->addName(currentInput());
    result = AttributeParameter::name;
    break;
  case tokenDigit:
  case tokenLcUcNmchar:
    extendNameToken(syntax().namelen(), ParserMessages::nameTokenLength);
    if (markup)
      markup->addName(currentInput());
    result = AttributeParameter::nameToken;
    break;
  case tokenLit:
  case tokenLita:
    message(allowVi
	    ? ParserMessages::attributeSpecLiteral
	    : ParserMessages::attributeSpecNameTokenExpected);
    return 0;
  case tokenVi:
    if (!allowVi) {
      message(ParserMessages::attributeSpecNameTokenExpected);
      return 0;
    }
    if (markup)
      markup->addDelim(Syntax::dVI);
    result = AttributeParameter::vi;
    break;
  default:
    CANNOT_HAPPEN();
  }
  return 1;
}

void Parser::extendUnquotedAttributeValue()
{
  InputSource *in = currentInput();
  size_t length = in->currentTokenLength();
  const Syntax &syn = syntax();
  for (;;) {
    Xchar c = in->tokenChar(messenger());
    if (syn.isS(c)
	|| !syn.isSgmlChar(c)
	|| c == InputSource::eE
	|| c == syn.delimGeneral(Syntax::dTAGC)[0])
      break;
    length++;
  }
  in->endToken(length);
}

Boolean Parser::parseAttributeValueLiteral(Boolean lita, Text &text)
{
  size_t maxLength = (syntax().litlen() > syntax().normsep()
		      ? syntax().litlen() - syntax().normsep()
		      : 0);
  if (parseLiteral(lita ? alitaMode : alitMode, aliteMode,
		   maxLength,
		   ParserMessages::attributeValueLength,
		   literalNonSgml
 		   | (wantMarkup() ? unsigned(literalDelimInfo) : 0),
		   text)) {
    if (text.size() == 0
	&& syntax().normsep() > syntax().litlen())
      message(ParserMessages::attributeValueLengthNeg,
	      NumberMessageArg(syntax().normsep() - syntax().litlen()));
    return 1;
  }
  else
    return 0;
}

Boolean Parser::parseTokenizedAttributeValueLiteral(Boolean lita, Text &text)
{
  size_t maxLength = (syntax().litlen() > syntax().normsep()
		      ? syntax().litlen() - syntax().normsep()
		      : 0);
  if (parseLiteral(lita ? talitaMode : talitMode, taliteMode,
		   maxLength,
		   ParserMessages::tokenizedAttributeValueLength,
		   literalSingleSpace
		   | (wantMarkup() ? unsigned(literalDelimInfo) : 0),
		   text)) {
    if (text.size() == 0
	&& syntax().normsep() > syntax().litlen())
      message(ParserMessages::tokenizedAttributeValueLengthNeg,
	      NumberMessageArg(syntax().normsep() - syntax().litlen()));
    return 1;
  }
  else
    return 0;
}


Boolean Parser::skipAttributeSpec()
{
  AttributeParameter::Type parm;
  Boolean netEnabling;
  if (!parseAttributeParameter(tagMode, 0, parm, netEnabling))
    return 0;
  while (parm != AttributeParameter::end) {
    if (parm == AttributeParameter::name) {
      size_t nameMarkupIndex = 0;
      if (currentMarkup())
	nameMarkupIndex = currentMarkup()->size() - 1;
      if (!parseAttributeParameter(tagMode, 1, parm, netEnabling))
	return 0;
      if (parm == AttributeParameter::vi) {
	Token token = getToken(tagMode);
	while (token == tokenS) {
	  if (currentMarkup())
	    currentMarkup()->addS(currentChar());
	  token = getToken(tagMode);
	}
	switch (token) {
	case tokenUnrecognized:
	  if (!reportNonSgmlCharacter())
	    message(ParserMessages::attributeSpecCharacter,
		    StringMessageArg(currentToken()));
	  return 0;
	case tokenEe:
	  message(ParserMessages::attributeSpecEntityEnd);
	  return 0;
	case tokenEtago:
	case tokenStago:
	case tokenNestc:
	case tokenTagc:
	case tokenDsc:
	case tokenVi:
	  message(ParserMessages::attributeValueExpected);
	  return 0;
	case tokenNameStart:
	case tokenDigit:
	case tokenLcUcNmchar:
	  if (!sd().attributeValueNotLiteral())
	    message(ParserMessages::attributeValueShorttag);
	  extendNameToken(syntax().litlen() >= syntax().normsep()
			  ? syntax().litlen() - syntax().normsep()
			  : 0,
			  ParserMessages::attributeValueLength);
	  if (currentMarkup())
	    currentMarkup()->addAttributeValue(currentInput());
	  break;
	case tokenLit:
	case tokenLita:
	  {
	    Text text;
	    if (!parseLiteral(token == tokenLita ? talitaMode : talitMode,
			      taliteMode,
			      syntax().litlen(),
			      ParserMessages::tokenizedAttributeValueLength,
			      (currentMarkup() ? literalDelimInfo : 0)
			      | literalNoProcess,
			      text))
	      return 0;
	    if (currentMarkup())
	      currentMarkup()->addLiteral(text);
	  }
	  break;
	default:
	  CANNOT_HAPPEN();
	}
	if (!parseAttributeParameter(tagMode, 0, parm, netEnabling))
	  return 0;
      }
      else {
	if (currentMarkup())
	  currentMarkup()->changeToAttributeValue(nameMarkupIndex);
	if (!sd().attributeOmitName())
	  message(ParserMessages::attributeNameShorttag);
      }
    }
    else {
      // It's a name token.
      if (!parseAttributeParameter(tagMode, 0, parm, netEnabling))
	return 0;
      if (!sd().attributeOmitName())
	message(ParserMessages::attributeNameShorttag);
    }
  }
  if (netEnabling)
    message(ParserMessages::startTagGroupNet);
  return 1;
}

#ifdef SP_NAMESPACE
}
#endif