From 13714a1adf25a38fdc0c4b8e183a67b2b4e7e946 Mon Sep 17 00:00:00 2001 From: Packit Date: Sep 16 2020 09:27:51 +0000 Subject: Apply patch opensp-sigsegv.patch patch_name: opensp-sigsegv.patch present_in_specfile: true --- diff --git a/lib/ExtendEntityManager.cxx b/lib/ExtendEntityManager.cxx index fb73a07..19e44c0 100644 --- a/lib/ExtendEntityManager.cxx +++ b/lib/ExtendEntityManager.cxx @@ -1238,7 +1238,8 @@ StorageObjectSpec::StorageObjectSpec() } StorageObjectSpec::StorageObjectSpec(const StorageObjectSpec& x) -: codingSystemName(x.codingSystemName), +: storageManager(x.storageManager), + codingSystemName(x.codingSystemName), codingSystem(x.codingSystem), specId(x.specId), baseId(x.baseId), @@ -1253,6 +1254,7 @@ StorageObjectSpec::StorageObjectSpec(const StorageObjectSpec& x) StorageObjectSpec& StorageObjectSpec::operator=(const StorageObjectSpec& x) { if (this != &x) { + storageManager = x.storageManager; codingSystemName = x.codingSystemName; codingSystem = x.codingSystem; specId = x.specId; diff --git a/lib/ExtendEntityManager.cxx.sigsegv b/lib/ExtendEntityManager.cxx.sigsegv new file mode 100644 index 0000000..fb73a07 --- /dev/null +++ b/lib/ExtendEntityManager.cxx.sigsegv @@ -0,0 +1,2209 @@ +// Copyright (c) 1994, 1995, 1996 James Clark +// See the file COPYING for copying permission. + +#ifdef __GNUG__ +#pragma implementation +#endif + +#include "splib.h" +#include "ExtendEntityManager.h" +#include "Message.h" +#include "MessageArg.h" +#include "OffsetOrderedList.h" +#include "rtti.h" +#include "StorageManager.h" +#include "Vector.h" +#include "NCVector.h" +#include "Owner.h" +#include "constant.h" +#include "EntityManagerMessages.h" +#include "StorageObjectPosition.h" +#include "Owner.h" +#include "CodingSystem.h" +#include "CodingSystemKit.h" +#include "InputSource.h" +#include "Mutex.h" +#include "macros.h" +#include "EntityCatalog.h" +#include "CharMap.h" + +#include +#include +#include +#include +#include + +#ifdef DECLARE_MEMMOVE +extern "C" { + void *memmove(void *, const void *, size_t); +} +#endif + +#ifdef SP_NAMESPACE +namespace SP_NAMESPACE { +#endif + +const char EOFCHAR = '\032'; // Control-Z + +class ExternalInputSource; + +class EntityManagerImpl : public ExtendEntityManager { +public: + EntityManagerImpl(StorageManager *defaultStorageManager, + const InputCodingSystem *defaultCodingSystem, + const ConstPtr &, + Boolean internalCharsetIsDocCharset); + void setCatalogManager(CatalogManager *catalogManager); + void registerStorageManager(StorageManager *); + InputSource *open(const StringC &sysid, + const CharsetInfo &, + InputSourceOrigin *, + unsigned flags, + Messenger &); + const CharsetInfo &charset() const; + Boolean internalCharsetIsDocCharset() const; + ConstPtr makeCatalog(StringC &systemId, + const CharsetInfo &charset, + Messenger &mgr); + Boolean expandSystemId(const StringC &, + const Location &, + Boolean isNdata, + const CharsetInfo &, + const StringC *, + Messenger &, + StringC &); + Boolean mergeSystemIds(const Vector &, + Boolean mapCatalogDocument, + const CharsetInfo &, + Messenger &mgr, + StringC &) const; + StorageManager *lookupStorageType(const StringC &, const CharsetInfo &) const; + StorageManager *lookupStorageType(const char *) const; + StorageManager *guessStorageType(const StringC &, const CharsetInfo &) const; + const InputCodingSystem *lookupCodingSystem(const StringC &, + const CharsetInfo &, + Boolean isBctf, + const char *&) const; + Boolean resolveSystemId(const StringC &str, + const CharsetInfo &idCharset, + Messenger &mgr, + const Location &defLocation, + Boolean isNdata, + ParsedSystemId &parsedSysid) const; + Boolean parseSystemId(const StringC &str, + const CharsetInfo &idCharset, + Boolean isNdata, + const StorageObjectLocation *def, + Messenger &mgr, + ParsedSystemId &parsedSysid) const; + const CharsetInfo &internalCharset(const CharsetInfo &docCharset) const { + if (internalCharsetIsDocCharset_) + return docCharset; + else + return charset(); + } +private: + EntityManagerImpl(const EntityManagerImpl &); // undefined + void operator=(const EntityManagerImpl &); // undefined + static Boolean defLocation(const Location &, StorageObjectLocation &); + static Boolean matchKey(const StringC &type, const char *s, + const CharsetInfo &internalCharset); + NCVector > storageManagers_; + Owner defaultStorageManager_; + const InputCodingSystem *defaultCodingSystem_; + Owner catalogManager_; + Boolean internalCharsetIsDocCharset_; + ConstPtr codingSystemKit_; + friend class FSIParser; +}; + +class ExternalInfoImpl : public ExternalInfo { + RTTI_CLASS +public: + ExternalInfoImpl(ParsedSystemId &parsedSysid); + const StorageObjectSpec &spec(size_t i) const; + size_t nSpecs() const; + const ParsedSystemId &parsedSystemId() const; + void noteRS(Offset); + void noteStorageObjectEnd(Offset); + void noteInsertedRSs(); + void setDecoder(size_t i, Decoder *); + void setId(size_t i, StringC &); + void getId(size_t i, StringC &) const; + Boolean convertOffset(Offset, StorageObjectLocation &) const; +private: + ParsedSystemId parsedSysid_; + NCVector position_; + size_t currentIndex_; + // list of inserted RSs + OffsetOrderedList rsList_; + Boolean notrack_; + Mutex mutex_; +}; + +class ExternalInputSource : public InputSource { +public: + ExternalInputSource(ParsedSystemId &parsedSysid, + const CharsetInfo &internalCharset, + const CharsetInfo &docCharset, + Boolean internalCharsetIsDocCharset, + Char replacementChar, + InputSourceOrigin *origin, + unsigned flags); + void pushCharRef(Char, const NamedCharRef &); + ~ExternalInputSource(); +private: + Xchar fill(Messenger &); + Boolean rewind(Messenger &); + void willNotRewind(); + void setDocCharset(const CharsetInfo &, const CharsetInfo &); + void willNotSetDocCharset(); + + void init(); + void noteRS(); + void noteRSAt(const Char *); + void reallocateBuffer(size_t size); + void insertChar(Char); + void buildMap(const CharsetInfo &internalCharset, + const CharsetInfo &docCharset); + void buildMap1(const CharsetInfo &, const CharsetInfo &); + static const Char *findNextCr(const Char *start, const Char *end); + static const Char *findNextLf(const Char *start, const Char *end); + static const Char *findNextCrOrLf(const Char *start, const Char *end); + + ExternalInfoImpl *info_; + Char *buf_; + const Char *bufLim_; + Offset bufLimOffset_; + size_t bufSize_; + size_t readSize_; + NCVector > sov_; + StorageObject *so_; + size_t soIndex_; + Boolean insertRS_; + Decoder *decoder_; + const char *leftOver_; + size_t nLeftOver_; + Boolean mayRewind_; + Boolean maySetDocCharset_; + Boolean mayNotExist_; + enum RecordType { + unknown, + crUnknown, + crlf, + lf, + cr, + asis + }; + RecordType recordType_; + Boolean zapEof_; + Boolean internalCharsetIsDocCharset_; + Char replacementChar_; + Ptr > map_; +}; + +class FSIParser { +public: + FSIParser(const StringC &, const CharsetInfo &idCharset, + Boolean isNdata, + const StorageObjectLocation *defLoc, + const EntityManagerImpl *em, + Messenger &mgr); + Boolean parse(ParsedSystemId &parsedSysid); + static const char *recordsName(StorageObjectSpec::Records records); + struct RecordType { + const char *name; + StorageObjectSpec::Records value; + }; +private: + Boolean handleInformal(size_t startIndex, ParsedSystemId &parsedSysid); + Boolean convertId(StringC &, Xchar smcrd, const StorageManager *); + Xchar get(); + void unget(); + StorageManager *lookupStorageType(const StringC &key, Boolean &neutral); + Boolean matchKey(const StringC &, const char *); + Boolean matchChar(Xchar, char); + Boolean isS(Xchar); + Boolean convertDigit(Xchar c, int &weight); + void uncharref(StringC &); + Boolean setAttributes(StorageObjectSpec &sos, Boolean neutral, + Xchar &smcrd, Boolean &fold); + Boolean setCatalogAttributes(ParsedSystemId &parsedSysid); + void setDefaults(StorageObjectSpec &sos); + Boolean parseAttribute(StringC &token, Boolean &gotValue, StringC &value); + Boolean lookupRecords(const StringC &token, StorageObjectSpec::Records &); + void convertMinimumLiteral(const StringC &from, StringC &to); + + const StringC &str_; + size_t strIndex_; + Messenger &mgr_; + const EntityManagerImpl *em_; + const StorageObjectSpec *defSpec_; + const StringC *defId_; + const CharsetInfo &idCharset_; + Boolean isNdata_; + static RecordType recordTypeTable[]; +}; + +const Char RS = '\n'; +const Char RE = '\r'; + + +ExtendEntityManager::~ExtendEntityManager() +{ +} + +ExtendEntityManager::CatalogManager::~CatalogManager() +{ +} + +ExtendEntityManager *ExtendEntityManager::make(StorageManager *sm, + const InputCodingSystem *cs, + const ConstPtr &csKit, + Boolean internalCharsetIsDocCharset) +{ + return new EntityManagerImpl(sm, cs, csKit, internalCharsetIsDocCharset); +} + +Boolean ExtendEntityManager::externalize(const ExternalInfo *info, + Offset off, + StorageObjectLocation &loc) +{ + if (!info) + return false; + const ExternalInfoImpl *p = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info); + if (!p) + return false; + return p->convertOffset(off, loc); +} + +const ParsedSystemId * +ExtendEntityManager::externalInfoParsedSystemId(const ExternalInfo *info) +{ + if (!info) + return 0; + const ExternalInfoImpl *p = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info); + if (!p) + return 0; + return &p->parsedSystemId(); +} + +EntityManagerImpl::EntityManagerImpl(StorageManager *defaultStorageManager, + const InputCodingSystem *defaultCodingSystem, + const ConstPtr &codingSystemKit, + Boolean internalCharsetIsDocCharset) +: defaultStorageManager_(defaultStorageManager), + defaultCodingSystem_(defaultCodingSystem), + codingSystemKit_(codingSystemKit), + internalCharsetIsDocCharset_(internalCharsetIsDocCharset) +{ +} + +Boolean EntityManagerImpl::internalCharsetIsDocCharset() const +{ + return internalCharsetIsDocCharset_; +} + +const CharsetInfo &EntityManagerImpl::charset() const +{ + return codingSystemKit_->systemCharset(); +} + +InputSource *EntityManagerImpl::open(const StringC &sysid, + const CharsetInfo &docCharset, + InputSourceOrigin *origin, + unsigned flags, + Messenger &mgr) +{ + ParsedSystemId parsedSysid; + if (!parseSystemId(sysid, docCharset, (flags & ExtendEntityManager::isNdata) != 0, + 0, mgr, parsedSysid) + || !catalogManager_->mapCatalog(parsedSysid, this, mgr)) + return 0; + return new ExternalInputSource(parsedSysid, + charset(), + docCharset, + internalCharsetIsDocCharset_, + codingSystemKit_->replacementChar(), + origin, flags); +} + + +ConstPtr +EntityManagerImpl::makeCatalog(StringC &systemId, + const CharsetInfo &docCharset, + Messenger &mgr) +{ + return catalogManager_->makeCatalog(systemId, docCharset, this, mgr); +} + +Boolean +EntityManagerImpl::mergeSystemIds(const Vector &sysids, + Boolean mapCatalogDocument, + const CharsetInfo &docCharset, + Messenger &mgr, + StringC &result) const +{ + ParsedSystemId parsedSysid; + if (mapCatalogDocument) { + parsedSysid.maps.resize(parsedSysid.maps.size() + 1); + parsedSysid.maps.back().type = ParsedSystemId::Map::catalogDocument; + } + for (size_t i = 0; i < sysids.size(); i++) + if (!parseSystemId(sysids[i], + docCharset, + 0, + 0, + mgr, + parsedSysid)) + return 0; + parsedSysid.unparse(internalCharset(docCharset), 0, result); + return 1; +} + +Boolean +EntityManagerImpl::expandSystemId(const StringC &str, + const Location &defLoc, + Boolean isNdata, + const CharsetInfo &docCharset, + const StringC *mapCatalogPublic, + Messenger &mgr, + StringC &result) +{ + ParsedSystemId parsedSysid; + StorageObjectLocation defSoLoc; + const StorageObjectLocation *defSoLocP; + if (defLocation(defLoc, defSoLoc)) + defSoLocP = &defSoLoc; + else + defSoLocP = 0; + if (!parseSystemId(str, docCharset, isNdata, defSoLocP, mgr, parsedSysid)) + return 0; + if (mapCatalogPublic) { + ParsedSystemId::Map map; + map.type = ParsedSystemId::Map::catalogPublic; + map.publicId = *mapCatalogPublic; + parsedSysid.maps.insert(parsedSysid.maps.begin(), 1, map); + } + parsedSysid.unparse(internalCharset(docCharset), isNdata, result); + return 1; +} + +Boolean EntityManagerImpl::parseSystemId(const StringC &str, + const CharsetInfo &docCharset, + Boolean isNdata, + const StorageObjectLocation *defLoc, + Messenger &mgr, + ParsedSystemId &parsedSysid) const +{ + FSIParser fsiParser(str, internalCharset(docCharset), isNdata, defLoc, this, mgr); + return fsiParser.parse(parsedSysid); +} + +StorageManager * +EntityManagerImpl::guessStorageType(const StringC &type, + const CharsetInfo &internalCharset) const +{ + for (size_t i = 0; i < storageManagers_.size(); i++) + if (storageManagers_[i]->guessIsId(type, internalCharset)) + return storageManagers_[i].pointer(); + if (defaultStorageManager_->guessIsId(type, internalCharset)) + return defaultStorageManager_.pointer(); + return 0; +} + +StorageManager * +EntityManagerImpl::lookupStorageType(const StringC &type, + const CharsetInfo &internalCharset) const +{ + if (type.size() == 0) + return 0; + if (matchKey(type, defaultStorageManager_->type(), internalCharset)) + return defaultStorageManager_.pointer(); + for (size_t i = 0; i < storageManagers_.size(); i++) + if (matchKey(type, storageManagers_[i]->type(), internalCharset)) + return storageManagers_[i].pointer(); + return 0; +} + +StorageManager * +EntityManagerImpl::lookupStorageType(const char *type) const +{ + if (type == defaultStorageManager_->type()) + return defaultStorageManager_.pointer(); + for (size_t i = 0; i < storageManagers_.size(); i++) + if (type == storageManagers_[i]->type()) + return storageManagers_[i].pointer(); + return 0; +} + +const InputCodingSystem * +EntityManagerImpl::lookupCodingSystem(const StringC &type, + const CharsetInfo &internalCharset, + Boolean isBctf, + const char *&name) const +{ + return codingSystemKit_->makeInputCodingSystem(type, internalCharset, isBctf, name); +} + +Boolean +EntityManagerImpl::matchKey(const StringC &type, + const char *s, + const CharsetInfo &internalCharset) +{ + if (strlen(s) != type.size()) + return false; + for (size_t i = 0; i < type.size(); i++) + if (internalCharset.execToDesc(toupper(s[i])) != type[i] + && internalCharset.execToDesc(tolower(s[i])) != type[i]) + return false; + return true; +} + +void EntityManagerImpl::registerStorageManager(StorageManager *sm) +{ + storageManagers_.resize(storageManagers_.size() + 1); + storageManagers_.back() = sm; +} + +void EntityManagerImpl::setCatalogManager(CatalogManager *catalogManager) +{ + catalogManager_ = catalogManager; +} + +Boolean +EntityManagerImpl::defLocation(const Location &defLocation, + StorageObjectLocation &soLoc) +{ + Offset off; + const ExternalInfo *info; + const Origin *origin = defLocation.origin().pointer(); + Index index = defLocation.index(); + for (;;) { + if (!origin) + return 0; + const InputSourceOrigin *inputSourceOrigin = origin->asInputSourceOrigin(); + if (inputSourceOrigin) { + off = inputSourceOrigin->startOffset(index); + info = inputSourceOrigin->externalInfo(); + if (info) + break; + if (!inputSourceOrigin->defLocation(off, origin, index)) + return 0; + } + else { + const Location &parentLoc = origin->parent(); + origin = parentLoc.origin().pointer(); + index = parentLoc.index(); + } + } + return ExtendEntityManager::externalize(info, off, soLoc); +} + +class UnbufferingStorageObject : public StorageObject { +public: + UnbufferingStorageObject(StorageObject *sub, + const Boolean *unbuffer) + : sub_(sub), buf_(0), bufAvail_(0), bufNext_(0), unbuffer_(unbuffer) { } + ~UnbufferingStorageObject() { delete [] buf_; } + Boolean read(char *buf, size_t bufSize, Messenger &mgr, + size_t &nread) { + if (bufNext_ >= bufAvail_) { + bufAvail_ = bufNext_ = 0; + if (!*unbuffer_) + return sub_->read(buf, bufSize, mgr, nread); + if (buf_ == 0) + buf_ = new char[bufSize_ = bufSize]; + if (!sub_->read(buf_, bufSize_, mgr, bufAvail_)) + return 0; + } + *buf = buf_[bufNext_++]; + nread = 1; + return 1; + } + Boolean rewind(Messenger &mgr) { + bufAvail_ = bufNext_ = 0; + return sub_->rewind(mgr); + } + void willNotRewind() { sub_->willNotRewind(); } + size_t getBlockSize() const { return sub_->getBlockSize(); } +private: + Owner sub_; + size_t bufSize_; + size_t bufAvail_; + size_t bufNext_; + char *buf_; + const Boolean *unbuffer_; +}; + +class MappingDecoder : public Decoder { +public: + MappingDecoder(Decoder *, + const ConstPtr > &); + Boolean convertOffset(unsigned long &offset) const; + size_t decode(Char *, const char *, size_t, const char **); +private: + Owner sub_; + ConstPtr > map_; +}; + +MappingDecoder::MappingDecoder(Decoder *sub, + const ConstPtr > &map) +: Decoder(sub->minBytesPerChar()), sub_(sub), map_(map) +{ +} + +size_t MappingDecoder::decode(Char *to, const char *s, + size_t slen, const char **rest) +{ + size_t n = sub_->decode(to, s, slen, rest); + const CharMap &map = *map_; + for (size_t i = 0; i < n; i++) { + Unsigned32 d = map[to[i]]; + if (d & (unsigned(1) << 31)) + to[i] = (d & ~(unsigned(1) << 31)); + else + to[i] += d; + } + return n; +} + +Boolean MappingDecoder::convertOffset(unsigned long &offset) const +{ + return sub_->convertOffset(offset); +} + +ExternalInputSource::ExternalInputSource(ParsedSystemId &parsedSysid, + const CharsetInfo &systemCharset, + const CharsetInfo &docCharset, + Boolean internalCharsetIsDocCharset, + Char replacementChar, + InputSourceOrigin *origin, + unsigned flags) +: InputSource(origin, 0, 0), + mayRewind_((flags & EntityManager::mayRewind) != 0), + mayNotExist_((flags & ExtendEntityManager::mayNotExist) != 0), + sov_(parsedSysid.size()), + internalCharsetIsDocCharset_(internalCharsetIsDocCharset), + // hack + maySetDocCharset_((flags & EntityManager::maySetDocCharset) != 0), + replacementChar_(replacementChar) +{ + for (size_t i = 0; i < parsedSysid.size(); i++) { + if (parsedSysid[i].codingSystemType + != (internalCharsetIsDocCharset + ? StorageObjectSpec::bctf + : StorageObjectSpec::encoding) + && parsedSysid[i].codingSystemType != StorageObjectSpec::special) { + map_ = new CharMapResource; + buildMap(systemCharset, docCharset); + break; + } + } + for (size_t i = 0; i < sov_.size(); i++) + sov_[i] = 0; + init(); + info_ = new ExternalInfoImpl(parsedSysid); + origin->setExternalInfo(info_); +} + +void ExternalInputSource::setDocCharset(const CharsetInfo &docCharset, + const CharsetInfo &systemCharset) +{ + if (!map_.isNull()) + buildMap(systemCharset, docCharset); + willNotSetDocCharset(); +} + +void ExternalInputSource::willNotSetDocCharset() +{ + maySetDocCharset_ = 0; +} + +void ExternalInputSource::buildMap(const CharsetInfo &systemCharset, + const CharsetInfo &docCharset) +{ + CharMap &map = *map_; + // FIXME How should invalidChar be chosen when internalCharsetIsDocCharset_? + Char invalidChar + = internalCharsetIsDocCharset_ ? 0 : replacementChar_; + map.setAll((Unsigned32(1) << 31) | invalidChar); + if (internalCharsetIsDocCharset_) + buildMap1(systemCharset, docCharset); + else + buildMap1(docCharset, systemCharset); +} + +void ExternalInputSource::buildMap1(const CharsetInfo &fromCharset, + const CharsetInfo &toCharset) +{ + UnivCharsetDescIter iter(fromCharset.desc()); + for (;;) { + WideChar descMin, descMax; + UnivChar univMin; + if (!iter.next(descMin, descMax, univMin)) + break; + if (descMin > charMax) + break; + if (descMax > charMax) + descMax = charMax; + WideChar totalCount = 1 + (descMax - descMin); + do { + WideChar count; + WideChar toMin; + ISet set; + int nMap = toCharset.univToDesc(univMin, toMin, set, count); + if (count > totalCount) + count = totalCount; + if (nMap && toMin <= charMax) { + Char toMax; + if (count - 1 > charMax - toMin) + toMax = charMax; + else + toMax = toMin + (count - 1); + map_->setRange(descMin, descMin + (toMax - toMin), Char(toMin - descMin)); + } + descMin += count; + univMin += count; + totalCount -= count; + } while (totalCount > 0); + } +} + +void ExternalInputSource::init() +{ + so_ = 0; + buf_ = 0; + bufSize_ = 0; + bufLim_ = 0; + bufLimOffset_ = 0; + insertRS_ = true; + soIndex_ = 0; + leftOver_ = 0; + nLeftOver_ = 0; +} + +ExternalInputSource::~ExternalInputSource() +{ + if (buf_) + delete [] buf_; +} + +Boolean ExternalInputSource::rewind(Messenger &mgr) +{ + reset(0, 0); + if (buf_) + delete [] buf_; + // reset makes a new EntityOrigin + ParsedSystemId parsedSysid(info_->parsedSystemId()); + ExternalInfoImpl *oldInfo = info_; + info_ = new ExternalInfoImpl(parsedSysid); + so_ = 0; + for (size_t i = 0; i < soIndex_; i++) { + if (sov_[i] && !sov_[i]->rewind(mgr)) + return 0; + StringC tem; + oldInfo->getId(i, tem); + info_->setId(i, tem); + } + inputSourceOrigin()->setExternalInfo(info_); + init(); + return 1; +} + +void ExternalInputSource::willNotRewind() +{ + for (size_t i = 0; i < sov_.size(); i++) + if (sov_[i]) + sov_[i]->willNotRewind(); + mayRewind_ = 0; +} + + +// Round up N so that it is a power of TO. +// TO must be a power of 2. + +inline +size_t roundUp(size_t n, size_t to) +{ + return (n + (to - 1)) & ~(to - 1); +} + +inline +void ExternalInputSource::noteRSAt(const Char *p) +{ + info_->noteRS(bufLimOffset_ - (bufLim_ - p)); +} + +inline +void ExternalInputSource::noteRS() +{ + noteRSAt(cur()); +} + +Xchar ExternalInputSource::fill(Messenger &mgr) +{ + ASSERT(cur() == end()); + while (end() >= bufLim_) { + // need more data + while (so_ == 0) { + if (soIndex_ >= sov_.size()) + return eE; + if (soIndex_ > 0) + info_->noteStorageObjectEnd(bufLimOffset_ - (bufLim_ - end())); + const StorageObjectSpec &spec = info_->spec(soIndex_); + if (!sov_[soIndex_]) { + StringC id; + if (mayNotExist_) { + NullMessenger nullMgr; + sov_[soIndex_] + = spec.storageManager->makeStorageObject(spec.specId, spec.baseId, + spec.search, + mayRewind_, nullMgr, id); + } + else + sov_[soIndex_] + = spec.storageManager->makeStorageObject(spec.specId, spec.baseId, + spec.search, + mayRewind_, mgr, id); + info_->setId(soIndex_, id); + } + so_ = sov_[soIndex_].pointer(); + if (so_) { + decoder_ = spec.codingSystem->makeDecoder(); + if (spec.codingSystemType != StorageObjectSpec::special + && spec.codingSystemType != (internalCharsetIsDocCharset_ + ? StorageObjectSpec::bctf + : StorageObjectSpec::encoding)) { + decoder_ = new MappingDecoder(decoder_, map_); + if (maySetDocCharset_) { + sov_[soIndex_] = new UnbufferingStorageObject(sov_[soIndex_].extract(), &maySetDocCharset_); + so_ = sov_[soIndex_].pointer(); + } + } + info_->setDecoder(soIndex_, decoder_); + zapEof_ = spec.zapEof; + switch (spec.records) { + case StorageObjectSpec::asis: + recordType_ = asis; + insertRS_ = false; + break; + case StorageObjectSpec::cr: + recordType_ = cr; + break; + case StorageObjectSpec::lf: + recordType_ = lf; + break; + case StorageObjectSpec::crlf: + recordType_ = crlf; + break; + case StorageObjectSpec::find: + recordType_ = unknown; + break; + default: + CANNOT_HAPPEN(); + } + soIndex_++; + readSize_ = so_->getBlockSize(); + nLeftOver_ = 0; + break; + } + else + setAccessError(); + soIndex_++; + } + + size_t keepSize = end() - start(); + const size_t align = sizeof(int)/sizeof(Char); + size_t readSizeChars = (readSize_ + (sizeof(Char) - 1))/sizeof(Char); + readSizeChars = roundUp(readSizeChars, align); + size_t neededSize; // in Chars + size_t startOffset; + // compute neededSize and readSize + unsigned minBytesPerChar = decoder_->minBytesPerChar(); + if (nLeftOver_ == 0 && minBytesPerChar >= sizeof(Char)) { + // In this case we want to do decoding in place. + // FIXME It might be a win on some systems (Irix?) to arrange that the + // read buffer is on a page boundary. + + if (keepSize >= size_t(-1)/sizeof(Char) - (align - 1) - insertRS_) + abort(); // FIXME throw an exception + + // Now size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize > 0 + if (readSizeChars + > size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize) + abort(); + neededSize = roundUp(readSizeChars + keepSize + insertRS_, align); + startOffset = ((neededSize > bufSize_ ? neededSize : bufSize_) + - readSizeChars - insertRS_ - keepSize); + } + else { + // Needs to be room for everything before decoding. + neededSize = (keepSize + insertRS_ + readSizeChars + + (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char)); + // Also must be room for everything after decoding. + size_t neededSize2 + = (keepSize + insertRS_ + // all the converted characters + + (nLeftOver_ + readSize_)/minBytesPerChar + // enough Chars to contain left over bytes + + ((readSize_ % minBytesPerChar + sizeof(Char) - 1) + / sizeof(Char))); + if (neededSize2 > neededSize) + neededSize = neededSize2; + neededSize = roundUp(neededSize, align); + if (neededSize > size_t(-1)/sizeof(Char)) + abort(); + startOffset = 0; + } + if (bufSize_ < neededSize) + reallocateBuffer(neededSize); + Char *newStart = buf_ + startOffset; + if (newStart != start() && keepSize > 0) + memmove(newStart, start(), keepSize*sizeof(Char)); + char *bytesStart = (char *)(buf_ + bufSize_ - readSizeChars) - nLeftOver_; + if (nLeftOver_ > 0 && leftOver_ != bytesStart) + memmove(bytesStart, leftOver_, nLeftOver_); + moveStart(newStart); + bufLim_ = end(); + + size_t nread; + if (so_->read((char *)(buf_ + bufSize_ - readSizeChars), readSize_, + mgr, nread)) { + if (nread > 0) { + const char *bytesEnd = bytesStart + nLeftOver_ + nread; + size_t nChars = decoder_->decode((Char *)end() + insertRS_, + bytesStart, + nLeftOver_ + nread + - (zapEof_ && bytesEnd[-1] == EOFCHAR), + &leftOver_); + nLeftOver_ = bytesEnd - leftOver_; + if (nChars > 0) { + if (insertRS_) { + noteRS(); + *(Char *)end() = RS; + advanceEnd(end() + 1); + insertRS_ = false; + bufLim_ += 1; + bufLimOffset_ += 1; + } + bufLim_ += nChars; + bufLimOffset_ += nChars; + break; + } + } + } + else + so_ = 0; + } + ASSERT(end() < bufLim_); + if (insertRS_) { + noteRS(); + insertChar(RS); + insertRS_ = false; + bufLimOffset_ += 1; + } + switch (recordType_) { + case unknown: + { + const Char *e = findNextCrOrLf(end(), bufLim_); + if (e) { + if (*e == '\n') { + recordType_ = lf; + info_->noteInsertedRSs(); + *(Char *)e = RE; + advanceEnd(e + 1); + insertRS_ = true; + } + else { + if (e + 1 < bufLim_) { + if (e[1] == '\n') { + recordType_ = crlf; + advanceEnd(e + 1); + if (e + 2 == bufLim_) { + bufLim_--; + bufLimOffset_--; + insertRS_ = true; + } + } + else { + advanceEnd(e + 1); + recordType_ = cr; + info_->noteInsertedRSs(); + insertRS_ = true; + } + } + else { + recordType_ = crUnknown; + advanceEnd(e + 1); + } + } + } + else + advanceEnd(bufLim_); + } + break; + case crUnknown: + { + if (*cur() == '\n') { + noteRS(); + advanceEnd(cur() + 1); + recordType_ = crlf; + } + else { + advanceEnd(cur() + 1); + insertRS_ = true; + recordType_ = cr; + info_->noteInsertedRSs(); + } + } + break; + case lf: + { + Char *e = (Char *)findNextLf(end(), bufLim_); + if (e) { + advanceEnd(e + 1); + *e = RE; + insertRS_ = true; + } + else + advanceEnd(bufLim_); + } + break; + case cr: + { + const Char *e = findNextCr(end(), bufLim_); + if (e) { + advanceEnd(e + 1); + insertRS_ = true; + } + else + advanceEnd(bufLim_); + } + break; + case crlf: + { + const Char *e = end(); + for (;;) { + e = findNextLf(e, bufLim_); + if (!e) { + advanceEnd(bufLim_); + break; + } + // Need to delete final RS if not followed by anything. + if (e + 1 == bufLim_) { + bufLim_--; + bufLimOffset_--; + advanceEnd(e); + insertRS_ = true; + if (cur() == end()) + return fill(mgr); + break; + } + noteRSAt(e); + e++; + } + } + break; + case asis: + advanceEnd(bufLim_); + break; + default: + CANNOT_HAPPEN(); + } + ASSERT(cur() < end()); + return nextChar(); +} + +const Char *ExternalInputSource::findNextCr(const Char *start, + const Char *end) +{ + for (; start < end; start++) + if (*start == '\r') + return start; + return 0; +} + +const Char *ExternalInputSource::findNextLf(const Char *start, + const Char *end) +{ + for (; start < end; start++) + if (*start == '\n') + return start; + return 0; +} + +const Char *ExternalInputSource::findNextCrOrLf(const Char *start, + const Char *end) +{ + for (; start < end; start++) + if (*start == '\n' || *start == '\r') + return start; + return 0; +} + +void ExternalInputSource::pushCharRef(Char ch, const NamedCharRef &ref) +{ + ASSERT(cur() == start()); + noteCharRef(startIndex() + (cur() - start()), ref); + insertChar(ch); +} + +void ExternalInputSource::insertChar(Char ch) +{ + if (start() > buf_) { + if (cur() > start()) + memmove((Char *)start() - 1, start(), (cur() - start())*sizeof(Char)); + moveLeft(); + *(Char *)cur() = ch; + } + else { + // must have start == buf + if (buf_ + (bufSize_ - (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char)) + == bufLim_) { + if (bufSize_ == size_t(-1)) + abort(); // FIXME throw an exception + reallocateBuffer(bufSize_ + 1); + } + else if (nLeftOver_ > 0 && ((char *)(bufLim_ + 1) > leftOver_)) { + char *s = (char *)(buf_ + bufSize_) - nLeftOver_; + memmove(s, leftOver_, nLeftOver_); + leftOver_ = s; + } + if (cur() < bufLim_) + memmove((Char *)cur() + 1, cur(), (bufLim_ - cur())*sizeof(Char)); + *(Char *)cur() = ch; + advanceEnd(end() + 1); + bufLim_ += 1; + } +} + +void ExternalInputSource::reallocateBuffer(size_t newSize) +{ + Char *newBuf = new Char[newSize]; + + memcpy(newBuf, buf_, bufSize_*sizeof(Char)); + bufSize_ = newSize; + changeBuffer(newBuf, buf_); + bufLim_ = newBuf + (bufLim_ - buf_); + if (nLeftOver_ > 0) { + char *s = (char *)(newBuf + bufSize_) - nLeftOver_; + memmove(s, + (char *)newBuf + (leftOver_ - (char *)buf_), + nLeftOver_); + leftOver_ = s; + } + delete [] buf_; + buf_ = newBuf; +} + +RTTI_DEF1(ExternalInfoImpl, ExternalInfo) + +ExternalInfoImpl::ExternalInfoImpl(ParsedSystemId &parsedSysid) +: currentIndex_(0), position_(parsedSysid.size()) +{ + parsedSysid.swap(parsedSysid_); + if (parsedSysid_.size() > 0) + notrack_ = parsedSysid_[0].notrack; +} + +void ExternalInfoImpl::setId(size_t i, StringC &id) +{ + Mutex::Lock lock(&mutex_); + id.swap(position_[i].id); +} + +void ExternalInfoImpl::getId(size_t i, StringC &id) const +{ + Mutex::Lock lock(&((ExternalInfoImpl *)this)->mutex_); + id = position_[i].id; +} + +void ExternalInfoImpl::setDecoder(size_t i, Decoder *decoder) +{ + Mutex::Lock lock(&mutex_); + position_[i].decoder = decoder; +} + +void ExternalInfoImpl::noteInsertedRSs() +{ + position_[currentIndex_].insertedRSs = 1; +} + +void ExternalInfoImpl::noteRS(Offset offset) +{ + // We do the locking in OffsetOrderedList. + if (!notrack_) + rsList_.append(offset); + if (offset + == (currentIndex_ == 0 ? 0 : position_[currentIndex_- 1].endOffset)) + position_[currentIndex_].startsWithRS = 1; +} + +void ExternalInfoImpl::noteStorageObjectEnd(Offset offset) +{ + Mutex::Lock lock(&mutex_); + ASSERT(currentIndex_ < position_.size()); + // The last endOffset_ must be -1. + if (currentIndex_ < position_.size() - 1) { + position_[currentIndex_++].endOffset = offset; + position_[currentIndex_].line1RS = rsList_.size(); + notrack_ = parsedSysid_[currentIndex_].notrack; + } +} + +Boolean ExternalInfoImpl::convertOffset(Offset off, + StorageObjectLocation &ret) const +{ + Mutex::Lock lock(&((ExternalInfoImpl *)this)->mutex_); + if (off == Offset(-1) || position_.size() == 0) + return false; + // the last endOffset_ is Offset(-1), so this will + // terminate + int i; + for (i = 0; off >= position_[i].endOffset; i++) + ; + for (; position_[i].id.size() == 0; i--) + if (i == 0) + return false; + ret.storageObjectSpec = &parsedSysid_[i]; + ret.actualStorageId = position_[i].id; + Offset startOffset = i == 0 ? 0 : position_[i - 1].endOffset; + ret.storageObjectOffset = off - startOffset; + ret.byteIndex = ret.storageObjectOffset; + if (parsedSysid_[i].notrack + || parsedSysid_[i].records == StorageObjectSpec::asis) { + ret.lineNumber = (unsigned long)-1; + if (parsedSysid_[i].records != StorageObjectSpec::asis) { + if (position_[i].insertedRSs) + ret.byteIndex = (unsigned long)-1; + else if (ret.byteIndex > 0 && position_[i].startsWithRS) + ret.byteIndex--; // first RS is inserted + } + ret.columnNumber = (unsigned long)-1; + return true; + } + else { + size_t line1RS = position_[i].line1RS; + // line1RS is now the number of RSs that are before or on the current line. + size_t j; + Offset colStart; + if (rsList_.findPreceding(off, j, colStart)) { + if (position_[i].insertedRSs) + ret.byteIndex -= j + 1 - line1RS; + else if (ret.byteIndex > 0 && position_[i].startsWithRS) + ret.byteIndex--; // first RS is inserted + j++; + colStart++; + } + else { + j = 0; + colStart = 0; + } + // j is now the number of RSs that are before or on the current line + // colStart is the offset of the first column + ret.lineNumber = j - line1RS + 1 - position_[i].startsWithRS; + // the offset of the first column + if (colStart < startOffset) + colStart = startOffset; + // the RS that starts a line will be in column 0; + // the first real character of a line will be column 1 + ret.columnNumber = 1 + off - colStart; + } + if (!position_[i].decoder + || !position_[i].decoder->convertOffset(ret.byteIndex)) + ret.byteIndex = (unsigned long)-1; + return true; +} + +const StorageObjectSpec &ExternalInfoImpl::spec(size_t i) const +{ + return parsedSysid_[i]; +} + +size_t ExternalInfoImpl::nSpecs() const +{ + return parsedSysid_.size(); +} + +const ParsedSystemId &ExternalInfoImpl::parsedSystemId() const +{ + return parsedSysid_; +} + +StorageObjectSpec::StorageObjectSpec() +: storageManager(0), codingSystem(0), codingSystemName(0), notrack(0), + records(find), zapEof(1), search(1) +{ +} + +StorageObjectSpec::StorageObjectSpec(const StorageObjectSpec& x) +: codingSystemName(x.codingSystemName), + codingSystem(x.codingSystem), + specId(x.specId), + baseId(x.baseId), + records(x.records), + notrack(x.notrack), + zapEof(x.zapEof), + search(x.search), + codingSystemType(x.codingSystemType) +{ +} + +StorageObjectSpec& StorageObjectSpec::operator=(const StorageObjectSpec& x) +{ + if (this != &x) { + codingSystemName = x.codingSystemName; + codingSystem = x.codingSystem; + specId = x.specId; + baseId = x.baseId; + records = x.records; + notrack = x.notrack; + zapEof = x.zapEof; + search = x.search; + codingSystemType = x.codingSystemType; + } + return *this; +} + +StorageObjectSpec::~StorageObjectSpec() +{ +} + +StorageObjectPosition::StorageObjectPosition() +: endOffset(Offset(-1)), line1RS(0), startsWithRS(0), insertedRSs(0) +{ +} + +FSIParser::FSIParser(const StringC &str, + const CharsetInfo &idCharset, + Boolean isNdata, + const StorageObjectLocation *defLoc, + const EntityManagerImpl *em, + Messenger &mgr) +: str_(str), + strIndex_(0), + idCharset_(idCharset), + isNdata_(isNdata), + defSpec_(defLoc ? defLoc->storageObjectSpec : 0), + defId_(defLoc ? &defLoc->actualStorageId : 0), + em_(em), + mgr_(mgr) +{ +} + +Xchar FSIParser::get() +{ + if (strIndex_ < str_.size()) + return str_[strIndex_++]; + else + return -1; +} + +void FSIParser::unget() +{ + if (strIndex_ > 0) + strIndex_ -= 1; +} + +Boolean FSIParser::matchKey(const StringC &str, const char *s) +{ + if (strlen(s) != str.size()) + return false; + for (size_t i = 0; i < str.size(); i++) + if (idCharset_.execToDesc(toupper(s[i])) != str[i] + && idCharset_.execToDesc(tolower(s[i])) != str[i]) + return false; + return true; +} + +Boolean FSIParser::matchChar(Xchar ch, char execC) +{ + return ch == idCharset_.execToDesc(execC); +} + +Boolean FSIParser::isS(Xchar c) +{ + return (matchChar(c, ' ') + || matchChar(c, '\r') + || matchChar(c, '\n') + || matchChar(c, ' ')); +} + +Boolean FSIParser::convertDigit(Xchar c, int &weight) +{ + static const char digits[] = "0123456789"; + for (int i = 0; digits[i] != '\0'; i++) + if (matchChar(c, digits[i])) { + weight = i; + return 1; + } + return 0; +} + +Boolean FSIParser::parse(ParsedSystemId &parsedSysid) +{ + size_t startIndex = strIndex_; + if (!matchChar(get(), '<')) + return handleInformal(startIndex, parsedSysid); + StringC key; + for (;;) { + Xchar c = get(); + if (c == -1) + return handleInformal(startIndex, parsedSysid); + if (isS(c) || matchChar(c, '>')) + break; + key += Char(c); + } + unget(); + if (matchKey(key, "CATALOG")) { + if (!setCatalogAttributes(parsedSysid)) + return 0; + return parse(parsedSysid); + } + Boolean neutral; + StorageManager *sm = lookupStorageType(key, neutral); + if (!sm) + return handleInformal(startIndex, parsedSysid); + for (;;) { + parsedSysid.resize(parsedSysid.size() + 1); + StorageObjectSpec &sos = parsedSysid.back(); + sos.storageManager = sm; + Xchar smcrd; + Boolean fold; + if (!setAttributes(sos, neutral, smcrd, fold)) + return 0; + sm = 0; + StringC id; + Boolean hadData = 0; + for (;;) { + Xchar c = get(); + if (c == -1) + break; + if (matchChar(c, '<')) { + hadData = 1; + Char stago = c; + key.resize(0); + for (;;) { + c = get(); + if (c == -1) { + id += stago; + id += key; + break; + } + if (isS(c) || matchChar(c, '>')) { + unget(); + sm = lookupStorageType(key, neutral); + if (!sm) { + id += stago; + id += key; + } + break; + } + key += c; + } + if (sm) + break; + } + else if (!((!hadData && matchChar(c, '\r')) // ignored RE + || matchChar(c, '\n') )) { // ignored RS + hadData = 1; + id += c; + } + } + if (id.size() > 0 && matchChar(id[id.size() - 1], '\r')) + id.resize(id.size() - 1); + uncharref(id); + id.swap(sos.specId); + if (!convertId(sos.specId, smcrd, sos.storageManager)) + return 0; + if (neutral) { + if (!sos.storageManager->transformNeutral(sos.specId, fold, mgr_)) + return 0; + } + if (sos.storageManager->resolveRelative(sos.baseId, sos.specId, + sos.search)) + sos.baseId.resize(0); + if (!sm) + break; + } + return 1; +} + +Boolean FSIParser::handleInformal(size_t index, ParsedSystemId &parsedSysid) +{ + parsedSysid.resize(parsedSysid.size() + 1); + StorageObjectSpec &sos = parsedSysid.back(); + sos.specId.assign(str_.data() + index, + str_.size() - index); + sos.storageManager = em_->guessStorageType(sos.specId, idCharset_); + if (!sos.storageManager) { + if (defSpec_ && defSpec_->storageManager->inheritable()) + sos.storageManager = defSpec_->storageManager; + else + sos.storageManager = em_->defaultStorageManager_.pointer(); + } + setDefaults(sos); + if (!convertId(sos.specId, -1, sos.storageManager)) + return 0; + if (sos.storageManager->resolveRelative(sos.baseId, sos.specId, sos.search)) + sos.baseId.resize(0); + return 1; +} + +StorageManager *FSIParser::lookupStorageType(const StringC &key, + Boolean &neutral) +{ + if (matchKey(key, "NEUTRAL")) { + neutral = 1; + if (defSpec_ && defSpec_->storageManager->inheritable()) + return defSpec_->storageManager; + else + return em_->defaultStorageManager_.pointer(); + } + else { + StorageManager *sm = em_->lookupStorageType(key, idCharset_); + if (sm) + neutral = 0; + return sm; + } +} + +Boolean FSIParser::setCatalogAttributes(ParsedSystemId &parsedSysid) +{ + Boolean hadPublic = 0; + parsedSysid.maps.resize(parsedSysid.maps.size() + 1); + parsedSysid.maps.back().type = ParsedSystemId::Map::catalogDocument; + for (;;) { + StringC token, value; + Boolean gotValue; + if (!parseAttribute(token, gotValue, value)) { + mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_)); + return 0; + } + if (token.size() == 0) + break; + if (matchKey(token, "PUBLIC")) { + if (hadPublic) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(idCharset_.execToDesc("PUBLIC"))); + else if (gotValue) { + convertMinimumLiteral(value, parsedSysid.maps.back().publicId); + parsedSysid.maps.back().type = ParsedSystemId::Map::catalogPublic; + } + else + mgr_.message(EntityManagerMessages::fsiMissingValue, + StringMessageArg(token)); + hadPublic = 1; + } + else + mgr_.message(gotValue + ? EntityManagerMessages::fsiUnsupportedAttribute + : EntityManagerMessages::fsiUnsupportedAttributeToken, + StringMessageArg(token)); + } + return 1; +} + +void FSIParser::convertMinimumLiteral(const StringC &from, StringC &to) +{ + // Do just enough to ensure it can be reparsed. + to.resize(0); + for (size_t i = 0; i < from.size(); i++) { + Char c = from[i]; + if (matchChar(c, '"') || matchChar(c, '#')) + mgr_.message(EntityManagerMessages::fsiLookupChar, NumberMessageArg(c)); + else if (matchChar(c, ' ')) { + if (to.size() && to[to.size() - 1] != c) + to += c; + } + else + to += c; + } + if (to.size() && matchChar(to[to.size() - 1], ' ')) + to.resize(to.size() - 1); +} + +// FIXME This should be table driven. + +Boolean FSIParser::setAttributes(StorageObjectSpec &sos, + Boolean neutral, + Xchar &smcrd, + Boolean &fold) +{ + Boolean hadBctf = 0; + Boolean hadEncoding = 0; + Boolean hadTracking = 0; + Boolean hadSmcrd = 0; + smcrd = -1; + fold = 1; + Boolean hadRecords = 0; + Boolean hadBase = 0; + Boolean hadZapeof = 0; + Boolean hadSearch = 0; + Boolean hadFold = 0; + StorageObjectSpec::Records records; + setDefaults(sos); + for (;;) { + StringC token, value; + Boolean gotValue; + if (!parseAttribute(token, gotValue, value)) { + mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_)); + return 0; + } + if (token.size() == 0) + break; + if (matchKey(token, "BCTF")) { + if (sos.storageManager->requiredCodingSystem()) + mgr_.message(EntityManagerMessages::fsiBctfEncodingNotApplicable); + else if (hadBctf) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(token)); + else if (hadEncoding) + mgr_.message(EntityManagerMessages::fsiBctfAndEncoding); + else if (gotValue) { + const char *codingSystemName; + const InputCodingSystem *codingSystem + = em_->lookupCodingSystem(value, idCharset_, 1, codingSystemName); + if (codingSystem) { + sos.codingSystem = codingSystem; + sos.codingSystemName = codingSystemName; + sos.codingSystemType = StorageObjectSpec::bctf; + } + else if (matchKey(value, "SAME")) { + if (!isNdata_) { + if (defSpec_) { + sos.codingSystem = defSpec_->codingSystem; + sos.codingSystemName = defSpec_->codingSystemName; + sos.codingSystemType = defSpec_->codingSystemType; + } + else { + sos.codingSystem = em_->defaultCodingSystem_; + sos.codingSystemName = 0; + sos.codingSystemType = (em_->internalCharsetIsDocCharset_ + ? StorageObjectSpec::bctf + : StorageObjectSpec::encoding); + } + } + } + else + mgr_.message(EntityManagerMessages::fsiUnknownBctf, + StringMessageArg(value)); + } + else + mgr_.message(EntityManagerMessages::fsiMissingValue, + StringMessageArg(token)); + hadBctf = 1; + } + else if (matchKey(token, "ENCODING")) { + if (sos.storageManager->requiredCodingSystem()) + mgr_.message(EntityManagerMessages::fsiBctfEncodingNotApplicable); + else if (hadEncoding) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(token)); + else if (hadBctf) + mgr_.message(EntityManagerMessages::fsiBctfAndEncoding); + else if (gotValue) { + const char *codingSystemName; + const InputCodingSystem *codingSystem + = em_->lookupCodingSystem(value, idCharset_, 0, codingSystemName); + if (codingSystem) { + sos.codingSystem = codingSystem; + sos.codingSystemName = codingSystemName; + sos.codingSystemType = StorageObjectSpec::encoding; + } + else if (matchKey(value, "SAME")) { + if (!isNdata_) { + if (defSpec_) { + sos.codingSystem = defSpec_->codingSystem; + sos.codingSystemName = defSpec_->codingSystemName; + sos.codingSystemType = defSpec_->codingSystemType; + } + else { + sos.codingSystem = em_->defaultCodingSystem_; + sos.codingSystemName = 0; + sos.codingSystemType = (em_->internalCharsetIsDocCharset_ + ? StorageObjectSpec::bctf + : StorageObjectSpec::encoding); + } + } + } + else + mgr_.message(EntityManagerMessages::fsiUnknownEncoding, + StringMessageArg(value)); + } + else + mgr_.message(EntityManagerMessages::fsiMissingValue, + StringMessageArg(token)); + hadEncoding = 1; + } + else if (matchKey(token, "TRACKING")) { + if (hadTracking) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(token)); + else if (gotValue) { + if (matchKey(value, "NOTRACK")) + sos.notrack = 1; + else if (!matchKey(value, "TRACK")) + mgr_.message(EntityManagerMessages::fsiBadTracking, + StringMessageArg(value)); + } + else + mgr_.message(EntityManagerMessages::fsiMissingValue, + StringMessageArg(token)); + hadTracking = 1; + } + else if (matchKey(token, "ZAPEOF")) { + if (sos.storageManager->requiredCodingSystem()) + mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable); + else if (hadZapeof) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(token)); + else if (gotValue) { + if (matchKey(value, "ZAPEOF")) + sos.zapEof = 1; + else if (matchKey(value, "NOZAPEOF")) + sos.zapEof = 0; + else + mgr_.message(EntityManagerMessages::fsiBadZapeof, + StringMessageArg(value)); + } + else + sos.zapEof = 1; + hadZapeof = 1; + } + else if (matchKey(token, "NOZAPEOF")) { + if (sos.storageManager->requiredCodingSystem()) + mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable); + else if (hadZapeof) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(idCharset_.execToDesc("ZAPEOF"))); + else if (gotValue) + mgr_.message(EntityManagerMessages::fsiValueAsName, + StringMessageArg(token)); + else + sos.zapEof = 0; + hadZapeof = 1; + } + else if (matchKey(token, "SEARCH")) { + if (hadSearch) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(token)); + else if (gotValue) { + if (matchKey(value, "SEARCH")) + sos.search = 1; + else if (matchKey(value, "NOSEARCH")) + sos.search = 0; + else + mgr_.message(EntityManagerMessages::fsiBadSearch, + StringMessageArg(value)); + } + else + sos.search = 1; + hadSearch = 1; + } + else if (matchKey(token, "NOSEARCH")) { + if (hadSearch) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(idCharset_.execToDesc("SEARCH"))); + else if (gotValue) + mgr_.message(EntityManagerMessages::fsiValueAsName, + StringMessageArg(token)); + else + sos.search = 0; + hadSearch = 1; + } + else if (matchKey(token, "FOLD")) { + if (!neutral) + mgr_.message(EntityManagerMessages::fsiFoldNotNeutral); + else if (hadFold) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(token)); + else if (gotValue) { + if (matchKey(value, "FOLD")) + fold = 1; + else if (matchKey(value, "NOFOLD")) + fold = 0; + else + mgr_.message(EntityManagerMessages::fsiBadFold, + StringMessageArg(value)); + } + else + fold = 1; + hadFold = 1; + } + else if (matchKey(token, "NOFOLD")) { + if (!neutral) + mgr_.message(EntityManagerMessages::fsiFoldNotNeutral); + else if (hadFold) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(idCharset_.execToDesc("FOLD"))); + else if (gotValue) + mgr_.message(EntityManagerMessages::fsiValueAsName, + StringMessageArg(token)); + else + fold = 0; + hadFold = 1; + } + else if (matchKey(token, "SMCRD")) { + if (hadSmcrd) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(token)); + else if (gotValue) { + if (value.size() == 0) + smcrd = -1; + else if (value.size() == 1) + smcrd = value[0]; + else + mgr_.message(EntityManagerMessages::fsiBadSmcrd, + StringMessageArg(value)); + } + else + mgr_.message(EntityManagerMessages::fsiMissingValue, + StringMessageArg(token)); + hadSmcrd = 1; + } + else if (matchKey(token, "RECORDS")) { + if (sos.storageManager->requiresCr()) + mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable); + else if (hadRecords) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(token)); + else if (gotValue) { + if (!lookupRecords(value, sos.records)) + mgr_.message(EntityManagerMessages::fsiUnsupportedRecords, + StringMessageArg(value)); + } + else + mgr_.message(EntityManagerMessages::fsiMissingValue, + StringMessageArg(token)); + hadRecords = 1; + } + else if (matchKey(token, "SOIBASE")) { + if (hadBase) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(token)); + else if (gotValue) + value.swap(sos.baseId); + else { + mgr_.message(EntityManagerMessages::fsiMissingValue, + StringMessageArg(token)); + sos.baseId.resize(0); + } + hadBase = 1; + } + else if (lookupRecords(token, records)) { + if (sos.storageManager->requiresCr()) + mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable); + else if (hadRecords) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(idCharset_.execToDesc("RECORDS"))); + else if (!gotValue) + sos.records = records; + else + mgr_.message(EntityManagerMessages::fsiValueAsName, + StringMessageArg(token)); + hadRecords = 1; + } + else if (matchKey(token, "NOTRACK")) { + if (hadTracking) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(idCharset_.execToDesc("TRACKING"))); + else if (!gotValue) + sos.notrack = 1; + else + mgr_.message(EntityManagerMessages::fsiValueAsName, + StringMessageArg(token)); + hadTracking = 1; + } + else if (matchKey(token, "TRACK")) { + if (hadTracking) + mgr_.message(EntityManagerMessages::fsiDuplicateAttribute, + StringMessageArg(idCharset_.execToDesc("TRACKING"))); + else if (gotValue) + mgr_.message(EntityManagerMessages::fsiValueAsName, + StringMessageArg(token)); + hadTracking = 1; + } + else + mgr_.message(gotValue + ? EntityManagerMessages::fsiUnsupportedAttribute + : EntityManagerMessages::fsiUnsupportedAttributeToken, + StringMessageArg(token)); + } + if (hadBase && sos.baseId.size() > 0) { + convertId(sos.baseId, smcrd, sos.storageManager); + if (neutral) { + if (!sos.storageManager->transformNeutral(sos.baseId, fold, mgr_)) + sos.baseId.resize(0); + } + } + if (!hadZapeof && hadRecords && sos.records == StorageObjectSpec::asis) + sos.zapEof = 0; + return 1; +} + +FSIParser::RecordType FSIParser::recordTypeTable[] = { + { "FIND", StorageObjectSpec::find }, + { "ASIS", StorageObjectSpec::asis }, + { "CR", StorageObjectSpec::cr }, + { "LF", StorageObjectSpec::lf }, + { "CRLF", StorageObjectSpec::crlf } +}; + +const char *FSIParser::recordsName(StorageObjectSpec::Records records) +{ + for (size_t i = 0; i < SIZEOF(recordTypeTable); i++) + if (records == recordTypeTable[i].value) + return recordTypeTable[i].name; + return 0; +} + +Boolean FSIParser::lookupRecords(const StringC &token, + StorageObjectSpec::Records &result) +{ + for (size_t i = 0; i < SIZEOF(recordTypeTable); i++) + if (matchKey(token, recordTypeTable[i].name)) { + result = recordTypeTable[i].value; + return 1; + } + return 0; +} + +void FSIParser::setDefaults(StorageObjectSpec &sos) +{ + if (sos.storageManager->requiresCr()) + sos.records = StorageObjectSpec::cr; + else if (isNdata_ + || (defSpec_ && defSpec_->records == StorageObjectSpec::asis)) + sos.records = StorageObjectSpec::asis; + if (isNdata_ || (defSpec_ && !defSpec_->zapEof)) + sos.zapEof = 0; + if (defSpec_ && defSpec_->storageManager == sos.storageManager) { + if (defId_) + sos.baseId = *defId_; + else { + sos.baseId = defSpec_->specId; + sos.storageManager->resolveRelative(defSpec_->baseId, + sos.baseId, + 0); + } + } + sos.codingSystem = sos.storageManager->requiredCodingSystem(); + if (sos.codingSystem) { + sos.zapEof = 0; // hack + sos.codingSystemType = StorageObjectSpec::special; + } + else { + sos.codingSystem = em_->defaultCodingSystem_; + sos.codingSystemType + = (em_->internalCharsetIsDocCharset_ + ? StorageObjectSpec::bctf + : StorageObjectSpec::encoding); + if (isNdata_) { + sos.codingSystem = em_->codingSystemKit_->identityInputCodingSystem(); + sos.codingSystemType = StorageObjectSpec::special; + } + else if (defSpec_) { + sos.codingSystem = defSpec_->codingSystem; + sos.codingSystemName = defSpec_->codingSystemName; + sos.codingSystemType = defSpec_->codingSystemType; + } + } +} + +Boolean FSIParser::parseAttribute(StringC &token, Boolean &gotValue, + StringC &value) +{ + Xchar c = get(); + while (isS(c)) + c = get(); + if (c == -1) { + return 0; + } + token.resize(0); + if (matchChar(c, '>')) + return 1; + if (matchChar(c, '"') || matchChar(c, '\'') || matchChar(c, '=')) + return 0; + for (;;) { + token += c; + c = get(); + if (c == -1) + return 0; + if (isS(c)) + break; + if (matchChar(c, '>') || matchChar(c, '=')) + break; + } + while (isS(c)) + c = get(); + if (c == -1) + return 0; + if (!matchChar(c, '=')) { + unget(); + gotValue = 0; + return 1; + } + gotValue = 1; + value.resize(0); + + c = get(); + while (isS(c)) + c = get(); + if (matchChar(c, '>') || matchChar(c, '=')) + return 0; + if (matchChar(c, '"') || matchChar(c, '\'')) { + Char lit = c; + for (;;) { + Xchar c = get(); + if (c == lit) + break; + if (c == -1) + return 0; + if (matchChar(c, '\n')) + ; + else if (matchChar(c, '\r') || matchChar(c, '\t')) + value += idCharset_.execToDesc(' '); + else + value += c; + } + uncharref(value); + } + else { + for (;;) { + value += c; + c = get(); + if (c == -1) + return 0; + if (isS(c)) + break; + if (matchChar(c, '>') || matchChar(c, '=')) { + unget(); + break; + } + } + } + return 1; +} + +void FSIParser::uncharref(StringC &str) +{ + size_t j = 0; + size_t i = 0; + while (i < str.size()) { + int digit; + if (matchChar(str[i], '&') + && i + 2 < str.size() + && matchChar(str[i + 1], '#') + && convertDigit(str[i + 2], digit)) { + unsigned long val = digit; + i += 3; + while (i < str.size() && convertDigit(str[i], digit)) { + val = val*10 + digit; + i++; + } + str[j++] = val; + if (i < str.size() && matchChar(str[i], ';')) + i++; + } + else + str[j++] = str[i++]; + } + str.resize(j); +} + +Boolean FSIParser::convertId(StringC &id, Xchar smcrd, + const StorageManager *sm) +{ + const CharsetInfo *smCharset = sm->idCharset(); + StringC newId; + size_t i = 0; + while (i < id.size()) { + UnivChar univ; + WideChar wide; + ISet wideSet; + int digit; + if (Xchar(id[i]) == smcrd + && i + 1 < id.size() + && convertDigit(id[i + 1], digit)) { + i += 2; + Char val = digit; + while (i < id.size() && convertDigit(id[i], digit)) { + val = val*10 + digit; + i++; + } + newId += val; + if (i < id.size() && matchChar(id[i], ';')) + i++; + } + else if (smCharset) { + if (!idCharset_.descToUniv(id[i++], univ)) + return 0; + if (univ == UnivCharsetDesc::rs) + ; + else if (univ == UnivCharsetDesc::re && sm->reString()) + newId += *sm->reString(); + else if (smCharset->univToDesc(univ, wide, wideSet) != 1 + || wide > charMax) + return 0; // FIXME give error + else + newId += Char(wide); + } + else + newId += id[i++]; + } + newId.swap(id); + return 1; +} + +ParsedSystemId::ParsedSystemId() +{ +} + +ParsedSystemId::Map::Map() +{ +} + +ParsedSystemId::Map::Map(const ParsedSystemId::Map& x) +: type(x.type), + publicId(x.publicId) +{ +} + +ParsedSystemId::Map::~Map() +{ +} + +ParsedSystemId::Map& ParsedSystemId::Map::operator=(const ParsedSystemId::Map& x) +{ + if (this != &x) { + type = x.type; + publicId = x.publicId; + } + return *this; +} + +static +void unparseSoi(const StringC &soi, + const CharsetInfo *idCharset, + const CharsetInfo &resultCharset, + StringC &result, + Boolean &needSmcrd); + +void ParsedSystemId::unparse(const CharsetInfo &resultCharset, + Boolean isNdata, + StringC &result) const +{ + size_t len = size(); + result.resize(0); + size_t i; + for (i = 0; i < maps.size(); i++) { + if (maps[i].type == Map::catalogDocument) + result += resultCharset.execToDesc(""); + else if (maps[i].type == Map::catalogPublic) { + result += resultCharset.execToDesc(""); + } + } + for (i = 0; i < len; i++) { + const StorageObjectSpec &sos = (*this)[i]; + result += resultCharset.execToDesc('<'); + result += resultCharset.execToDesc(sos.storageManager->type()); + if (sos.notrack) + result += resultCharset.execToDesc(" NOTRACK"); + if (!sos.search) + result += resultCharset.execToDesc(" NOSEARCH"); + if (!sos.storageManager->requiresCr() + && sos.records != (isNdata ? StorageObjectSpec::asis : StorageObjectSpec::find)) { + result += resultCharset.execToDesc(' '); + result += resultCharset.execToDesc(FSIParser::recordsName(sos.records)); + } + if (sos.codingSystemName && sos.codingSystemType != StorageObjectSpec::special) { + if (!sos.zapEof) + result += resultCharset.execToDesc(" NOZAPEOF"); + result += resultCharset.execToDesc(sos.codingSystemType == StorageObjectSpec::bctf + ? " BCTF=" + : " ENCODING="); + result += resultCharset.execToDesc(sos.codingSystemName); + } + Boolean needSmcrd = 0; + if (sos.baseId.size() != 0) { + result += resultCharset.execToDesc(" SOIBASE='"); + unparseSoi(sos.baseId, + sos.storageManager->idCharset(), + resultCharset, + result, + needSmcrd); + result += resultCharset.execToDesc('\''); + } + StringC tem; + unparseSoi(sos.specId, + sos.storageManager->idCharset(), + resultCharset, + tem, + needSmcrd); + if (needSmcrd) + result += resultCharset.execToDesc(" SMCRD='^'"); + result += resultCharset.execToDesc('>'); + result += tem; + } +} + +void unparseSoi(const StringC &soi, + const CharsetInfo *idCharset, + const CharsetInfo &resultCharset, + StringC &result, + Boolean &needSmcrd) +{ + if (!idCharset) { + for (size_t i = 0; i < soi.size(); i++) { + char buf[32]; + sprintf(buf, "&#%lu;", (unsigned long)soi[i]); + result += resultCharset.execToDesc(buf); + } + return; + } + for (size_t i = 0; i < soi.size(); i++) { + UnivChar univ; + WideChar to; + ISet toSet; + if (!idCharset->descToUniv(soi[i], univ) + || univ >= 127 + || univ < 32 + || univ == 36 // $ + || univ == 96 // ` +#ifndef SP_MSDOS_FILENAMES + || univ == 92 // backslash +#endif + || univ == 94 // ^ + || resultCharset.univToDesc(univ, to, toSet) != 1) { + needSmcrd = 1; + char buf[32]; + sprintf(buf, "^%lu;", (unsigned long)soi[i]); + result += resultCharset.execToDesc(buf); + } + else { + switch (univ) { + case 34: // double quote + case 35: // # + case 39: // apostrophe + case 60: // < + { + char buf[32]; + sprintf(buf, "&#%lu;", (unsigned long)to); + result += resultCharset.execToDesc(buf); + } + break; + default: + result += Char(to); + break; + } + } + } +} + +#ifdef SP_NAMESPACE +} +#endif