Blame lib/UnicodeCodingSystem.cxx

Packit 8a864e
// Copyright (c) 1994 James Clark, 2000 Matthias Clasen
Packit 8a864e
// See the file COPYING for copying permission.
Packit 8a864e
Packit 8a864e
#include "splib.h"
Packit 8a864e
Packit 8a864e
#ifdef SP_MULTI_BYTE
Packit 8a864e
Packit 8a864e
#include "UnicodeCodingSystem.h"
Packit 8a864e
#include "UTF16CodingSystem.h"
Packit 8a864e
#include "macros.h"
Packit 8a864e
#include "Owner.h"
Packit 8a864e
Packit 8a864e
#include <stddef.h>
Packit 8a864e
#include <string.h>
Packit 8a864e
#ifdef DECLARE_MEMMOVE
Packit 8a864e
extern "C" {
Packit 8a864e
  void *memmove(void *, const void *, size_t);
Packit 8a864e
}
Packit 8a864e
#endif
Packit 8a864e
Packit 8a864e
#ifdef SP_NAMESPACE
Packit 8a864e
namespace SP_NAMESPACE {
Packit 8a864e
#endif
Packit 8a864e
Packit 8a864e
const unsigned short byteOrderMark = 0xfeff;
Packit 8a864e
const unsigned short swappedByteOrderMark = 0xfffe;
Packit 8a864e
Packit 8a864e
class UnicodeDecoder : public Decoder {
Packit 8a864e
public:
Packit 8a864e
  UnicodeDecoder(const InputCodingSystem *sub);
Packit 8a864e
  size_t decode(Char *to, const char *from, size_t fromLen,
Packit 8a864e
		const char **rest);
Packit 8a864e
  Boolean convertOffset(unsigned long &offset) const;
Packit 8a864e
private:
Packit 8a864e
  PackedBoolean hadByteOrderMark_;
Packit 8a864e
  PackedBoolean swapBytes_;
Packit 8a864e
  Owner<Decoder> subDecoder_;
Packit 8a864e
  const InputCodingSystem *subCodingSystem_;
Packit 8a864e
};
Packit 8a864e
Packit 8a864e
class UnicodeEncoder : public Encoder {
Packit 8a864e
public:
Packit 8a864e
  UnicodeEncoder();
Packit 8a864e
  void output(const Char *, size_t, OutputByteStream *);
Packit 8a864e
  void startFile(OutputByteStream *);
Packit 8a864e
private:
Packit 8a864e
  Owner<Encoder> subEncoder_;
Packit 8a864e
};
Packit 8a864e
Packit 8a864e
UnicodeCodingSystem::UnicodeCodingSystem(const InputCodingSystem *sub)
Packit 8a864e
: sub_(sub)
Packit 8a864e
{
Packit 8a864e
}
Packit 8a864e
Packit 8a864e
Decoder *UnicodeCodingSystem::makeDecoder() const
Packit 8a864e
{
Packit 8a864e
  return new UnicodeDecoder(sub_);
Packit 8a864e
}
Packit 8a864e
Packit 8a864e
Encoder *UnicodeCodingSystem::makeEncoder() const
Packit 8a864e
{
Packit 8a864e
  return new UnicodeEncoder;
Packit 8a864e
}
Packit 8a864e
Packit 8a864e
UnicodeDecoder::UnicodeDecoder(const InputCodingSystem *subCodingSystem)
Packit 8a864e
: Decoder(subCodingSystem ? 1 : 2), subCodingSystem_(subCodingSystem),
Packit 8a864e
  hadByteOrderMark_(0), swapBytes_(0)
Packit 8a864e
{
Packit 8a864e
}
Packit 8a864e
Packit 8a864e
Packit 8a864e
size_t UnicodeDecoder::decode(Char *to, const char *from, size_t fromLen,
Packit 8a864e
			      const char **rest)
Packit 8a864e
{
Packit 8a864e
  union U {
Packit 8a864e
    unsigned short word;
Packit 8a864e
    char bytes[2];
Packit 8a864e
  };
Packit 8a864e
    
Packit 8a864e
  if (subDecoder_)
Packit 8a864e
    return subDecoder_->decode(to, from, fromLen, rest);
Packit 8a864e
    if (fromLen < 2) {
Packit 8a864e
      *rest = from;
Packit 8a864e
      return 0;
Packit 8a864e
    }
Packit 8a864e
    minBytesPerChar_ = 2;
Packit 8a864e
    U u;
Packit 8a864e
    u.bytes[0] = from[0];
Packit 8a864e
    u.bytes[1] = from[1];
Packit 8a864e
    if (u.word == byteOrderMark) {
Packit 8a864e
      hadByteOrderMark_ = 1;
Packit 8a864e
      from += 2;
Packit 8a864e
      fromLen -= 2;
Packit 8a864e
    }
Packit 8a864e
    else if (u.word == swappedByteOrderMark) {
Packit 8a864e
      hadByteOrderMark_ = 1;
Packit 8a864e
      from += 2;
Packit 8a864e
      fromLen -= 2;
Packit 8a864e
      swapBytes_ = 1;
Packit 8a864e
    }
Packit 8a864e
  if (hadByteOrderMark_ || !subCodingSystem_)
Packit 8a864e
    subCodingSystem_ = new UTF16CodingSystem;
Packit 8a864e
  subDecoder_ = subCodingSystem_->makeDecoder(swapBytes_);
Packit 8a864e
      minBytesPerChar_ = subDecoder_->minBytesPerChar();
Packit 8a864e
      return subDecoder_->decode(to, from, fromLen, rest);
Packit 8a864e
}
Packit 8a864e
Packit 8a864e
Boolean UnicodeDecoder::convertOffset(unsigned long &n) const
Packit 8a864e
{
Packit 8a864e
  subDecoder_->convertOffset(n);
Packit 8a864e
  if (hadByteOrderMark_)
Packit 8a864e
    n += 2;
Packit 8a864e
  return true;
Packit 8a864e
}
Packit 8a864e
Packit 8a864e
UnicodeEncoder::UnicodeEncoder()
Packit 8a864e
{
Packit 8a864e
  UTF16CodingSystem utf16;
Packit 8a864e
  subEncoder_ = utf16.makeEncoder();
Packit 8a864e
}
Packit 8a864e
Packit 8a864e
void UnicodeEncoder::startFile(OutputByteStream *sb)
Packit 8a864e
{
Packit 8a864e
  const unsigned short n = byteOrderMark;
Packit 8a864e
  sb->sputn((char *)&n, 2);
Packit 8a864e
}
Packit 8a864e
Packit 8a864e
void UnicodeEncoder::output(const Char *s, size_t n, OutputByteStream *sb)
Packit 8a864e
{
Packit 8a864e
  subEncoder_->output(s, n, sb);
Packit 8a864e
}
Packit 8a864e
Packit 8a864e
#ifdef SP_NAMESPACE
Packit 8a864e
}
Packit 8a864e
#endif
Packit 8a864e
Packit 8a864e
#else /* not SP_MULTI_BYTE */
Packit 8a864e
Packit 8a864e
#ifndef __GNUG__
Packit 8a864e
static char non_empty_translation_unit;	// sigh
Packit 8a864e
#endif
Packit 8a864e
Packit 8a864e
#endif /* not SP_MULTI_BYTE */