// (Stray repository-viewer text "Blob Blame History Raw" was here; it is not part of the source.)
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nscore.h"
#include "nsCyrillicProb.h"
#include <stdio.h>

#include "nsCOMPtr.h"
#include "nsISupports.h"
#include "nsICharsetDetector.h"
#include "nsICharsetDetectionObserver.h"
#include "nsIStringCharsetDetector.h"
#include "nsCyrillicDetector.h"

//----------------------------------------------------------------------
// Interface nsISupports [implementation]
// Each XPCOM wrapper class below is paired with the single interface it is
// handed to the macro with:
//   nsCyrXPCOMDetector       -> nsICharsetDetector (Init / DoIt / Done)
//   nsCyrXPCOMStringDetector -> nsIStringCharsetDetector (one-shot DoIt)
NS_IMPL_ISUPPORTS(nsCyrXPCOMDetector, nsICharsetDetector)
NS_IMPL_ISUPPORTS(nsCyrXPCOMStringDetector, nsIStringCharsetDetector)

void nsCyrillicDetector::HandleData(const char* aBuf, uint32_t aLen) {
  uint8_t cls;
  const char* b;
  uint32_t i;
  if (mDone) return;
  for (i = 0, b = aBuf; i < aLen; i++, b++) {
    for (unsigned j = 0; j < mItems; j++) {
      if (0x80 & *b)
        cls = mCyrillicClass[j][(*b) & 0x7F];
      else
        cls = 0;
      NS_ASSERTION(cls <= 32, "illegal character class");
      mProb[j] += gCyrillicProb[mLastCls[j]][cls];
      mLastCls[j] = cls;
    }
  }
  // We now only based on the first block we receive
  DataEnd();
}

//---------------------------------------------------------------------
#define THRESHOLD_RATIO 1.5f
void nsCyrillicDetector::DataEnd() {
  uint32_t max = 0;
  uint8_t maxIdx = 0;
  uint8_t j;
  if (mDone) return;
  for (j = 0; j < mItems; j++) {
    if (mProb[j] > max) {
      max = mProb[j];
      maxIdx = j;
    }
  }

  if (0 == max)  // if we didn't get any 8 bits data
    return;

#ifdef DEBUG
  for (j = 0; j < mItems; j++)
    printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]);
#endif
  this->Report(mCharsets[maxIdx]);
  mDone = true;
}

//---------------------------------------------------------------------
// Forwards the candidate count, class tables and charset names to the
// nsCyrillicDetector base and starts with no observer attached (Init() sets
// it later).
nsCyrXPCOMDetector::nsCyrXPCOMDetector(uint8_t aItems,
                                       const uint8_t** aCyrillicClass,
                                       const char** aCharsets)
    : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets),
      mObserver(nullptr) {}

//---------------------------------------------------------------------
// Nothing to release explicitly in this destructor body.
nsCyrXPCOMDetector::~nsCyrXPCOMDetector() = default;

//---------------------------------------------------------------------
// Attaches the observer that Report() will notify.
//
// Returns NS_ERROR_ILLEGAL_VALUE when aObserver is null, NS_OK otherwise.
// Calling it while an observer is already set trips the assertion, but the
// new observer still replaces the old one.
NS_IMETHODIMP nsCyrXPCOMDetector::Init(nsICharsetDetectionObserver* aObserver) {
  NS_ASSERTION(mObserver == nullptr, "Init twice");

  if (!aObserver) {
    return NS_ERROR_ILLEGAL_VALUE;
  }

  mObserver = aObserver;
  return NS_OK;
}

//----------------------------------------------------------
// Feeds one block of data to the detector.
//
// aBuf / aLen: the bytes to examine.
// oDontFeedMe: out-parameter; always set to false on success.
// Returns NS_ERROR_ILLEGAL_VALUE when aBuf or oDontFeedMe is null, NS_OK
// otherwise.
NS_IMETHODIMP nsCyrXPCOMDetector::DoIt(const char* aBuf, uint32_t aLen,
                                       bool* oDontFeedMe) {
  NS_ASSERTION(mObserver != nullptr, "have not init yet");

  if (!aBuf || !oDontFeedMe) {
    return NS_ERROR_ILLEGAL_VALUE;
  }

  HandleData(aBuf, aLen);
  *oDontFeedMe = false;
  return NS_OK;
}

//----------------------------------------------------------
// Signals the end of input: asks the base detector to finalize its decision.
// Always returns NS_OK.
NS_IMETHODIMP nsCyrXPCOMDetector::Done() {
  NS_ASSERTION(mObserver != nullptr, "have not init yet");

  DataEnd();
  return NS_OK;
}

//----------------------------------------------------------
// Delivers the detected charset name to the observer registered via Init(),
// tagged as eBestAnswer.
//
// aCharset: charset name chosen by the base detector.
//
// If no observer has been attached, the assertion flags the misuse and the
// result is dropped instead of dereferencing a null observer (the assertion
// alone does not stop execution).
void nsCyrXPCOMDetector::Report(const char* aCharset) {
  NS_ASSERTION(mObserver != nullptr, "have not init yet");
  if (mObserver == nullptr) {
    return;
  }
  mObserver->Notify(aCharset, eBestAnswer);
}

//---------------------------------------------------------------------
// Forwards the candidate count, class tables and charset names unchanged to
// the nsCyrillicDetector base; this constructor body does nothing else.
nsCyrXPCOMStringDetector::nsCyrXPCOMStringDetector(
    uint8_t aItems, const uint8_t** aCyrillicClass, const char** aCharsets)
    : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets) {
}

//---------------------------------------------------------------------
// Nothing to release explicitly in this destructor body.
nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector() = default;

//---------------------------------------------------------------------
void nsCyrXPCOMStringDetector::Report(const char* aCharset) {
  mResult = aCharset;
}

//---------------------------------------------------------------------
// One-shot detection over a complete buffer.
//
// aBuf / aLen: the bytes to examine.
// oCharset:    out-parameter; receives the charset name passed to Report(),
//              or nullptr when the detector reported nothing.
// oConf:       out-parameter; set to eBestAnswer on success, even when
//              *oCharset ends up nullptr.
//
// Returns NS_ERROR_ILLEGAL_VALUE (leaving the out-parameters untouched) when
// oCharset is null, or when aBuf is null while aLen is non-zero; this is the
// same error code nsCyrXPCOMDetector::DoIt uses for bad arguments. A null
// aBuf with aLen == 0 is still accepted, as before. Returns NS_OK otherwise.
//
// NOTE(review): only mResult and mDone are reset here; the per-candidate
// scores kept by the base class are not reset in this block. Confirm whether
// one instance is expected to be reused across several strings.
NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, uint32_t aLen,
                                             const char** oCharset,
                                             nsDetectionConfident& oConf) {
  // Reject arguments that would otherwise be dereferenced as null below.
  if ((nullptr == oCharset) || ((nullptr == aBuf) && (0 != aLen)))
    return NS_ERROR_ILLEGAL_VALUE;

  mResult = nullptr;
  mDone = false;
  this->HandleData(aBuf, aLen);
  this->DataEnd();
  *oCharset = mResult;
  oConf = eBestAnswer;
  return NS_OK;
}