1
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* ***** BEGIN LICENSE BLOCK *****
3
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4
 *
5
 * The contents of this file are subject to the Mozilla Public License Version
6
 * 1.1 (the "License"); you may not use this file except in compliance with
7
 * the License. You may obtain a copy of the License at
8
 * http://www.mozilla.org/MPL/
9
 *
10
 * Software distributed under the License is distributed on an "AS IS" basis,
11
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
 * for the specific language governing rights and limitations under the
13
 * License.
14
 *
15
 * The Original Code is Mozilla Universal charset detector code.
16
 *
17
 * The Initial Developer of the Original Code is
18
 * Netscape Communications Corporation.
19
 * Portions created by the Initial Developer are Copyright (C) 2001
20
 * the Initial Developer. All Rights Reserved.
21
 *
22
 * Contributor(s):
23
 *          Shy Shalom <shooshX@gmail.com>
24
 *
25
 * Alternatively, the contents of this file may be used under the terms of
26
 * either the GNU General Public License Version 2 or later (the "GPL"), or
27
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28
 * in which case the provisions of the GPL or the LGPL are applicable instead
29
 * of those above. If you wish to allow use of your version of this file only
30
 * under the terms of either the GPL or the LGPL, and not to allow others to
31
 * use your version of this file under the terms of the MPL, indicate your
32
 * decision by deleting the provisions above and replace them with the notice
33
 * and other provisions required by the GPL or the LGPL. If you do not delete
34
 * the provisions above, a recipient may use your version of this file under
35
 * the terms of any one of the MPL, the GPL or the LGPL.
36
 *
37
 * ***** END LICENSE BLOCK ***** */
38
#include <stdio.h>
39
#include "nsSBCharSetProber.h"
40
41
// Consumes aLen bytes starting at aBuf and folds them into the prober's
// running statistics.
//
// Each byte is translated to an "order" through mModel->charToOrderMap.
//   - orders below SYMBOL_CAT_ORDER are counted in mTotalChar;
//   - orders below SAMPLE_SIZE are counted in mFreqChar and, when the
//     previously seen order is also below SAMPLE_SIZE, the pair is counted in
//     mTotalSeqs and classified via mModel->precedenceMatrix into one of the
//     mSeqCounters buckets.
//
// After the buffer has been consumed, and only while the prober is still in
// eDetecting with more than SB_ENOUGH_REL_THRESHOLD sequences counted, the
// current confidence is compared against the shortcut thresholds and may
// switch the state to eFoundIt or eNotMe.
//
// Returns the (possibly updated) probing state.
nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32 aLen)
{
  const char* const bufEnd = aBuf + aLen;

  for (const char* cursor = aBuf; cursor != bufEnd; ++cursor)
  {
    // Go through unsigned char so bytes >= 0x80 index the table correctly.
    const unsigned char curOrder = mModel->charToOrderMap[(unsigned char)*cursor];

    if (curOrder < SYMBOL_CAT_ORDER)
      ++mTotalChar;

    if (curOrder < SAMPLE_SIZE)
    {
      ++mFreqChar;

      if (mLastOrder < SAMPLE_SIZE)
      {
        ++mTotalSeqs;

        // The matrix is addressed as [first * SAMPLE_SIZE + second]; a
        // reversed prober swaps the two letters in the lookup.
        const int seqOffset = mReversed
                                ? curOrder * SAMPLE_SIZE + mLastOrder
                                : mLastOrder * SAMPLE_SIZE + curOrder;
        ++mSeqCounters[mModel->precedenceMatrix[seqOffset]];
      }
    }

    // Remember this order (whatever it was) for the next pair.
    mLastOrder = curOrder;
  }

  // Shortcut decision: only taken while still detecting and once enough
  // sequences have been observed.
  if (mState == eDetecting && mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
  {
    const float confidence = GetConfidence();
    if (confidence > POSITIVE_SHORTCUT_THRESHOLD)
      mState = eFoundIt;
    else if (confidence < NEGATIVE_SHORTCUT_THRESHOLD)
      mState = eNotMe;
  }

  return mState;
}
79
80
void  nsSingleByteCharSetProber::Reset(void)
81
{
82
  mState = eDetecting;
83
  mLastOrder = 255;
84
  for (PRUint32 i = 0; i < NUMBER_OF_SEQ_CAT; i++)
85
    mSeqCounters[i] = 0;
86
  mTotalSeqs = 0;
87
  mTotalChar = 0;
88
  mFreqChar = 0;
89
}
90
91
//#define NEGATIVE_APPROACH 1
92
93
float nsSingleByteCharSetProber::GetConfidence(void)
94
{
95
#ifdef NEGATIVE_APPROACH
96
  if (mTotalSeqs > 0)
97
    if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 )
98
      return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar;
99
  return (float)0.01;
100
#else  //POSITIVE_APPROACH
101
  float r;
102
103
  if (mTotalSeqs > 0) {
104
    r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
105
    r = r*mFreqChar/mTotalChar;
106
    if (r >= (float)1.00)
107
      r = (float)0.99;
108
    return r;
109
  }
110
  return (float)0.01;
111
#endif
112
}
113
114
const char* nsSingleByteCharSetProber::GetCharSetName() 
115
{
116
  if (!mNameProber)
117
    return mModel->charsetName;
118
  return mNameProber->GetCharSetName();
119
}
120
121
#ifdef DEBUG_chardet
122
void nsSingleByteCharSetProber::DumpStatus()
123
{
124
  printf("  SBCS: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());
125
}
126
#endif