/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Mozilla Universal charset detector code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 2001
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *          Shy Shalom <shooshX@gmail.com>
 *          Proofpoint, Inc.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
39
#include <stdio.h>
40
41
#include "nsMBCSGroupProber.h"
42
#include "nsUniversalDetector.h"
43
44
#if defined(DEBUG_chardet) || defined(DEBUG_jgmyers)
45
// Human-readable prober names used by the debug dump routines in this file.
// Entries are indexed by prober slot, i.e. ProberName[i] labels mProbers[i]
// as assigned in the nsMBCSGroupProber constructor.
const char* ProberName[] = {
  "UTF8",     // slot 0
  "SJIS",     // slot 1
  "EUCJP",    // slot 2
  "GB18030",  // slot 3
  "EUCKR",    // slot 4
  "Big5",     // slot 5
  "EUCTW",    // slot 6
};
55
56
#endif
57
58
// Creates the group of multi-byte charset probers.
//
// aLanguageFilter is a bit mask of NS_FILTER_* flags. A prober is only
// instantiated when its language bit is set; the UTF-8 prober in slot 0 is
// always created. Each language-specific prober is additionally told whether
// the filter consists of exactly that one language.
nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
{
  // Clear every slot first so that probers skipped by the filter stay null.
  for (PRUint32 slot = 0; slot < NUM_OF_PROBERS; ++slot)
    mProbers[slot] = nsnull;

  mProbers[0] = new nsUTF8Prober();

  if (aLanguageFilter & NS_FILTER_JAPANESE)
  {
    const bool onlyJapanese = (aLanguageFilter == NS_FILTER_JAPANESE);
    mProbers[1] = new nsSJISProber(onlyJapanese);
    mProbers[2] = new nsEUCJPProber(onlyJapanese);
  }

  if (aLanguageFilter & NS_FILTER_CHINESE_SIMPLIFIED)
  {
    const bool onlySimplified = (aLanguageFilter == NS_FILTER_CHINESE_SIMPLIFIED);
    mProbers[3] = new nsGB18030Prober(onlySimplified);
  }

  if (aLanguageFilter & NS_FILTER_KOREAN)
  {
    const bool onlyKorean = (aLanguageFilter == NS_FILTER_KOREAN);
    mProbers[4] = new nsEUCKRProber(onlyKorean);
  }

  if (aLanguageFilter & NS_FILTER_CHINESE_TRADITIONAL)
  {
    const bool onlyTraditional = (aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL);
    mProbers[5] = new nsBig5Prober(onlyTraditional);
    mProbers[6] = new nsEUCTWProber(onlyTraditional);
  }

  // Puts the group into its initial detecting state and marks the probers
  // created above as active.
  Reset();
}
80
81
// Releases every prober allocated by the constructor.
nsMBCSGroupProber::~nsMBCSGroupProber()
{
  // Slots skipped by the language filter hold nsnull; deleting a null
  // pointer is a no-op, so no per-slot check is needed.
  PRUint32 slot = 0;
  while (slot < NUM_OF_PROBERS)
  {
    delete mProbers[slot];
    ++slot;
  }
}
88
89
const char* nsMBCSGroupProber::GetCharSetName()
90
{
91
  if (mBestGuess == -1)
92
  {
93
    GetConfidence();
94
    if (mBestGuess == -1)
95
      mBestGuess = 0;
96
  }
97
  return mProbers[mBestGuess]->GetCharSetName();
98
}
99
100
void  nsMBCSGroupProber::Reset(void)
101
{
102
  mActiveNum = 0;
103
  for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
104
  {
105
    if (mProbers[i])
106
    {
107
      mProbers[i]->Reset();
108
      mIsActive[i] = PR_TRUE;
109
      ++mActiveNum;
110
    }
111
    else
112
      mIsActive[i] = PR_FALSE;
113
  }
114
  mBestGuess = -1;
115
  mState = eDetecting;
116
  mKeepNext = 0;
117
}
118
119
// Feeds a buffer to the active probers, skipping stretches of plain 7-bit
// bytes to reduce the load on them.
//
// A "run" starts at the first byte with the high bit set and is extended by
// every further high-bit byte. It ends (and is handed to the probers,
// including the terminating byte) once two consecutive bytes without the high
// bit have followed. A run that is still open at the end of the buffer is
// handed over as-is, and the remaining countdown is stored in mKeepNext so
// the next call continues that run from the start of its buffer.
//
// Returns eFoundIt as soon as any prober reports it (recording that prober in
// mBestGuess); otherwise returns the unchanged mState.
nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
{
  PRUint32 runStart = 0;
  PRUint32 pending = mKeepNext;  // countdown carried over from the last call

  for (PRUint32 cur = 0; cur < aLen; ++cur)
  {
    if (aBuf[cur] & 0x80)
    {
      // High-bit byte: open a new run if none is in progress, and (re)arm
      // the countdown.
      if (pending == 0)
        runStart = cur;
      pending = 2;
      continue;
    }

    // 7-bit byte outside of any run: nothing to do.
    if (pending == 0)
      continue;

    // 7-bit byte inside a run: the run only ends when the countdown hits 0.
    --pending;
    if (pending != 0)
      continue;

    for (PRUint32 slot = 0; slot < NUM_OF_PROBERS; ++slot)
    {
      if (mIsActive[slot] &&
          mProbers[slot]->HandleData(aBuf + runStart, cur + 1 - runStart) == eFoundIt)
      {
        // Note: mKeepNext is intentionally left untouched on this path,
        // matching the early return.
        mBestGuess = slot;
        mState = eFoundIt;
        return mState;
      }
    }
  }

  // A run is still open at the end of the buffer: hand over what we have.
  if (pending != 0)
  {
    for (PRUint32 slot = 0; slot < NUM_OF_PROBERS; ++slot)
    {
      if (mIsActive[slot] &&
          mProbers[slot]->HandleData(aBuf + runStart, aLen - runStart) == eFoundIt)
      {
        mBestGuess = slot;
        mState = eFoundIt;
        return mState;
      }
    }
  }

  mKeepNext = pending;
  return mState;
}
172
173
float nsMBCSGroupProber::GetConfidence(void)
174
{
175
  PRUint32 i;
176
  float bestConf = 0.0, cf;
177
178
  switch (mState)
179
  {
180
  case eFoundIt:
181
    return (float)0.99;
182
  case eNotMe:
183
    return (float)0.01;
184
  default:
185
    for (i = 0; i < NUM_OF_PROBERS; i++)
186
    {
187
      if (!mIsActive[i])
188
        continue;
189
      cf = mProbers[i]->GetConfidence();
190
      if (bestConf < cf)
191
      {
192
        bestConf = cf;
193
        mBestGuess = i;
194
      }
195
    }
196
  }
197
  return bestConf;
198
}
199
200
#ifdef DEBUG_chardet
201
void nsMBCSGroupProber::DumpStatus()
202
{
203
  PRUint32 i;
204
  float cf;
205
  
206
  GetConfidence();
207
  for (i = 0; i < NUM_OF_PROBERS; i++)
208
  {
209
    if (!mIsActive[i])
210
      printf("  MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
211
    else
212
    {
213
      cf = mProbers[i]->GetConfidence();
214
      printf("  MBCS %1.3f: [%s]\r\n", cf, ProberName[i]);
215
    }
216
  }
217
}
218
#endif
219
220
#ifdef DEBUG_jgmyers
221
// Debug helper: writes one DetectorState entry per prober slot into `states`,
// starting at index `offset`, and advances `offset` by NUM_OF_PROBERS.
// Inactive slots are reported with a confidence of 0.
//
// NOTE(review): no check is made that offset + NUM_OF_PROBERS stays within
// nsUniversalDetector::NumDetectors; presumably the caller guarantees it --
// confirm.
void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], PRUint32 &offset)
{
  for (PRUint32 slot = 0; slot < NUM_OF_PROBERS; ++slot, ++offset)
  {
    nsUniversalDetector::DetectorState& entry = states[offset];

    entry.name = ProberName[slot];
    entry.isActive = mIsActive[slot];
    if (mIsActive[slot])
      entry.confidence = mProbers[slot]->GetConfidence();
    else
      entry.confidence = 0.0;
  }
}
230
#endif /* DEBUG_jgmyers */