/****************************************************************************
**
** Implementation of TQUtf{8,16}Codec class
**
** Created : 981015
**
** Copyright (C) 1998-2008 Trolltech ASA.  All rights reserved.
**
** This file is part of the tools module of the TQt GUI Toolkit.
**
** This file may be used under the terms of the GNU General
** Public License versions 2.0 or 3.0 as published by the Free
** Software Foundation and appearing in the files LICENSE.GPL2
** and LICENSE.GPL3 included in the packaging of this file.
** Alternatively you may (at your option) use any later version
** of the GNU General Public License if such license has been
** publicly approved by Trolltech ASA (or its successors, if any)
** and the KDE Free TQt Foundation.
**
** Please review the following information to ensure GNU General
** Public Licensing requirements will be met:
** http://trolltech.com/products/qt/licenses/licensing/opensource/.
** If you are unsure which license is appropriate for your use, please
** review the following information:
** http://trolltech.com/products/qt/licenses/licensing/licensingoverview
** or contact the sales department at sales@trolltech.com.
**
** This file may be used under the terms of the Q Public License as
** defined by Trolltech ASA and appearing in the file LICENSE.TQPL
** included in the packaging of this file.  Licensees holding valid TQt
** Commercial licenses may use this file in accordance with the TQt
** Commercial License Agreement provided with the Software.
**
** This file is provided "AS IS" with NO WARRANTY OF ANY KIND,
** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR
** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted
** herein.
**
**********************************************************************/

#include "ntqutfcodec.h"

#ifndef QT_NO_TEXTCODEC

// Returns the MIBenum value reported for this codec (106).
int TQUtf8Codec::mibEnum() const
{
    return 106;
}

// Encodes up to lenInOut UTF-16 code units of uc (all of them when
// lenInOut <= 0) as UTF-8.  On return lenInOut holds the number of bytes
// written, not counting the terminating '\0'.
TQCString TQUtf8Codec::fromUnicode(const TQString& uc, int& lenInOut) const
{
    int l = uc.length();
    if (lenInOut > 0)
	l = TQMIN(l, lenInOut);
    // Each UTF-16 code unit needs at most three bytes (a surrogate pair is
    // two units and yields four bytes), plus one byte for the terminator.
    int rlen = l*3+1;
    TQCString rstr(rlen);
    uchar* cursor = (uchar*)rstr.data();
    const TQChar *ch = uc.unicode();
    for (int i=0; i < l; i++) {
	uint u = ch->unicode();
	if ( u < 0x80 ) {
	    // one byte
	    *cursor++ = (uchar)u;
	} else {
	    if ( u < 0x0800 ) {
		// lead byte of a two-byte sequence
		*cursor++ = 0xc0 | ((uchar) (u >> 6));
	    } else {
		// Combine a high surrogate with a following low surrogate
		// into one code point; a lone surrogate is left as it is.
		if (u >= 0xd800 && u < 0xdc00 && i < l-1) {
		    unsigned short low = ch[1].unicode();
		    if (low >= 0xdc00 && low < 0xe000) {
			++ch;
			++i;
			u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000;
		    }
		}
		if (u > 0xffff) {
		    // see TQString::fromUtf8() and TQString::utf8() for explanations
		    if (u > 0x10fe00 && u < 0x10ff00) {
			// code points in this range are written back as a
			// single raw byte
			*cursor++ = (u - 0x10fe00);
			++ch;
			continue;
		    } else {
			// first two bytes of a four-byte sequence
			*cursor++ = 0xf0 | ((uchar) (u >> 18));
			*cursor++ = 0x80 | ( ((uchar) (u >> 12)) & 0x3f);
		    }
		} else {
		    // lead byte of a three-byte sequence
		    *cursor++ = 0xe0 | ((uchar) (u >> 12));
		}
		*cursor++ = 0x80 | ( ((uchar) (u >> 6)) & 0x3f);
	    }
	    // final continuation byte shared by all multi-byte forms
	    *cursor++ = 0x80 | ((uchar) (u&0x3f));
	}
	++ch;
    }
    *cursor = 0;
    lenInOut = cursor - (uchar*)rstr.data();
    // shrink the buffer to the bytes actually produced plus the terminator
    ((TQByteArray&)rstr).resize(lenInOut+1);
    return rstr;
}

// Decodes len bytes of UTF-8 via TQString::fromUtf8(), skipping a leading
// byte order mark (EF BB BF) if one is present.
TQString TQUtf8Codec::toUnicode(const char* chars, int len) const
{
    // ">= 3" rather than "> 3": input consisting of nothing but the
    // three-byte mark must have it stripped as well.
    if (len >= 3 && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) {
	// starts with a byte order mark
	chars += 3;
	len -= 3;
    }
    return TQString::fromUtf8( chars, len );
}

// Returns the name of this codec.
const char* TQUtf8Codec::name() const
{
    return "UTF-8";
}

// Scores how plausible it is that the len bytes at chars are UTF-8.
// Returns -1 as soon as a NUL byte or a lead byte followed by a
// non-continuation byte is seen; otherwise returns an accumulated score
// (1 per ASCII byte, 3 per two-byte sequence, 2 per three-byte lead with
// a valid first continuation byte).
int TQUtf8Codec::heuristicContentMatch(const char* chars, int len) const
{
    int score = 0;
    for (int i=0; i<len; i++) {
	uchar ch = chars[i];
	// No nulls allowed.
	if ( !ch )
	    return -1;
	if ( ch < 128 ) {
	    // Inconclusive
	    score++;
	} else if ( (ch&0xe0) == 0xc0 ) {
	    // two-byte sequence: check the continuation byte when available
	    if ( i < len-1 ) {
		uchar c2 = chars[++i];
		if ( (c2&0xc0) != 0x80 )
		    return -1;
		score+=3;
	    }
	} else if ( (ch&0xf0) == 0xe0 ) {
	    // three-byte sequence: only the first continuation byte is
	    // checked here.
	    // NOTE(review): the disabled section below sits after the return
	    // statement, so the third byte is never validated by this
	    // branch; it is examined as an ordinary byte on the next pass.
	    if ( i < len-1 ) {
		uchar c2 = chars[++i];
		if ( (c2&0xc0) != 0x80 ) {
		    return -1;
#if 0
		    if ( i < len-1 ) {
			uchar c3 = chars[++i];
			if ( (c3&0xc0) != 0x80 )
			    return -1;
			score+=3;
		    }
#endif
		}
		score+=2;
	    }
	}
    }
    return score;
}




// Stateful UTF-8 decoder: a multi-byte sequence may be split across
// successive toUnicode() calls, so the partially assembled code point is
// kept in the members between calls.
class TQUtf8Decoder : public TQTextDecoder {
    uint uc;		// code point assembled so far
    uint min_uc;	// smallest value allowed for the current sequence length
    int need;		// continuation bytes still expected
    bool headerDone;	// TRUE once a character other than a leading BOM was seen
public:
    // uc and min_uc are only read while need != 0, but are initialised
    // anyway so that the object never holds indeterminate values.
    TQUtf8Decoder() : uc(0), min_uc(0), need(0), headerDone(FALSE)
    {
    }

    // Decodes the next len bytes and returns the characters completed so
    // far.  Malformed input yields TQChar::replacement.
    TQString toUnicode(const char* chars, int len)
    {
	TQString result;
	result.setLength( len + 1 ); // worst case
	TQChar *qch = (TQChar *)result.unicode();
	uchar ch;
	// index of the lead byte of the sequence in progress; -1 when that
	// byte arrived in an earlier call
	int error = -1;
	for (int i=0; i<len; i++) {
	    ch = chars[i];
	    if (need) {
		if ( (ch&0xc0) == 0x80 ) {
		    uc = (uc << 6) | (ch & 0x3f);
		    need--;
		    if ( !need ) {
			// Only values up to U+10FFFF can be expressed as a
			// surrogate pair.  Larger values (reachable with
			// lead bytes 0xf4..0xf7) would produce a bogus high
			// surrogate, so they fall through to the
			// replacement branch below.
			if (uc > 0xffff && uc < 0x110000) {
			    // surrogate pair
			    uc -= 0x10000;
			    unsigned short high = uc/0x400 + 0xd800;
			    unsigned short low = uc%0x400 + 0xdc00;
			    *qch++ = TQChar(high);
			    *qch++ = TQChar(low);
			    headerDone = TRUE;
			} else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
			    // overlong form, encoded surrogate, or value
			    // outside the accepted range
			    *qch++ = TQChar::replacement;
			} else {
			    // drop a byte order mark only when it is the
			    // very first character decoded
			    if (headerDone || TQChar(uc) != TQChar::byteOrderMark)
				*qch++ = uc;
			    headerDone = TRUE;
			}
		    }
		} else {
		    // error
		    // rewind so that the bytes following the offending lead
		    // byte are decoded again on their own
		    i = error;
		    *qch++ = TQChar::replacement;
		    need = 0;
		}
	    } else {
		if ( ch < 128 ) {
		    *qch++ = ch;
		    headerDone = TRUE;
		} else if ((ch & 0xe0) == 0xc0) {
		    uc = ch & 0x1f;
		    need = 1;
		    error = i;
		    min_uc = 0x80;
		} else if ((ch & 0xf0) == 0xe0) {
		    uc = ch & 0x0f;
		    need = 2;
		    error = i;
		    min_uc = 0x800;
		} else if ((ch&0xf8) == 0xf0) {
		    uc = ch & 0x07;
		    need = 3;
		    error = i;
		    min_uc = 0x10000;
		} else {
		    // error
		    *qch++ = TQChar::replacement;
		}
	    }
	}
	result.truncate( qch - result.unicode() );
	return result;
    }
};

// Returns a newly allocated stateful UTF-8 decoder.
TQTextDecoder* TQUtf8Codec::makeDecoder() const
{
    return new TQUtf8Decoder;
}






// Returns the MIBenum value reported for this codec (1000).
int TQUtf16Codec::mibEnum() const
{
    return 1000;
}

// Returns the name of this codec.
const char* TQUtf16Codec::name() const
{
    return "ISO-10646-UCS-2";
}

// Returns len when the data starts with the byte pair FF FE or FE FF,
// and 0 otherwise.
int TQUtf16Codec::heuristicContentMatch(const char* chars, int len) const
{
    uchar* uchars = (uchar*)chars;
    if ( len >= 2 && ((uchars[0] == 0xff && uchars[1] == 0xfe) ||
		      (uchars[1] == 0xff && uchars[0] == 0xfe)) )
	return len;
    else
	return 0;
}




// Encoder that copies the in-memory TQChar data of the string into the
// result, preceded on the first call by TQChar::byteOrderMark.
class TQUtf16Encoder : public TQTextEncoder {
    bool headerdone;	// TRUE once the byte order mark has been written
public:
    TQUtf16Encoder() : headerdone(FALSE)
    {
    }

    // Sets lenInOut to the number of bytes placed in the returned array.
    TQCString fromUnicode(const TQString& uc, int& lenInOut)
    {
	if ( headerdone ) {
	    lenInOut = uc.length()*sizeof(TQChar);
	    TQCString d(lenInOut);
	    memcpy(d.data(),uc.unicode(),lenInOut);
	    return d;
	} else {
	    headerdone = TRUE;
	    // one extra TQChar for the byte order mark
	    lenInOut = (1+uc.length())*sizeof(TQChar);
	    TQCString d(lenInOut);
	    memcpy(d.data(),&TQChar::byteOrderMark,sizeof(TQChar));
	    memcpy(d.data()+sizeof(TQChar),uc.unicode(),uc.length()*sizeof(TQChar));
	    return d;
	}
    }
};

// Stateful decoder that assembles TQChars from byte pairs; a pair may be
// split across successive toUnicode() calls.
class TQUtf16Decoder : public TQTextDecoder {
    uchar buf;		// first byte of a pair whose second byte is pending
    bool half;		// TRUE while buf holds a pending byte
    bool swap;		// TRUE when the two bytes of each pair are exchanged
    bool headerdone;	// TRUE once the first character has been examined
public:
    // buf is only read while half is TRUE, but is initialised anyway so
    // that the object never holds an indeterminate value.
    TQUtf16Decoder() : buf(0), half(FALSE), swap(FALSE), headerdone(FALSE)
    {
    }

    // Decodes the next len bytes and returns the characters completed so
    // far.
    TQString toUnicode(const char* chars, int len)
    {
	TQString result;
	result.setLength( len + 1 ); // worst case
	TQChar *qch = (TQChar *)result.unicode();
	TQChar ch;
	while ( len-- ) {
	    if ( half ) {
		if ( swap ) {
		    ch.setRow( *chars++ );
		    ch.setCell( buf );
		} else {
		    ch.setRow( buf );
		    ch.setCell( *chars++ );
		}
		if ( !headerdone ) {
		    // The first character decides the byte order: a swapped
		    // mark flips it, a regular mark is dropped, and anything
		    // else is ordinary content.
		    if ( ch == TQChar::byteOrderSwapped ) {
			swap = !swap;
		    } else if ( ch == TQChar::byteOrderMark ) {
			// Ignore ZWNBSP
		    } else {
			*qch++ = ch;
		    }
		    headerdone = TRUE;
		} else
		    *qch++ = ch;
		half = FALSE;
	    } else {
		buf = *chars++;
		half = TRUE;
	    }
	}
	result.truncate( qch - result.unicode() );
	return result;
    }
};

// Returns a newly allocated stateful UTF-16 decoder.
TQTextDecoder* TQUtf16Codec::makeDecoder() const
{
    return new TQUtf16Decoder;
}

// Returns a newly allocated stateful UTF-16 encoder.
TQTextEncoder* TQUtf16Codec::makeEncoder() const
{
    return new TQUtf16Encoder;
}

#endif //QT_NO_TEXTCODEC