summaryrefslogtreecommitdiffstats
path: root/tqtinterface/qt4/src/codecs/tqrtlcodec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tqtinterface/qt4/src/codecs/tqrtlcodec.cpp')
-rw-r--r--tqtinterface/qt4/src/codecs/tqrtlcodec.cpp617
1 files changed, 0 insertions, 617 deletions
diff --git a/tqtinterface/qt4/src/codecs/tqrtlcodec.cpp b/tqtinterface/qt4/src/codecs/tqrtlcodec.cpp
deleted file mode 100644
index a5640aa..0000000
--- a/tqtinterface/qt4/src/codecs/tqrtlcodec.cpp
+++ /dev/null
@@ -1,617 +0,0 @@
-/****************************************************************************
-**
-** Implementation of TQTextCodec class
-**
-** Created : 981015
-**
-** Copyright (C) 1998-2008 Trolltech ASA. All rights reserved.
-**
-** This file is part of the tools module of the TQt GUI Toolkit.
-**
-** This file may be used under the terms of the GNU General
-** Public License versions 2.0 or 3.0 as published by the Free
-** Software Foundation and appearing in the files LICENSE.GPL2
-** and LICENSE.GPL3 included in the packaging of this file.
-** Alternatively you may (at your option) use any later version
-** of the GNU General Public License if such license has been
-** publicly approved by Trolltech ASA (or its successors, if any)
-** and the KDE Free TQt Foundation.
-**
-** Please review the following information to ensure GNU General
-** Public Licensing requirements will be met:
-** http://trolltech.com/products/qt/licenses/licensing/opensource/.
-** If you are unsure which license is appropriate for your use, please
-** review the following information:
-** http://trolltech.com/products/qt/licenses/licensing/licensingoverview
-** or contact the sales department at sales@trolltech.com.
-**
-** This file may be used under the terms of the Q Public License as
-** defined by Trolltech ASA and appearing in the file LICENSE.TQPL
-** included in the packaging of this file. Licensees holding valid TQt
-** Commercial licenses may use this file in accordance with the TQt
-** Commercial License Agreement provided with the Software.
-**
-** This file is provided "AS IS" with NO WARRANTY OF ANY KIND,
-** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR
-** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted
-** herein.
-**
-**********************************************************************/
-
-#include "tqrtlcodec.h"
-#include <private/tqtextengine_p.h>
-
-#ifndef TQT_NO_CODEC_HEBREW
-
-// NOT REVISED
-
-// Replacement byte written for characters that cannot be represented.
-static const uchar unkn = '?'; // BLACK SQUARE (94) would be better
-
-// Decoding table for the upper half of the 8-bit charset: entry (byte - 128)
-// holds the Unicode code point for that byte. 0xFFFD marks byte values that
-// have no mapping.
-static const ushort heb_to_tqunicode[128] = {
- 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
- 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
- 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
- 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
- 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
- 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E,
- 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
- 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
- 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
- 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
- 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
- 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
- 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
- 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
- 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
- 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD
-};
-
-// Encoding table for Unicode row 0x00, consulted by to8bit(); `unkn` marks
-// entries without a mapping.
-// NOTE(review): the values start at 0xA0 and mirror the low bytes of the
-// decoding table above, which suggests the table is laid out for
-// U+00A0..U+00BF (index = cell - 0xA0). Entries 10 and 26 hold 0xD7/0xF7,
-// which look like code points rather than 8-bit codes — confirm against the
-// ISO 8859-8 mapping before relying on them.
-static const uchar tqunicode_to_heb_00[32] = {
- 0xA0, unkn, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
- 0xA8, 0xA9, 0xD7, 0xAB, 0xAC, 0xAD, 0xAE, unkn,
- 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
- 0xB8, 0xB9, 0xF7, 0xBB, 0xBC, 0xBD, 0xBE, unkn,
-};
-
-// Encoding table for Unicode row 0x05: entry (cell - 0xD0) holds the 8-bit
-// code for U+05D0 + index. Only 32 entries exist, so it covers
-// U+05D0..U+05EF; the last five are `unkn`.
-static const uchar tqunicode_to_heb_05[32] = {
- 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
- 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
- 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
- 0xF8, 0xF9, 0xFA, unkn, unkn, unkn, unkn, unkn
-};
-
-/*
-  Appends the ISO 8859-8 byte(s) for \a ch to \a rstr.
-
-  Returns TRUE when \a ch was handled: either a byte was appended or the
-  character is one that is deliberately dropped (marks and the Hebrew
-  points/punctuation below U+05D0). Returns FALSE when neither the
-  character nor any part of its decomposition could be handled; the
-  callers in this file then append `unkn` themselves.
-*/
-static bool to8bit(const TQChar ch, TQCString *rstr)
-{
-    if ( ch.isMark() )
-        return TRUE; // ignore marks for conversion
-
-    const uchar row = ch.row();
-    const uchar cell = ch.cell();
-    bool converted = FALSE;
-
-    if ( row == 0x00 ) {
-        if ( cell < 0x80 ) {
-            // ASCII is identical in both encodings
-            *rstr += (char)cell;
-            converted = TRUE;
-        } else if ( cell == 0xD7 ) {
-            // MULTIPLICATION SIGN: the decoding table maps byte 0xAA to U+00D7
-            *rstr += (char)0xAA;
-            converted = TRUE;
-        } else if ( cell == 0xF7 ) {
-            // DIVISION SIGN: the decoding table maps byte 0xBA to U+00F7
-            *rstr += (char)0xBA;
-            converted = TRUE;
-        } else if ( cell >= 0xA0 && cell < 0xC0 && cell != 0xAA && cell != 0xBA ) {
-            // The table is laid out for U+00A0..U+00BF (entry 0 is 0xA0), so
-            // it must be indexed relative to 0xA0. U+00AA and U+00BA are
-            // excluded because their slots hold the codes handled above;
-            // they fall through to the decomposition path instead.
-            const uchar mapped = tqunicode_to_heb_00[cell - 0xA0];
-            if ( mapped != unkn ) {
-                *rstr += (char)mapped;
-                converted = TRUE;
-            }
-        }
-        // U+0080..U+009F and U+00C0..U+00FF have no direct mapping.
-    } else if ( row == 0x05 ) {
-        // 0x0591 - 0x05cf: Hebrew punctuation... dropped
-        if ( cell > 0x91 )
-            converted = TRUE;
-        if ( cell >= 0xD0 ) {
-            const unsigned int idx = cell - 0xD0;
-            // The table only covers U+05D0..U+05EF; anything above would
-            // read past its end, so emit the replacement byte instead.
-            if ( idx < sizeof(tqunicode_to_heb_05) )
-                *rstr += (char)tqunicode_to_heb_05[idx];
-            else
-                *rstr += (char)unkn;
-        }
-    } else if ( row == 0x20 ) {
-        if ( cell == 0x3E ) {
-            // U+203E OVERLINE <-> byte 0xAF in the decoding table
-            *rstr += (char)0xAF;
-            converted = TRUE;
-        } else if ( cell == 0x17 ) {
-            // U+2017 DOUBLE LOW LINE <-> byte 0xDF in the decoding table
-            *rstr += (char)0xDF;
-            converted = TRUE;
-        }
-    }
-
-    if ( converted )
-        return TRUE;
-
-    // couldn't convert the char... lets try its decomposition
-    const TQString d = ch.decomposition();
-    if ( d.isNull() )
-        return FALSE;
-
-    const int l = d.length();
-    for ( int i = 0; i < l; i++ ) {
-        // success as soon as any part of the decomposition is representable
-        if ( to8bit( d[i], rstr ) )
-            converted = TRUE;
-    }
-
-    return converted;
-}
-
-#if 0
-/*
-  Returns the characters [from, to) of \a input. For a right-to-left run
-  (\a runDir == TQChar::DirR) the characters are returned in reverse order;
-  any other direction yields a plain copy of the range. An empty or
-  inverted range yields a null string.
-*/
-static TQString run(const TQString &input, unsigned int from, unsigned int to, TQChar::Direction runDir)
-{
-    if ( from >= to )
-        return TQString::null;
-
-    if ( runDir != TQChar::DirR )
-        return input.mid( from, to - from );
-
-    // right-to-left: collect the range back to front
-    TQString reversed;
-    const TQChar *chars = input.tqunicode();
-    for ( unsigned int idx = to; idx > from; --idx )
-        reversed += chars[idx - 1];
-    return reversed;
-}
-
-/*
- we might do better here, but I'm currently not sure if it's worth the effort. It will hopefully convert
- 90% of the visually ordered Hebrew correctly.
-*/
-// Builds a reordered copy of the characters of `str` between `from` and `to`
-// for the base direction `dir`, by cutting the range into directional runs
-// and emitting each one through run(). This function sits inside the
-// surrounding `#if 0` region, so it is currently not compiled.
-static TQString reverseLine(const TQString &str, unsigned int from, unsigned int to, TQChar::Direction dir)
-{
- TQString out;
-
- // degenerate range: return just the character at `from`
- if ( to <= from ) {
- out += str.at(from);
- return out;
- }
-
- // since we don't have embedding marks, we get around with bidi levels up to 2.
-
- // simple case: dir = RTL:
- // go through the line from right to left, and reverse all continuous Hebrew strings.
- if ( dir == TQChar::DirR ) {
- // swap the bounds: `pos` starts at the right end and walks down to `to`,
- // while `from` tracks the right edge of the run being collected
- unsigned int pos = to;
- to = from;
- from = pos;
- TQChar::Direction runDir = TQChar::DirON;
-
- while ( pos > to ) {
- TQChar::Direction d = str.at(pos).direction();
- switch ( d ) {
- case TQChar::DirL:
- case TQChar::DirAN:
- case TQChar::DirEN:
- // entering a left-to-right run: flush what was collected so far
- if ( runDir != TQChar::DirL ) {
- out += run( str, pos, from, runDir );
- from = pos - 1;
- }
- runDir = TQChar::DirL;
- break;
- case TQChar::DirON:
- // a neutral before any strong character starts a right-to-left run
- if ( runDir == TQChar::DirON ) {
- runDir = TQChar::DirR;
- break;
- }
- // fall through
- case TQChar::DirR:
- if ( runDir != TQChar::DirR ) {
- out += run( str, pos, from, runDir );
- from = pos - 1;
- }
- runDir = TQChar::DirR;
- // no break: continues into `default`, which only breaks
- default:
- break;
- }
- pos--;
- }
- // flush the last run
- out += run( str, pos, from, runDir );
- } else {
- // basicDir == DirL. A bit more complicated, as we might need to reverse two times for numbers.
- unsigned int pos = from;
- TQChar::Direction runDir = TQChar::DirON;
-
- // first reversing. Ignore numbers
- while ( pos < to ) {
- TQChar::Direction d = str.at(pos).direction();
- switch ( d ) {
- case TQChar::DirL:
- if ( runDir != TQChar::DirL && runDir != TQChar::DirON ) {
- out += run( str, from, pos, runDir );
- qDebug( "out = %s", out.latin1() );
- from = pos;
- }
- runDir = TQChar::DirL;
- break;
- case TQChar::DirON:
- // a neutral before any strong character starts a left-to-right run
- if ( runDir == TQChar::DirON ) {
- runDir = TQChar::DirL;
- break;
- }
- // fall through
- case TQChar::DirR:
- case TQChar::DirAN:
- case TQChar::DirEN:
- // numbers are grouped with right-to-left text in this pass
- if ( runDir != TQChar::DirR && runDir != TQChar::DirON ) {
- out += run( str, from, pos, runDir );
- qDebug( "out = %s", out.latin1() );
- from = pos;
- }
- runDir = TQChar::DirR;
- // no break: continues into `default`, which only breaks
- default:
- break;
- }
- pos++;
- }
- out += run( str, from, pos, runDir );
- qDebug( "out = %s", out.latin1() );
- // second reversing for numbers
- // the result of the first pass becomes the input of the second
- TQString in = out;
- out = "";
- pos = 0;
- from = 0;
- to = in.length() - 1;
- runDir = TQChar::DirON;
- while ( pos < to ) {
- // NOTE(review): directions are read from `str`, although this pass
- // iterates over positions of `in` — confirm which string was intended.
- TQChar::Direction d = str.at(pos).direction();
- switch ( d ) {
- case TQChar::DirL:
- case TQChar::DirON:
- case TQChar::DirR:
- // NOTE(review): the `!= DirON` test is implied by `== DirEN`.
- if ( runDir == TQChar::DirEN && runDir != TQChar::DirON ) {
- out += run( in, from, pos, TQChar::DirR ); //DirR ensures reversing
- qDebug( "out = %s", out.latin1() );
- // this assignment is overwritten by the one after the `if`
- runDir = TQChar::DirR;
- from = pos;
- }
- runDir = TQChar::DirL;
- break;
- case TQChar::DirAN:
- case TQChar::DirEN:
- if ( runDir != TQChar::DirEN && runDir != TQChar::DirON ) {
- out += in.mid(from, pos-from+1);
- qDebug( "out = %s", out.latin1() );
- from = pos;
- }
- runDir = TQChar::DirEN;
- // no break: continues into `default`, which only breaks
- default:
- break;
- }
- pos++;
- }
- // NOTE(review): the final run is taken from `str`, while the loop above
- // copies from `in` — confirm which string was intended.
- out += run( str, from, pos, runDir );
-
- }
- return out;
-}
-#endif
-
-/* This function assumes the TQString is still visually ordered.
- * Finding the basic direction of the text is not easy in this case, since
- * a string like "my friend MOLAHS" could (in logical order) mean as well
- * "SHALOM my friend" or "my friend SHALOM", depending on the basic direction
- * one assumes for the text.
- *
- * So this function uses some heuristics to find the right answer:
- *  1. if the first line starts and ends with characters of the same strong
- *     direction, that direction is returned;
- *  2. punctuation immediately followed by a strong character suggests a
- *     sentence end on the left, i.e. TQChar::DirR;
- *  3. trailing punctuation immediately preceded by a strong character
- *     suggests a sentence end on the right, i.e. TQChar::DirL;
- *  4. otherwise TQChar::DirR is assumed.
- * Text without any strong character is reported as TQChar::DirL.
- */
-static TQChar::Direction findBasicDirection(TQString str)
-{
-    const unsigned int len = str.length();
-    unsigned int pos = 0;
-    unsigned int startLine = 0;
-    TQChar::Direction dir1 = TQChar::DirON;
-    TQChar::Direction dir2 = TQChar::DirON;
-
-    // If the visual representation of the first line starts and ends with the same
-    // directionality, we know the answer.
-    while ( pos < len ) {
-        if ( str.at(pos) == '\n' )
-            startLine = pos;
-        if ( str.at(pos).direction() < 2 ) { // DirR or DirL
-            dir1 = str.at(pos).direction();
-            break;
-        }
-        pos++;
-    }
-
-    if ( pos == len ) // no directional chars, assume TQChar::DirL
-        return TQChar::DirL;
-
-    // move to end of line
-    while ( pos < len && str.at(pos) != '\n' )
-        pos++;
-
-    // len >= 1 here; step back so the scan below never indexes at(len)
-    if ( pos == len )
-        pos--;
-
-    // last strong character of that line
-    while ( pos > startLine ) {
-        if ( str.at(pos).direction() < 2 ) { // DirR or DirL
-            dir2 = str.at(pos).direction();
-            break;
-        }
-        pos--;
-    }
-
-    // both are the same, so we have the direction!
-    if ( dir1 == dir2 )
-        return dir1;
-
-    // guess with the help of punctuation marks...
-    // if the sentence ends with a punctuation, we should have a mark
-    // at one side of the text...
-
-    for ( pos = 0; pos < len - 1; pos++ ) {
-        if ( str.at(pos).category() != TQChar::Punctuation_Other )
-            continue;
-        // spanish inverted question and exclamation mark open a sentence,
-        // so they say nothing about where it ends
-        if ( str.at(pos) == (char)0xbf || str.at(pos) == (char)0xa1 )
-            continue;
-        if ( str.at(pos+1).direction() < 2 )
-            return TQChar::DirR;
-    }
-
-    // Walk back over the non-strong characters at the end of the text. The
-    // previous loop header (`pos = len; while (pos < 1 ...)`) could never be
-    // entered, so this check was dead code.
-    pos = len - 1;
-    while ( pos >= 1 && str.at(pos).direction() >= 2 ) {
-        if ( str.at(pos).category() == TQChar::Punctuation_Other
-             && str.at(pos-1).direction() < 2 )
-            return TQChar::DirL;
-        pos--;
-    }
-
-    // don't know try DirR...
-    return TQChar::DirR;
-}
-
-
-/*!
- \class TQHebrewCodec tqrtlcodec.h
- \reentrant
- \ingroup i18n
-
- \brief The TQHebrewCodec class provides conversion to and from
- visually ordered Hebrew.
-
- Hebrew as a semitic language is written from right to left.
- Because older computer systems couldn't handle reordering a string
- so that the first letter appears on the right, many older
- documents were encoded in visual order, so that the first letter
- of a line is the rightmost one in the string.
-
- In contrast to this, Unicode defines characters to be in logical
- order (the order you would read the string). This codec tries to
- convert visually ordered Hebrew (8859-8) to Unicode. This might
- not always work perfectly, because reversing the \e bidi
- (bi-directional) algorithm that transforms from logical to visual
- order is non-trivial.
-
- Transformation from Unicode to visual Hebrew (8859-8) is done
- using the bidi algorithm in TQt, and will produce correct results,
- so long as the codec is given the text a whole paragraph at a
- time. Places where newlines are supposed to go can be indicated by
- a newline character ('\n'). Note that these newline characters
- change the reordering behaviour of the algorithm, since the bidi
- reordering only takes place within one line of text, whereas
- line breaks are determined in visual order.
-
- Visually ordered Hebrew is still used quite often in some places,
- mainly in email communication (since most email programs still
- don't understand logically ordered Hebrew) and on web pages. The
- use on web pages is rapidly decreasing, due to the availability of
- browsers that correctly support logically ordered Hebrew.
-
- This codec has the name "iso8859-8". If you don't want any bidi
- reordering to happen during conversion, use the "iso8859-8-i"
- codec, which assumes logical order for the 8-bit string.
-*/
-
-/*! \reimp */
-int TQHebrewCodec::mibEnum() const
-{
- // presumably the IANA MIBenum registered for ISO_8859-8 — TODO confirm
- return 11;
-}
-
-/*! \reimp */
-const char* TQHebrewCodec::name() const
-{
- // spelled with a space here; mimeName() returns the hyphenated form
- return "ISO 8859-8";
-}
-
-/*!
- Returns the codec's mime name.
-*/
-const char* TQHebrewCodec::mimeName() const
-{
- // hyphenated spelling, unlike the string returned by name()
- return "ISO-8859-8";
-}
-
-/*
-  Returns \a logical rearranged into visual order for the base direction
-  \a basicDir. Newlines are swapped for U+2028 before the text is processed
-  and swapped back afterwards.
-
-  When USE_QT4 is defined the reordering is not implemented: a warning is
-  printed and the text is returned with only the newline substitution
-  applied.
-*/
-static TQString visualOrder(TQString logical, TQChar::Direction basicDir)
-{
-    logical.replace(TQChar('\n'), TQChar(0x2028));
-
-#ifdef USE_QT4
-    // [FIXME]
-    printf("[WARNING] static TQString visualOrder(TQString logical, TQChar::Direction basicDir) unimplemented\n\r");
-    return logical;
-#else // USE_QT4
-    TQTextEngine engine(logical, 0);
-    engine.direction = basicDir;
-    engine.itemize();
-
-    // Scratch arrays for bidiReorder(): stack storage for the common case,
-    // heap storage once there are more than 255 items.
-    const int itemCount = engine.items.size();
-    const bool onHeap = itemCount > 255;
-    TQ_UINT8 stackLevels[256];
-    int stackOrder[256];
-    TQ_UINT8 *levels = onHeap ? new TQ_UINT8[itemCount] : stackLevels;
-    int *order = onHeap ? new int[itemCount] : stackOrder;
-
-    for (int n = 0; n < itemCount; ++n)
-        levels[n] = engine.items[n].analysis.bidiLevel;
-    engine.bidiReorder(itemCount, levels, order);
-
-    TQString visual;
-    for (int n = 0; n < itemCount; ++n) {
-        const int itemIndex = order[n];
-        TQScriptItem &item = engine.items[itemIndex];
-        TQString piece = logical.mid(item.position, engine.length(itemIndex));
-        if (item.analysis.bidiLevel % 2) {
-            // odd level: reverse the characters of this item in place ...
-            TQChar *front = (TQChar *)piece.tqunicode();
-            TQChar *back = front + piece.length() - 1;
-            for (; front < back; ++front, --back) {
-                const TQChar held = *front;
-                *front = *back;
-                *back = held;
-            }
-            // ... and then replace each one by its mirrored counterpart
-            TQChar *cur = (TQChar *)piece.tqunicode();
-            TQChar *stop = cur + piece.length();
-            for (; cur < stop; ++cur)
-                *cur = cur->mirroredChar();
-        }
-        visual += piece;
-    }
-
-    // replace Unicode newline back with \n to compare.
-    visual.replace(TQChar(0x2028), TQChar('\n'));
-
-    if (onHeap) {
-        delete [] levels;
-        delete [] order;
-    }
-    return visual;
-#endif // USE_QT4
-}
-
-/*!
-    \reimp
-
-    Since Hebrew (and Arabic) is written from right to left, but
-    iso8859-8 assumes visual ordering (as opposed to the logical
-    ordering of Unicode), we must reverse the order of the input
-    string (the first \a len characters of \a chars) to put it into
-    logical order.
-
-    One problem is that the basic text direction is unknown. So this
-    function uses some heuristics to guess it, and if it can't guess
-    the right one, it assumes, the basic text direction is right to
-    left.
-
-    This behaviour can be overridden, by putting a control character
-    at the beginning of the text to indicate which basic text
-    direction to use. If the basic text direction is left-to-right,
-    the control character should be (uchar) 0xFE. For right-to-left it
-    should be 0xFF. Both characters are undefined in the iso 8859-8
-    charset. The control character counts towards \a len but is not
-    part of the converted text.
-
-    Example: A visually ordered string "english WERBEH american" would
-    be recognized as having a basic left to right direction. So the
-    logically ordered TQString would be "english HEBREW american".
-
-    By prepending a (uchar)0xFF at the start of the string,
-    TQHebrewCodec::toUnicode() would use a basic text direction of
-    right to left, and the string would thus become "american HEBREW
-    english".
-
-    A null string is returned when there is nothing to convert.
-*/
-TQString TQHebrewCodec::toUnicode(const char* chars, int len ) const
-{
-    if ( !chars || len <= 0 )
-        return TQString::null;
-
-    const unsigned char *c = (const unsigned char *)chars;
-    TQChar::Direction basicDir = TQChar::DirON; // neutral, we don't know
-
-    // Test, if the user gives us a directionality.
-    // We use 0xFE and 0xFF in ISO8859-8 for that.
-    // These chars are undefined in the charset, and are mapped to
-    // RTL overwrite
-    // Each skipped hint also shortens the remaining input; otherwise the
-    // conversion loop below would read past the end of the buffer.
-    if ( len > 0 && c[0] == 0xfe ) {
-        basicDir = TQChar::DirL;
-        c++; // skip directionality hint
-        len--;
-    }
-    if ( len > 0 && c[0] == 0xff ) {
-        basicDir = TQChar::DirR;
-        c++; // skip directionality hint
-        len--;
-    }
-
-    // the input consisted of direction hints only
-    if ( len == 0 )
-        return TQString::null;
-
-    TQString r;
-    for ( int i = 0; i < len; i++ ) {
-        if ( c[i] > 127 )
-            r[i] = heb_to_tqunicode[c[i]-128];
-        else
-            r[i] = c[i];
-    }
-
-    // do transformation from visual byte ordering to logical byte
-    // ordering
-    if ( basicDir == TQChar::DirON )
-        basicDir = findBasicDirection(r);
-
-    return visualOrder(r, basicDir);
-}
-
-/*!
-    Transforms the logically ordered TQString, \a uc, into a visually
-    ordered string in the 8859-8 encoding. TQt's bidi algorithm is used
-    to perform this task. Note that newline characters affect the
-    reordering, since reordering is done on a line by line basis.
-
-    The algorithm is designed to work on whole paragraphs of text, so
-    processing a line at a time may produce incorrect results. This
-    approach is taken because the reordering of the contents of a
-    particular line in a paragraph may depend on the previous line in
-    the same paragraph.
-
-    Some encodings (for example Japanese or UTF-8) are multibyte (so
-    one input character is mapped to two output characters). The \a
-    lenInOut argument specifies the number of TQChars that should be
-    converted and is set to the number of characters returned. A value
-    of zero or less converts the whole string.
-
-    Characters that cannot be represented are written as '?'.
-*/
-TQCString TQHebrewCodec::fromUnicode(const TQString& uc, int& lenInOut) const
-{
-    // process only len chars...
-    int l = (int)uc.length();
-    if ( lenInOut > 0 )
-        l = TQMIN(l, lenInOut);
-
-    TQCString rstr;
-    if ( l == 1 ) {
-        // a single character needs no reordering
-        if ( !to8bit( uc[0], &rstr ) )
-            rstr += (char)unkn;
-    } else {
-        TQString tmp = uc;
-        tmp.truncate(l);
-        const TQString vis = visualOrder(tmp, TQChar::DirON);
-
-        const int visLen = (int)vis.length();
-        for ( int i = 0; i < visLen; i++ ) {
-            if ( !to8bit( vis[i], &rstr ) )
-                rstr += (char)unkn;
-        }
-    }
-    // input that consisted only of dropped characters still yields one byte
-    if ( l > 0 && !rstr.length() )
-        rstr += (char)unkn;
-
-    // Report the produced length as documented. It can differ from the
-    // input length because marks are dropped and decompositions may expand
-    // to several bytes.
-    lenInOut = (int)rstr.length();
-
-    return rstr;
-}
-
-/*! \reimp
- */
-int TQHebrewCodec::heuristicContentMatch(const char* chars, int len) const
-{
-    // Counts the bytes of the upper half (>= 0x80) that have a mapping in
-    // heb_to_tqunicode. Returns -1 as soon as a byte without a mapping is
-    // seen. ASCII bytes do not affect the score.
-    if ( !chars )
-        return 0;
-
-    const unsigned char *c = (const unsigned char *)chars;
-
-    int score = 0;
-    for ( int i = 0; i < len; i++ ) {
-        // 0x80 itself belongs to the upper half as well (toUnicode() tests
-        // `> 127`); it has no mapping and must be rejected like the rest.
-        if ( c[i] >= 0x80 ) {
-            if ( heb_to_tqunicode[c[i] - 0x80] == 0xFFFD )
-                return -1;
-            score++;
-        }
-    }
-    return score;
-}
-
-#endif