diff options
author | Timothy Pearson <kb9vqf@pearsoncomputing.net> | 2011-07-10 15:24:15 -0500 |
---|---|---|
committer | Timothy Pearson <kb9vqf@pearsoncomputing.net> | 2011-07-10 15:24:15 -0500 |
commit | bd0f3345a938b35ce6a12f6150373b0955b8dd12 (patch) | |
tree | 7a520322212d48ebcb9fbe1087e7fca28b76185c /src/kernel/qscriptengine_x11.cpp | |
download | qt3-bd0f3345a938b35ce6a12f6150373b0955b8dd12.tar.gz qt3-bd0f3345a938b35ce6a12f6150373b0955b8dd12.zip |
Add Qt3 development HEAD version
Diffstat (limited to 'src/kernel/qscriptengine_x11.cpp')
-rw-r--r-- | src/kernel/qscriptengine_x11.cpp | 3752 |
1 files changed, 3752 insertions, 0 deletions
diff --git a/src/kernel/qscriptengine_x11.cpp b/src/kernel/qscriptengine_x11.cpp new file mode 100644 index 0000000..7d2b77d --- /dev/null +++ b/src/kernel/qscriptengine_x11.cpp @@ -0,0 +1,3752 @@ +/**************************************************************************** +** +** Copyright (C) 2003-2008 Trolltech ASA. All rights reserved. +** +** This file is part of the kernel module of the Qt GUI Toolkit. +** +** This file may be used under the terms of the GNU General +** Public License versions 2.0 or 3.0 as published by the Free +** Software Foundation and appearing in the files LICENSE.GPL2 +** and LICENSE.GPL3 included in the packaging of this file. +** Alternatively you may (at your option) use any later version +** of the GNU General Public License if such license has been +** publicly approved by Trolltech ASA (or its successors, if any) +** and the KDE Free Qt Foundation. +** +** Please review the following information to ensure GNU General +** Public Licensing requirements will be met: +** http://trolltech.com/products/qt/licenses/licensing/opensource/. +** If you are unsure which license is appropriate for your use, please +** review the following information: +** http://trolltech.com/products/qt/licenses/licensing/licensingoverview +** or contact the sales department at sales@trolltech.com. +** +** This file may be used under the terms of the Q Public License as +** defined by Trolltech ASA and appearing in the file LICENSE.QPL +** included in the packaging of this file. Licensees holding valid Qt +** Commercial licenses may use this file in accordance with the Qt +** Commercial License Agreement provided with the Software. +** +** This file is provided "AS IS" with NO WARRANTY OF ANY KIND, +** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR +** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted +** herein. +** +**********************************************************************/ + +// ------------------------------------------------------------------------------------------------------------------ +// +// Continuation of middle eastern languages +// +// ------------------------------------------------------------------------------------------------------------------ + +// #### stil missing: identify invalid character combinations +static bool syriac_shape(QShaperItem *item) +{ + Q_ASSERT(item->script == QFont::Syriac); + +#ifndef QT_NO_XFTFREETYPE + QOpenType *openType = item->font->openType(); + if (openType && openType->supportsScript(QFont::Syriac)) { + bool ot_ok; + if (arabicSyriacOpenTypeShape(openType, item, &ot_ok)) + return true; + if (ot_ok) + return false; + // fall through to the non OT code + } +#endif + return basic_shape(item); +} + + +static bool thaana_shape(QShaperItem *item) +{ + Q_ASSERT(item->script == QFont::Thaana); + +#ifndef QT_NO_XFTFREETYPE + QOpenType *openType = item->font->openType(); + + if (openType && openType->supportsScript(item->script)) { + openType->selectScript(QFont::Thaana); + if (item->font->stringToCMap(item->string->unicode()+item->from, item->length, item->glyphs, item->advances, + &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError) + return FALSE; + heuristicSetGlyphAttributes(item); + openType->shape(item); + return openType->positionAndAdd(item); + } +#endif + return basic_shape(item); +} + +// -------------------------------------------------------------------------------------------------------------------------------------------- +// +// Indic languages +// +// -------------------------------------------------------------------------------------------------------------------------------------------- + +enum Form { + Invalid = 0x0, + Unknown = Invalid, + Consonant, + Nukta, + Halant, + Matra, + VowelMark, + StressMark, + IndependentVowel, + LengthMark, + Control, + Other +}; + +static const unsigned char indicForms[0xe00-0x900] = { + // Devangari + Invalid, VowelMark, VowelMark, VowelMark, + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Unknown, Unknown, + Nukta, Other, Matra, Matra, + + Matra, Matra, Matra, Matra, + Matra, Matra, Matra, Matra, + Matra, Matra, Matra, Matra, + Matra, Halant, Unknown, Unknown, + + Other, StressMark, StressMark, StressMark, + StressMark, Unknown, Unknown, Unknown, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + IndependentVowel, IndependentVowel, VowelMark, VowelMark, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Consonant, + Consonant, Consonant /* ??? */, Consonant, Consonant, + + // Bengali + Invalid, VowelMark, VowelMark, VowelMark, + Invalid, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, Invalid, Invalid, IndependentVowel, + + IndependentVowel, Invalid, Invalid, IndependentVowel, + IndependentVowel, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Invalid, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Invalid, Consonant, Invalid, + Invalid, Invalid, Consonant, Consonant, + Consonant, Consonant, Unknown, Unknown, + Nukta, Other, Matra, Matra, + + Matra, Matra, Matra, Matra, + Matra, Invalid, Invalid, Matra, + Matra, Invalid, Invalid, Matra, + Matra, Halant, Consonant, Unknown, + + Invalid, Invalid, Invalid, Invalid, + Invalid, Invalid, Invalid, VowelMark, + Invalid, Invalid, Invalid, Invalid, + Consonant, Consonant, Invalid, Consonant, + + IndependentVowel, IndependentVowel, VowelMark, VowelMark, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + Consonant, Consonant, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + // Gurmukhi + Invalid, VowelMark, VowelMark, VowelMark, + Invalid, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, Invalid, + Invalid, Invalid, Invalid, IndependentVowel, + + IndependentVowel, Invalid, Invalid, IndependentVowel, + IndependentVowel, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Invalid, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Invalid, Consonant, Consonant, + Invalid, Consonant, Consonant, Invalid, + Consonant, Consonant, Unknown, Unknown, + Nukta, Other, Matra, Matra, + + Matra, Matra, Matra, Invalid, + Invalid, Invalid, Invalid, Matra, + Matra, Invalid, Invalid, Matra, + Matra, Halant, Unknown, Unknown, + + Invalid, Invalid, Invalid, Invalid, + Invalid, Unknown, Unknown, Unknown, + Invalid, Consonant, Consonant, Consonant, + Consonant, Invalid, Consonant, Invalid, + + Other, Other, Invalid, Invalid, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + StressMark, StressMark, Consonant, Consonant, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + // Gujarati + Invalid, VowelMark, VowelMark, VowelMark, + Invalid, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, Invalid, IndependentVowel, + + IndependentVowel, IndependentVowel, Invalid, IndependentVowel, + IndependentVowel, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Invalid, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Invalid, Consonant, Consonant, + Invalid, Consonant, Consonant, Consonant, + Consonant, Consonant, Unknown, Unknown, + Nukta, Other, Matra, Matra, + + Matra, Matra, Matra, Matra, + Matra, Matra, Invalid, Matra, + Matra, Matra, Invalid, Matra, + Matra, Halant, Unknown, Unknown, + + Other, Unknown, Unknown, Unknown, + Unknown, Unknown, Unknown, Unknown, + Unknown, Unknown, Unknown, Unknown, + Unknown, Unknown, Unknown, Unknown, + + IndependentVowel, IndependentVowel, VowelMark, VowelMark, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + // Oriya + Invalid, VowelMark, VowelMark, VowelMark, + Invalid, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, Invalid, Invalid, IndependentVowel, + + IndependentVowel, Invalid, Invalid, IndependentVowel, + IndependentVowel, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Invalid, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Invalid, Consonant, Consonant, + Invalid, Consonant, Consonant, Consonant, + Consonant, Consonant, Unknown, Unknown, + Nukta, Other, Matra, Matra, + + Matra, Matra, Matra, Matra, + Invalid, Invalid, Invalid, Matra, + Matra, Invalid, Invalid, Matra, + Matra, Halant, Unknown, Unknown, + + Other, Invalid, Invalid, Invalid, + Invalid, Unknown, LengthMark, LengthMark, + Invalid, Invalid, Invalid, Invalid, + Consonant, Consonant, Invalid, Consonant, + + IndependentVowel, IndependentVowel, Invalid, Invalid, + Invalid, Invalid, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + Other, Consonant, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + //Tamil + Invalid, Invalid, VowelMark, Other, + Invalid, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, Invalid, + Invalid, Invalid, IndependentVowel, IndependentVowel, + + IndependentVowel, Invalid, IndependentVowel, IndependentVowel, + IndependentVowel, Consonant, Invalid, Invalid, + Invalid, Consonant, Consonant, Invalid, + Consonant, Invalid, Consonant, Consonant, + + Invalid, Invalid, Invalid, Consonant, + Consonant, Invalid, Invalid, Invalid, + Consonant, Consonant, Consonant, Invalid, + Invalid, Invalid, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Unknown, Unknown, + Invalid, Invalid, Matra, Matra, + + Matra, Matra, Matra, Invalid, + Invalid, Invalid, Matra, Matra, + Matra, Invalid, Matra, Matra, + Matra, Halant, Invalid, Invalid, + + Invalid, Invalid, Invalid, Invalid, + Invalid, Invalid, Invalid, LengthMark, + Invalid, Invalid, Invalid, Invalid, + Invalid, Invalid, Invalid, Invalid, + + Invalid, Invalid, Invalid, Invalid, + Invalid, Invalid, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + // Telugu + Invalid, VowelMark, VowelMark, VowelMark, + Invalid, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, Invalid, IndependentVowel, IndependentVowel, + + IndependentVowel, Invalid, IndependentVowel, IndependentVowel, + IndependentVowel, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Invalid, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Invalid, Consonant, Consonant, Consonant, + Consonant, Consonant, Unknown, Unknown, + Invalid, Invalid, Matra, Matra, + + Matra, Matra, Matra, Matra, + Matra, Invalid, Matra, Matra, + Matra, Invalid, Matra, Matra, + Matra, Halant, Invalid, Invalid, + + Invalid, Invalid, Invalid, Invalid, + Invalid, LengthMark, Matra, Invalid, + Invalid, Invalid, Invalid, Invalid, + Invalid, Invalid, Invalid, Invalid, + + IndependentVowel, IndependentVowel, Invalid, Invalid, + Invalid, Invalid, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + // Kannada + Invalid, Invalid, VowelMark, VowelMark, + Invalid, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, Invalid, IndependentVowel, IndependentVowel, + + IndependentVowel, Invalid, IndependentVowel, IndependentVowel, + IndependentVowel, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Invalid, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Invalid, Consonant, Consonant, Consonant, + Consonant, Consonant, Unknown, Unknown, + Nukta, Other, Matra, Matra, + + Matra, Matra, Matra, Matra, + Matra, Invalid, Matra, Matra, + Matra, Invalid, Matra, Matra, + Matra, Halant, Invalid, Invalid, + + Invalid, Invalid, Invalid, Invalid, + Invalid, LengthMark, LengthMark, Invalid, + Invalid, Invalid, Invalid, Invalid, + Invalid, Invalid, Consonant, Invalid, + + IndependentVowel, IndependentVowel, VowelMark, VowelMark, + Invalid, Invalid, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + // Malayalam + Invalid, Invalid, VowelMark, VowelMark, + Invalid, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, Invalid, IndependentVowel, IndependentVowel, + + IndependentVowel, Invalid, IndependentVowel, IndependentVowel, + IndependentVowel, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Invalid, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Unknown, Unknown, + Invalid, Invalid, Matra, Matra, + + Matra, Matra, Matra, Matra, + Invalid, Invalid, Matra, Matra, + Matra, Invalid, Matra, Matra, + Matra, Halant, Invalid, Invalid, + + Invalid, Invalid, Invalid, Invalid, + Invalid, Invalid, Invalid, LengthMark, + Invalid, Invalid, Invalid, Invalid, + Invalid, Invalid, Invalid, Invalid, + + IndependentVowel, IndependentVowel, Invalid, Invalid, + Invalid, Invalid, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, + + // Sinhala + Invalid, Invalid, VowelMark, VowelMark, + Invalid, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + + IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel, + IndependentVowel, IndependentVowel, IndependentVowel, Invalid, + Invalid, Invalid, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + + Consonant, Consonant, Invalid, Consonant, + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Consonant, + Invalid, Consonant, Invalid, Invalid, + + Consonant, Consonant, Consonant, Consonant, + Consonant, Consonant, Consonant, Invalid, + Invalid, Invalid, Halant, Invalid, + Invalid, Invalid, Invalid, Matra, + + Matra, Matra, Matra, Matra, + Matra, Invalid, Matra, Invalid, + Matra, Matra, Matra, Matra, + Matra, Matra, Matra, Matra, + + Invalid, Invalid, Invalid, Invalid, + Invalid, Invalid, Invalid, Invalid, + Invalid, Invalid, Invalid, Invalid, + Invalid, Invalid, Invalid, Invalid, + + Invalid, Invalid, Matra, Matra, + Other, Other, Other, Other, + Other, Other, Other, Other, + Other, Other, Other, Other, +}; + +enum Position { + None, + Pre, + Above, + Below, + Post, + Split, + Base, + Reph, + Vattu, + Inherit +}; + +static const unsigned char indicPosition[0xe00-0x900] = { + // Devanagari + None, Above, Above, Post, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + Below, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, Post, Pre, + + Post, Below, Below, Below, + Below, Above, Above, Above, + Above, Post, Post, Post, + Post, None, None, None, + + None, Above, Below, Above, + Above, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, Below, Below, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + // Bengali + None, Above, Post, Post, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + Below, None, None, Post, + + Below, None, None, None, + None, None, None, None, + None, None, None, None, + Below, None, Post, Pre, + + Post, Below, Below, Below, + Below, None, None, Pre, + Pre, None, None, Split, + Split, Below, None, None, + + None, None, None, None, + None, None, None, Post, + None, None, None, None, + None, None, None, None, + + None, None, Below, Below, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + // Gurmukhi + None, Above, Above, Post, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, Post, + + Below, None, None, None, + None, Below, None, None, + None, Below, None, None, + Below, None, Post, Pre, + + Post, Below, Below, None, + None, None, None, Above, + Above, None, None, Above, + Above, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + Above, Above, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + // Gujarati + None, Above, Above, Post, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + Below, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, Post, Pre, + + Post, Below, Below, Below, + Below, Above, None, Above, + Above, Post, None, Post, + Post, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, Below, Below, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + // Oriya + None, Above, Post, Post, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + Below, None, None, None, + Below, None, None, None, + Below, Below, Below, Post, + + Below, None, Below, Below, + None, None, None, None, + None, None, None, None, + None, None, Post, Above, + + Post, Below, Below, Below, + None, None, None, Pre, + Split, None, None, Split, + Split, None, None, None, + + None, None, None, None, + None, None, Above, Post, + None, None, None, None, + None, None, None, Post, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, Below, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + // Tamil + None, None, Above, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, Post, Post, + + Above, Below, Below, None, + None, None, Pre, Pre, + Pre, None, Split, Split, + Split, Halant, None, None, + + None, None, None, None, + None, None, None, Post, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + // Telugu + None, Post, Post, Post, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, Below, Below, Below, + Below, Below, Below, Below, + Below, Below, Below, Below, + + Below, Below, Below, Below, + Below, Below, Below, Below, + Below, None, Below, Below, + Below, Below, Below, Below, + + Below, None, Below, Below, + None, Below, Below, Below, + Below, Below, None, None, + None, None, Post, Above, + + Above, Post, Post, Post, + Post, None, Above, Above, + Split, None, Post, Above, + Above, Halant, None, None, + + None, None, None, None, + None, Above, Below, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + // Kannada + None, None, Post, Post, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, Below, Below, Below, + Below, Below, Below, Below, + Below, Below, Below, Below, + + Below, Below, Below, Below, + Below, Below, Below, Below, + Below, Below, Below, Below, + Below, Below, Below, Below, + + Below, None, Below, Below, + None, Below, Below, Below, + Below, Below, None, None, + None, None, Post, Above, + + Split, Post, Post, Post, + Post, None, Above, Split, + Split, None, Split, Split, + Above, Halant, None, None, + + None, None, None, None, + None, Post, Post, None, + None, None, None, None, + None, None, Below, None, + + None, None, Below, Below, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + // Malayalam + None, None, Post, Post, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, Post, + + Post, None, Below, None, + None, Post, None, None, + None, None, None, None, + None, None, Post, Post, + + Post, Post, Post, Post, + None, None, Pre, Pre, + Pre, None, Split, Split, + Split, Halant, None, None, + + None, None, None, None, + None, None, None, Post, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + // Sinhala + None, None, Post, Post, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, Post, + + Post, Post, Above, Above, + Below, None, Below, None, + Post, Pre, Split, Pre, + Split, Split, Split, Post, + + None, None, None, None, + None, None, None, None, + None, None, None, None, + None, None, None, None, + + None, None, Post, Post, + None, None, None, None, + None, None, None, None, + None, None, None, None +}; + +static inline Form form(unsigned short uc) { + if (uc < 0x900 || uc > 0xdff) { + if (uc == 0x25cc) + return Consonant; + if (uc == 0x200c || uc == 0x200d) + return Control; + return Other; + } + return (Form)indicForms[uc-0x900]; +} + +static inline Position indic_position(unsigned short uc) { + if (uc < 0x900 || uc > 0xdff) + return None; + return (Position) indicPosition[uc-0x900]; +} + + +enum IndicScriptProperties { + HasReph = 0x01, + HasSplit = 0x02 +}; + +const uchar scriptProperties[10] = { + // Devanagari, + HasReph, + // Bengali, + HasReph|HasSplit, + // Gurmukhi, + 0, + // Gujarati, + HasReph, + // Oriya, + HasReph|HasSplit, + // Tamil, + HasSplit, + // Telugu, + HasSplit, + // Kannada, + HasSplit|HasReph, + // Malayalam, + HasSplit, + // Sinhala, + HasSplit +}; + +struct IndicOrdering { + Form form; + Position position; +}; + +static const IndicOrdering devanagari_order [] = { + { Consonant, Below }, + { Matra, Below }, + { VowelMark, Below }, + { StressMark, Below }, + { Matra, Above }, + { Matra, Post }, + { Consonant, Reph }, + { VowelMark, Above }, + { StressMark, Above }, + { VowelMark, Post }, + { (Form)0, None } +}; + +static const IndicOrdering bengali_order [] = { + { Consonant, Below }, + { Matra, Below }, + { Matra, Above }, + { Consonant, Reph }, + { VowelMark, Above }, + { Consonant, Post }, + { Matra, Post }, + { VowelMark, Post }, + { (Form)0, None } +}; + +static const IndicOrdering gurmukhi_order [] = { + { Consonant, Below }, + { Matra, Below }, + { Matra, Above }, + { Consonant, Post }, + { Matra, Post }, + { VowelMark, Above }, + { (Form)0, None } +}; + +static const IndicOrdering tamil_order [] = { + { Matra, Above }, + { Matra, Post }, + { VowelMark, Post }, + { (Form)0, None } +}; + +static const IndicOrdering telugu_order [] = { + { Matra, Above }, + { Matra, Below }, + { Matra, Post }, + { Consonant, Below }, + { Consonant, Post }, + { VowelMark, Post }, + { (Form)0, None } +}; + +static const IndicOrdering kannada_order [] = { + { Matra, Above }, + { Matra, Post }, + { Consonant, Below }, + { Consonant, Post }, + { LengthMark, Post }, + { Consonant, Reph }, + { VowelMark, Post }, + { (Form)0, None } +}; + +static const IndicOrdering malayalam_order [] = { + { Consonant, Below }, + { Matra, Below }, + { Consonant, Reph }, + { Consonant, Post }, + { Matra, Post }, + { VowelMark, Post }, + { (Form)0, None } +}; + +static const IndicOrdering sinhala_order [] = { + { Matra, Below }, + { Matra, Above }, + { Matra, Post }, + { VowelMark, Post }, + { (Form)0, None } +}; + +static const IndicOrdering * const indic_order[] = { + devanagari_order, // Devanagari + bengali_order, // Bengali + gurmukhi_order, // Gurmukhi + devanagari_order, // Gujarati + bengali_order, // Oriya + tamil_order, // Tamil + telugu_order, // Telugu + kannada_order, // Kannada + malayalam_order, // Malayalam + sinhala_order // Sinhala +}; + + + +// vowel matras that have to be split into two parts. +static const unsigned short split_matras[] = { + // matra, split1, split2 + + // bengalis + 0x9cb, 0x9c7, 0x9be, + 0x9cc, 0x9c7, 0x9d7, + // oriya + 0xb48, 0xb47, 0xb56, + 0xb4b, 0xb47, 0xb3e, + 0xb4c, 0xb47, 0xb57, + // tamil + 0xbca, 0xbc6, 0xbbe, + 0xbcb, 0xbc7, 0xbbe, + 0xbcc, 0xbc6, 0xbd7, + // telugu + 0xc48, 0xc46, 0xc56, + // kannada + 0xcc0, 0xcbf, 0xcd5, + 0xcc7, 0xcc6, 0xcd5, + 0xcc8, 0xcc6, 0xcd6, + 0xcca, 0xcc6, 0xcc2, + 0xccb, 0xcca, 0xcd5, + // malayalam + 0xd4a, 0xd46, 0xd3e, + 0xd4b, 0xd47, 0xd3e, + 0xd4c, 0xd46, 0xd57, + // sinhala + 0xdda, 0xdd9, 0xdca, + 0xddc, 0xdd9, 0xdcf, + 0xddd, 0xddc, 0xdca, + 0xdde, 0xdd9, 0xddf, + 0xffff +}; + +static inline void splitMatra(unsigned short *reordered, int matra, int &len, int &base) +{ + unsigned short matra_uc = reordered[matra]; + //qDebug("matra=%d, reordered[matra]=%x", matra, reordered[matra]); + + const unsigned short *split = split_matras; + while (split[0] < matra_uc) + split += 3; + + assert(*split == matra_uc); + ++split; + + if (indic_position(*split) == Pre) { + reordered[matra] = split[1]; + memmove(reordered + 1, reordered, len*sizeof(unsigned short)); + reordered[0] = split[0]; + base++; + } else { + memmove(reordered + matra + 1, reordered + matra, (len-matra)*sizeof(unsigned short)); + reordered[matra] = split[0]; + reordered[matra+1] = split[1]; + } + len++; +} + +enum IndicProperties { + // these two are already defined +// CcmpProperty = 0x1, +// InitProperty = 0x2, + NuktaProperty = 0x4, + AkhantProperty = 0x8, + RephProperty = 0x10, + PreFormProperty = 0x20, + BelowFormProperty = 0x40, + AboveFormProperty = 0x80, + HalfFormProperty = 0x100, + PostFormProperty = 0x200, + VattuProperty = 0x400, + PreSubstProperty = 0x800, + BelowSubstProperty = 0x1000, + AboveSubstProperty = 0x2000, + PostSubstProperty = 0x4000, + HalantProperty = 0x8000, + CligProperty = 0x10000 +}; + +#ifndef QT_NO_XFTFREETYPE +static const QOpenType::Features indic_features[] = { + { FT_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, + { FT_MAKE_TAG('i', 'n', 'i', 't'), InitProperty }, + { FT_MAKE_TAG('n', 'u', 'k', 't'), NuktaProperty }, + { FT_MAKE_TAG('a', 'k', 'h', 'n'), AkhantProperty }, + { FT_MAKE_TAG('r', 'p', 'h', 'f'), RephProperty }, + { FT_MAKE_TAG('b', 'l', 'w', 'f'), BelowFormProperty }, + { FT_MAKE_TAG('h', 'a', 'l', 'f'), HalfFormProperty }, + { FT_MAKE_TAG('p', 's', 't', 'f'), PostFormProperty }, + { FT_MAKE_TAG('v', 'a', 't', 'u'), VattuProperty }, + { FT_MAKE_TAG('p', 'r', 'e', 's'), PreSubstProperty }, + { FT_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty }, + { FT_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty }, + { FT_MAKE_TAG('p', 's', 't', 's'), PostSubstProperty }, + { FT_MAKE_TAG('h', 'a', 'l', 'n'), HalantProperty }, + { 0, 0 } +}; +#endif + +// #define INDIC_DEBUG +#ifdef INDIC_DEBUG +#define IDEBUG qDebug +#else +#define IDEBUG if(0) qDebug +#endif + +#ifdef INDIC_DEBUG +static QString propertiesToString(int properties) +{ + QString res; + properties = ~properties; + if (properties & CcmpProperty) + res += "Ccmp "; + if (properties & InitProperty) + res += "Init "; + if (properties & NuktaProperty) + res += "Nukta "; + if (properties & AkhantProperty) + res += "Akhant "; + if (properties & RephProperty) + res += "Reph "; + if (properties & PreFormProperty) + res += "PreForm "; + if (properties & BelowFormProperty) + res += "BelowForm "; + if (properties & AboveFormProperty) + res += "AboveForm "; + if (properties & HalfFormProperty) + res += "HalfForm "; + if (properties & PostFormProperty) + res += "PostForm "; + if (properties & VattuProperty) + res += "Vattu "; + if (properties & PreSubstProperty) + res += "PreSubst "; + if (properties & BelowSubstProperty) + res += "BelowSubst "; + if (properties & AboveSubstProperty) + res += "AboveSubst "; + if (properties & PostSubstProperty) + res += "PostSubst "; + if (properties & HalantProperty) + res += "Halant "; + if (properties & CligProperty) + res += "Clig "; + return res; +} +#endif + +static bool indic_shape_syllable(QOpenType *openType, QShaperItem *item, bool invalid) +{ + Q_UNUSED(openType) + int script = item->script; + Q_ASSERT(script >= QFont::Devanagari && script <= QFont::Sinhala); + const unsigned short script_base = 0x0900 + 0x80*(script-QFont::Devanagari); + const unsigned short ra = script_base + 0x30; + const unsigned short halant = script_base + 0x4d; + const unsigned short nukta = script_base + 0x3c; + + int len = item->length; + IDEBUG(">>>>> indic shape: from=%d, len=%d invalid=%d", item->from, item->length, invalid); + + if (item->num_glyphs < len+4) { + item->num_glyphs = len+4; + return FALSE; + } + + QVarLengthArray<unsigned short> reordered(len+4); + QVarLengthArray<unsigned char> position(len+4); + + unsigned char properties = scriptProperties[script-QFont::Devanagari]; + + if (invalid) { + *reordered.data() = 0x25cc; + memcpy(reordered.data()+1, item->string->unicode() + item->from, len*sizeof(QChar)); + len++; + } else { + memcpy(reordered.data(), item->string->unicode() + item->from, len*sizeof(QChar)); + } + if (reordered[len-1] == 0x200c) // zero width non joiner + len--; + + int i; + int base = 0; + int reph = -1; + +#ifdef INDIC_DEBUG + IDEBUG("original:"); + for (i = 0; i < len; i++) { + IDEBUG(" %d: %4x", i, reordered[i]); + } +#endif + + if (len != 1) { + unsigned short *uc = reordered.data(); + bool beginsWithRa = FALSE; + + // Rule 1: find base consonant + // + // The shaping engine finds the base consonant of the + // syllable, using the following algorithm: starting from the + // end of the syllable, move backwards until a consonant is + // found that does not have a below-base or post-base form + // (post-base forms have to follow below-base forms), or + // arrive at the first consonant. The consonant stopped at + // will be the base. + // + // * If the syllable starts with Ra + H (in a script that has + // 'Reph'), Ra is excluded from candidates for base + // consonants. + // + // * In Kannada and Telugu, the base consonant cannot be + // farther than 3 consonants from the end of the syllable. + // #### replace the HasReph property by testing if the feature exists in the font! + if (form(*uc) == Consonant || (script == QFont::Bengali && form(*uc) == IndependentVowel)) { + beginsWithRa = (properties & HasReph) && ((len > 2) && *uc == ra && *(uc+1) == halant); + + if (beginsWithRa && form(*(uc+2)) == Control) + beginsWithRa = FALSE; + + base = (beginsWithRa ? 2 : 0); + IDEBUG(" length = %d, beginsWithRa = %d, base=%d", len, beginsWithRa, base); + + int lastConsonant = 0; + int matra = -1; + // we remember: + // * the last consonant since we need it for rule 2 + // * the matras position for rule 3 and 4 + + // figure out possible base glyphs + memset(position.data(), 0, len); + if (script == QFont::Devanagari || script == QFont::Gujarati) { + bool vattu = FALSE; + for (i = base; i < len; ++i) { + position[i] = form(uc[i]); + if (position[i] == Consonant) { + lastConsonant = i; + vattu = (!vattu && uc[i] == ra); + if (vattu) { + IDEBUG("excluding vattu glyph at %d from base candidates", i); + position[i] = Vattu; + } + } else if (position[i] == Matra) { + matra = i; + } + } + } else { + for (i = base; i < len; ++i) { + position[i] = form(uc[i]); + if (position[i] == Consonant) + lastConsonant = i; + else if (matra < 0 && position[i] == Matra) + matra = i; + } + } + int skipped = 0; + Position pos = Post; + for (i = len-1; i > base; i--) { + if (position[i] != Consonant && (position[i] != Control || script == QFont::Kannada)) + continue; + + Position charPosition = indic_position(uc[i]); + if (pos == Post && charPosition == Post) { + pos = Post; + } else if ((pos == Post || pos == Below) && charPosition == Below) { + if (script == QFont::Devanagari || script == QFont::Gujarati) + base = i; + pos = Below; + } else { + base = i; + break; + } + if (skipped == 2 && (script == QFont::Kannada || script == QFont::Telugu)) { + base = i; + break; + } + ++skipped; + } + + IDEBUG(" base consonant at %d skipped=%d, lastConsonant=%d", base, skipped, lastConsonant); + + // Rule 2: + // + // If the base consonant is not the last one, Uniscribe + // moves the halant from the base consonant to the last + // one. + if (lastConsonant > base) { + int halantPos = 0; + if (uc[base+1] == halant) + halantPos = base + 1; + else if (uc[base+1] == nukta && uc[base+2] == halant) + halantPos = base + 2; + if (halantPos > 0) { + IDEBUG(" moving halant from %d to %d!", base+1, lastConsonant); + for (i = halantPos; i < lastConsonant; i++) + uc[i] = uc[i+1]; + uc[lastConsonant] = halant; + } + } + + // Rule 3: + // + // If the syllable starts with Ra + H, Uniscribe moves + // this combination so that it follows either: + + // * the post-base 'matra' (if any) or the base consonant + // (in scripts that show similarity to Devanagari, i.e., + // Devanagari, Gujarati, Bengali) + // * the base consonant (other scripts) + // * the end of the syllable (Kannada) + + Position matra_position = None; + if (matra > 0) + matra_position = indic_position(uc[matra]); + IDEBUG(" matra at %d with form %d, base=%d", matra, matra_position, base); + + if (beginsWithRa && base != 0) { + int toPos = base+1; + if (toPos < len && uc[toPos] == nukta) + toPos++; + if (toPos < len && uc[toPos] == halant) + toPos++; + if (toPos < len && uc[toPos] == 0x200d) + toPos++; + if (toPos < len-1 && uc[toPos] == ra && uc[toPos+1] == halant) + toPos += 2; + if (script == QFont::Devanagari || script == QFont::Gujarati || script == QFont::Bengali) { + if (matra_position == Post || matra_position == Split) { + toPos = matra+1; + matra -= 2; + } + } else if (script == QFont::Kannada) { + toPos = len; + matra -= 2; + } + + IDEBUG("moving leading ra+halant to position %d", toPos); + for (i = 2; i < toPos; i++) + uc[i-2] = uc[i]; + uc[toPos-2] = ra; + uc[toPos-1] = halant; + base -= 2; + if (properties & HasReph) + reph = toPos-2; + } + + // Rule 4: + + // Uniscribe splits two- or three-part matras into their + // parts. This splitting is a character-to-character + // operation). + // + // Uniscribe describes some moving operations for these + // matras here. For shaping however all pre matras need + // to be at the begining of the syllable, so we just move + // them there now. + if (matra_position == Split) { + splitMatra(uc, matra, len, base); + // Handle three-part matras (0xccb in Kannada) + matra_position = indic_position(uc[matra]); + if (matra_position == Split) + splitMatra(uc, matra, len, base); + } else if (matra_position == Pre) { + unsigned short m = uc[matra]; + while (matra--) + uc[matra+1] = uc[matra]; + uc[0] = m; + base++; + } + } + + // Rule 5: + // + // Uniscribe classifies consonants and 'matra' parts as + // pre-base, above-base (Reph), below-base or post-base. This + // classification exists on the character code level and is + // language-dependent, not font-dependent. + for (i = 0; i < base; ++i) + position[i] = Pre; + position[base] = Base; + for (i = base+1; i < len; ++i) { + position[i] = indic_position(uc[i]); + // #### replace by adjusting table + if (uc[i] == nukta || uc[i] == halant) + position[i] = Inherit; + } + if (reph > 0) { + // recalculate reph, it might have changed. + for (i = base+1; i < len; ++i) + if (uc[i] == ra) + reph = i; + position[reph] = Reph; + position[reph+1] = Inherit; + } + + // all reordering happens now to the chars after the base + int fixed = base+1; + if (fixed < len && uc[fixed] == nukta) + fixed++; + if (fixed < len && uc[fixed] == halant) + fixed++; + if (fixed < len && uc[fixed] == 0x200d) + fixed++; + +#ifdef INDIC_DEBUG + for (i = fixed; i < len; ++i) + IDEBUG("position[%d] = %d, form=%d", i, position[i], form(uc[i])); +#endif + // we continuosly position the matras and vowel marks and increase the fixed + // until we reached the end. + const IndicOrdering *finalOrder = indic_order[script-QFont::Devanagari]; + + IDEBUG(" reordering pass:"); + //IDEBUG(" base=%d fixed=%d", base, fixed); + int toMove = 0; + while (finalOrder[toMove].form && fixed < len-1) { + //IDEBUG(" fixed = %d, moving form %d with pos %d", fixed, finalOrder[toMove].form, finalOrder[toMove].position); + for (i = fixed; i < len; i++) { + if (form(uc[i]) == finalOrder[toMove].form && + position[i] == finalOrder[toMove].position) { + // need to move this glyph + int to = fixed; + if (i < len-1 && position[i+1] == Inherit) { + IDEBUG(" moving two chars from %d to %d", i, to); + unsigned short ch = uc[i]; + unsigned short ch2 = uc[i+1]; + unsigned char pos = position[i]; + for (int j = i+1; j > to+1; j--) { + uc[j] = uc[j-2]; + position[j] = position[j-2]; + } + uc[to] = ch; + uc[to+1] = ch2; + position[to] = pos; + position[to+1] = pos; + fixed += 2; + } else { + IDEBUG(" moving one char from %d to %d", i, to); + unsigned short ch = uc[i]; + unsigned char pos = position[i]; + for (int j = i; j > to; j--) { + uc[j] = uc[j-1]; + position[j] = position[j-1]; + } + uc[to] = ch; + position[to] = pos; + fixed++; + } + } + } + toMove++; + } + + } + + if (reph > 0) { + // recalculate reph, it might have changed. + for (i = base+1; i < len; ++i) + if (reordered[i] == ra) + reph = i; + } + + if (item->font->stringToCMap((const QChar *)reordered.data(), len, item->glyphs, item->advances, + &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError) + return FALSE; + + + IDEBUG(" base=%d, reph=%d", base, reph); + IDEBUG("reordered:"); + for (i = 0; i < len; i++) { + item->attributes[i].mark = FALSE; + item->attributes[i].clusterStart = FALSE; + item->attributes[i].justification = 0; + item->attributes[i].zeroWidth = FALSE; + IDEBUG(" %d: %4x", i, reordered[i]); + } + + // now we have the syllable in the right order, and can start running it through open type. + + bool control = FALSE; + for (i = 0; i < len; ++i) + control |= (form(reordered[i]) == Control); + +#ifndef QT_NO_XFTFREETYPE + if (openType) { + + // we need to keep track of where the base glyph is for some + // scripts and use the cluster feature for this. This + // also means we have to correct the logCluster output from + // the open type engine manually afterwards. for indic this + // is rather simple, as all chars just point to the first + // glyph in the syllable. + QVarLengthArray<unsigned short> clusters(len); + QVarLengthArray<unsigned int> properties(len); + + for (i = 0; i < len; ++i) + clusters[i] = i; + + // features we should always apply + for (i = 0; i < len; ++i) + properties[i] = ~(CcmpProperty + | NuktaProperty + | VattuProperty + | PreSubstProperty + | BelowSubstProperty + | AboveSubstProperty + | HalantProperty + | PositioningProperties); + + // Ccmp always applies + // Init + if (item->from == 0 + || !(item->string->unicode()[item->from-1].isLetter() || item->string->unicode()[item->from-1].isMark())) + properties[0] &= ~InitProperty; + + // Nukta always applies + // Akhant + for (i = 0; i <= base; ++i) + properties[i] &= ~AkhantProperty; + // Reph + if (reph >= 0) { + properties[reph] &= ~RephProperty; + properties[reph+1] &= ~RephProperty; + } + // BelowForm + for (i = base+1; i < len; ++i) + properties[i] &= ~BelowFormProperty; + + if (script == QFont::Devanagari || script == QFont::Gujarati) { + // vattu glyphs need this aswell + bool vattu = FALSE; + for (i = base-2; i > 1; --i) { + if (form(reordered[i]) == Consonant) { + vattu = (!vattu && reordered[i] == ra); + if (vattu) { + IDEBUG("forming vattu ligature at %d", i); + properties[i] &= ~BelowFormProperty; + properties[i+1] &= ~BelowFormProperty; + } + } + } + } + // HalfFormProperty + for (i = 0; i < base; ++i) + properties[i] &= ~HalfFormProperty; + if (control) { + for (i = 2; i < len; ++i) { + if (reordered[i] == 0x200d /* ZWJ */) { + properties[i-1] &= ~HalfFormProperty; + properties[i-2] &= ~HalfFormProperty; + } else if (reordered[i] == 0x200c /* ZWNJ */) { + properties[i-1] &= ~HalfFormProperty; + properties[i-2] &= ~HalfFormProperty; + } + } + } + // PostFormProperty + for (i = base+1; i < len; ++i) + properties[i] &= ~PostFormProperty; + // vattu always applies + // pres always applies + // blws always applies + // abvs always applies + + // psts + // ### this looks slightly different from before, but I believe it's correct + if (reordered[len-1] != halant || base != len-2) + properties[base] &= ~PostSubstProperty; + for (i = base+1; i < len; ++i) + properties[i] &= ~PostSubstProperty; + + // halant always applies + +#ifdef INDIC_DEBUG + { + IDEBUG("OT properties:"); + for (int i = 0; i < len; ++i) + qDebug(" i: %s", ::propertiesToString(properties[i]).toLatin1().data()); + } +#endif + + // initialize + item->log_clusters = clusters.data(); + openType->shape(item, properties.data()); + + int newLen = openType->len(); + OTL_GlyphItem otl_glyphs = openType->glyphs(); + + // move the left matra back to it's correct position in malayalam and tamil + if ((script == QFont::Malayalam || script == QFont::Tamil) && (form(reordered[0]) == Matra)) { +// qDebug("reordering matra, len=%d", newLen); + // need to find the base in the shaped string and move the matra there + int basePos = 0; + while (basePos < newLen && (int)otl_glyphs[basePos].cluster <= base) + basePos++; + --basePos; + if (basePos < newLen && basePos > 1) { +// qDebug("moving prebase matra to position %d in syllable newlen=%d", basePos, newLen); + OTL_GlyphItemRec m = otl_glyphs[0]; + --basePos; + for (i = 0; i < basePos; ++i) + otl_glyphs[i] = otl_glyphs[i+1]; + otl_glyphs[basePos] = m; + } + } + + if (!openType->positionAndAdd(item, FALSE)) + return FALSE; + + if (control) { + IDEBUG("found a control char in the syllable"); + int i = 0, j = 0; + while (i < item->num_glyphs) { + if (form(reordered[otl_glyphs[i].cluster]) == Control) { + ++i; + if (i >= item->num_glyphs) + break; + } + item->glyphs[j] = item->glyphs[i]; + ++i; + ++j; + } + item->num_glyphs = j; + } + + } +#endif + + item->attributes[0].clusterStart = TRUE; + IDEBUG("<<<<<<"); + return TRUE; +} + + +/* syllables are of the form: + + (Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark? + (Consonant Nukta? Halant)* Consonant Halant + IndependentVowel VowelMark? StressMark? + + We return syllable boundaries on invalid combinations aswell +*/ +static int indic_nextSyllableBoundary(int script, const QString &s, int start, int end, bool *invalid) +{ + *invalid = FALSE; + IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end); + const QChar *uc = s.unicode()+start; + + int pos = 0; + Form state = form(uc[pos].unicode()); + IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos].unicode()); + pos++; + + if (state != Consonant && state != IndependentVowel) { + if (state != Other) + *invalid = TRUE; + goto finish; + } + + while (pos < end - start) { + Form newState = form(uc[pos].unicode()); + IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos].unicode()); + switch(newState) { + case Control: + newState = state; + if (state == Halant && uc[pos].unicode() == 0x200d /* ZWJ */) + break; + // the control character should be the last char in the item + ++pos; + goto finish; + case Consonant: + if (state == Halant && (script != QFont::Sinhala || uc[pos-1].unicode() == 0x200d /* ZWJ */)) + break; + goto finish; + case Halant: + if (state == Nukta || state == Consonant) + break; + // Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya + if (script == QFont::Bengali && pos == 1 && + (uc[0].unicode() == 0x0985 || uc[0].unicode() == 0x098f)) + break; + goto finish; + case Nukta: + if (state == Consonant) + break; + goto finish; + case StressMark: + if (state == VowelMark) + break; + // fall through + case VowelMark: + if (state == Matra || state == IndependentVowel) + break; + // fall through + case Matra: + if (state == Consonant || state == Nukta) + break; + // ### not sure if this is correct. If it is, does it apply only to Bengali or should + // it work for all Indic languages? + // the combination Independent_A + Vowel Sign AA is allowed. + if (script == QFont::Bengali && uc[pos].unicode() == 0x9be && uc[pos-1].unicode() == 0x985) + break; + if (script == QFont::Tamil && state == Matra) { + if (uc[pos-1].unicode() == 0x0bc6 && + (uc[pos].unicode() == 0xbbe || uc[pos].unicode() == 0xbd7)) + break; + if (uc[pos-1].unicode() == 0x0bc7 && uc[pos].unicode() == 0xbbe) + break; + } + goto finish; + + case LengthMark: + case IndependentVowel: + case Invalid: + case Other: + goto finish; + } + state = newState; + pos++; + } + finish: + return pos+start; +} + +static bool indic_shape(QShaperItem *item) +{ + Q_ASSERT(item->script >= QFont::Devanagari && item->script <= QFont::Sinhala); + +#ifndef QT_NO_XFTFREETYPE + QOpenType *openType = item->font->openType(); + if (openType) + openType->selectScript(item->script, indic_features); +#else + QOpenType *openType = 0; +#endif + unsigned short *logClusters = item->log_clusters; + + QShaperItem syllable = *item; + int first_glyph = 0; + + int sstart = item->from; + int end = sstart + item->length; + IDEBUG("indic_shape: from %d length %d", item->from, item->length); + while (sstart < end) { + bool invalid; + int send = indic_nextSyllableBoundary(item->script, *item->string, sstart, end, &invalid); + IDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart, + invalid ? "TRUE" : "FALSE"); + syllable.from = sstart; + syllable.length = send-sstart; + syllable.glyphs = item->glyphs + first_glyph; + syllable.offsets = item->offsets + first_glyph; + syllable.advances = item->advances + first_glyph; + syllable.attributes = item->attributes + first_glyph; + syllable.num_glyphs = item->num_glyphs - first_glyph; + if (!indic_shape_syllable(openType, &syllable, invalid)) { + IDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs); + item->num_glyphs += syllable.num_glyphs; + return FALSE; + } + item->has_positioning |= syllable.has_positioning; + + // fix logcluster array + IDEBUG("syllable:"); + int i; + for (i = first_glyph; i < first_glyph + syllable.num_glyphs; ++i) + IDEBUG(" %d -> glyph %x", i, item->glyphs[i]); + IDEBUG(" logclusters:"); + for (i = sstart; i < send; ++i) { + IDEBUG(" %d -> glyph %d", i, first_glyph); + logClusters[i-item->from] = first_glyph; + } + sstart = send; + first_glyph += syllable.num_glyphs; + } + item->num_glyphs = first_glyph; + return TRUE; +} + + +static void indic_attributes(int script, const QString &text, int from, int len, QCharAttributes *attributes) +{ + int end = from + len; + const QChar *uc = text.unicode() + from; + attributes += from; + int i = 0; + while (i < len) { + bool invalid; + int boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from; + attributes[i].charStop = TRUE; + + if (boundary > len-1) boundary = len; + i++; + while (i < boundary) { + attributes[i].charStop = FALSE; + ++uc; + ++i; + } + assert(i == boundary); + } + + +} + + +// -------------------------------------------------------------------------------------------------------------------------------------------- +// +// Thai and Lao +// +// -------------------------------------------------------------------------------------------------------------------------------------------- + +#include <qtextcodec.h> +#include <qlibrary.h> + + +static void thaiWordBreaks(const QChar *string, const int len, QCharAttributes *attributes) +{ +#ifndef QT_NO_TEXTCODEC + typedef int (*th_brk_def)(const char*, int[], int); + static QTextCodec *thaiCodec = QTextCodec::codecForMib(2259); + static th_brk_def th_brk = 0; + +#ifndef QT_NO_LIBRARY + /* load libthai dynamically */ + if (!th_brk && thaiCodec) { + th_brk = (th_brk_def)QLibrary::resolve("thai", "th_brk"); + if (!th_brk) + thaiCodec = 0; + } +#endif + + if (!th_brk) + return; + + QCString cstr = thaiCodec->fromUnicode(QConstString(string, len).string()); + + int brp[128]; + int *break_positions = brp; + int numbreaks = th_brk(cstr.data(), break_positions, 128); + if (numbreaks > 128) { + break_positions = new int[numbreaks]; + numbreaks = th_brk(cstr.data(),break_positions, numbreaks); + } + + attributes[0].softBreak = TRUE; + int i; + for (i = 1; i < len; ++i) + attributes[i].softBreak = FALSE; + + for (i = 0; i < numbreaks; ++i) + attributes[break_positions[i]].softBreak = TRUE; + + if (break_positions != brp) + delete [] break_positions; +#endif +} + + +static void thai_attributes( int script, const QString &text, int from, int len, QCharAttributes *attributes ) +{ + Q_UNUSED(script); + Q_ASSERT(script == QFont::Thai); + thaiWordBreaks(text.unicode() + from, len, attributes); +} + + + +// -------------------------------------------------------------------------------------------------------------------------------------------- +// +// Tibetan +// +// -------------------------------------------------------------------------------------------------------------------------------------------- + +// tibetan syllables are of the form: +// head position consonant +// first sub-joined consonant +// ....intermediate sub-joined consonants (if any) +// last sub-joined consonant +// sub-joined vowel (a-chung U+0F71) +// standard or compound vowel sign (or 'virama' for devanagari transliteration) + +enum TibetanForm { + TibetanOther, + TibetanHeadConsonant, + TibetanSubjoinedConsonant, + TibetanSubjoinedVowel, + TibetanVowel +}; + +// this table starts at U+0f40 +static const unsigned char tibetanForm[0x80] = { + TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, + TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, + TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, + TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, + + TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, + TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, + TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, + TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, + + TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, + TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, + TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, + TibetanOther, TibetanOther, TibetanOther, TibetanOther, + + TibetanOther, TibetanVowel, TibetanVowel, TibetanVowel, + TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel, + TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel, + TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel, + + TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel, + TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel, + TibetanOther, TibetanOther, TibetanOther, TibetanOther, + TibetanOther, TibetanOther, TibetanOther, TibetanOther, + + TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, + TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, + TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, + TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, + + TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, + TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, + TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, + TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, + + TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, + TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, + TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, + TibetanSubjoinedConsonant, TibetanOther, TibetanOther, TibetanOther +}; + + +static inline TibetanForm tibetan_form(const QChar &c) +{ + return (TibetanForm)tibetanForm[c.unicode() - 0x0f40]; +} + +#ifndef QT_NO_XFTFREETYPE +static const QOpenType::Features tibetan_features[] = { + { FT_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, + { FT_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty }, + { FT_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty }, + {0, 0} +}; +#endif + +static bool tibetan_shape_syllable(QOpenType *openType, QShaperItem *item, bool invalid) +{ + Q_UNUSED(openType) + int len = item->length; + + if (item->num_glyphs < item->length + 4) { + item->num_glyphs = item->length + 4; + return FALSE; + } + + int i; + QVarLengthArray<unsigned short> reordered(len+4); + + const QChar *str = item->string->unicode() + item->from; + if (invalid) { + *reordered.data() = 0x25cc; + memcpy(reordered.data()+1, str, len*sizeof(QChar)); + len++; + str = (QChar *)reordered.data(); + } + + if (item->font->stringToCMap(str, len, item->glyphs, item->advances, + &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError) + return FALSE; + + for (i = 0; i < item->length; i++) { + item->attributes[i].mark = FALSE; + item->attributes[i].clusterStart = FALSE; + item->attributes[i].justification = 0; + item->attributes[i].zeroWidth = FALSE; + IDEBUG(" %d: %4x", i, str[i].unicode()); + } + + // now we have the syllable in the right order, and can start running it through open type. + +#ifndef QT_NO_XFTFREETYPE + if (openType && openType->supportsScript(QFont::Tibetan)) { + openType->selectScript(QFont::Tibetan, tibetan_features); + + openType->shape(item); + if (!openType->positionAndAdd(item, FALSE)) + return FALSE; + } +#endif + + item->attributes[0].clusterStart = TRUE; + return TRUE; +} + + +static int tibetan_nextSyllableBoundary(const QString &s, int start, int end, bool *invalid) +{ + const QChar *uc = s.unicode() + start; + + int pos = 0; + TibetanForm state = tibetan_form(*uc); + +// qDebug("state[%d]=%d (uc=%4x)", pos, state, uc[pos].unicode()); + pos++; + + if (state != TibetanHeadConsonant) { + if (state != TibetanOther) + *invalid = TRUE; + goto finish; + } + + while (pos < end - start) { + TibetanForm newState = tibetan_form(uc[pos]); + switch(newState) { + case TibetanSubjoinedConsonant: + case TibetanSubjoinedVowel: + if (state != TibetanHeadConsonant && + state != TibetanSubjoinedConsonant) + goto finish; + state = newState; + break; + case TibetanVowel: + if (state != TibetanHeadConsonant && + state != TibetanSubjoinedConsonant && + state != TibetanSubjoinedVowel) + goto finish; + break; + case TibetanOther: + case TibetanHeadConsonant: + goto finish; + } + pos++; + } + +finish: + *invalid = FALSE; + return start+pos; +} + +static bool tibetan_shape(QShaperItem *item) +{ + Q_ASSERT(item->script == QFont::Tibetan); + +#ifndef QT_NO_XFTFREETYPE + QOpenType *openType = item->font->openType(); + if (openType && !openType->supportsScript(item->script)) + openType = 0; +#else + QOpenType *openType = 0; +#endif + unsigned short *logClusters = item->log_clusters; + + QShaperItem syllable = *item; + int first_glyph = 0; + + int sstart = item->from; + int end = sstart + item->length; + while (sstart < end) { + bool invalid; + int send = tibetan_nextSyllableBoundary(*(item->string), sstart, end, &invalid); + IDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart, + invalid ? "TRUE" : "FALSE"); + syllable.from = sstart; + syllable.length = send-sstart; + syllable.glyphs = item->glyphs + first_glyph; + syllable.offsets = item->offsets + first_glyph; + syllable.advances = item->advances + first_glyph; + syllable.attributes = item->attributes + first_glyph; + syllable.num_glyphs = item->num_glyphs - first_glyph; + if (!tibetan_shape_syllable(openType, &syllable, invalid)) { + item->num_glyphs += syllable.num_glyphs; + return FALSE; + } + item->has_positioning |= syllable.has_positioning; + + // fix logcluster array + for (int i = sstart; i < send; ++i) + logClusters[i-item->from] = first_glyph; + sstart = send; + first_glyph += syllable.num_glyphs; + } + item->num_glyphs = first_glyph; + return TRUE; +} + +static void tibetan_attributes(int script, const QString &text, int from, int len, QCharAttributes *attributes) +{ + Q_UNUSED(script); + + int end = from + len; + const QChar *uc = text.unicode() + from; + attributes += from; + int i = 0; + while (i < len) { + bool invalid; + int boundary = tibetan_nextSyllableBoundary(text, from+i, end, &invalid) - from; + + attributes[i].charStop = TRUE; + + if (boundary > len-1) boundary = len; + i++; + while (i < boundary) { + attributes[i].charStop = FALSE; + ++uc; + ++i; + } + assert(i == boundary); + } +} + +// -------------------------------------------------------------------------------------------------------------------------------------------- +// +// Khmer +// +// -------------------------------------------------------------------------------------------------------------------------------------------- + + +// Vocabulary +// Base -> A consonant or an independent vowel in its full (not subscript) form. It is the +// center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels, +// split vowels, signs... but there is only one base in a syllable, it has to be coded as +// the first character of the syllable. +// split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant). +// Khmer language has five of them. Khmer split vowels either have one part before the +// base and one after the base or they have a part before the base and a part above the base. +// The first part of all Khmer split vowels is the same character, identical to +// the glyph of Khmer dependent vowel SRA EI +// coeng --> modifier used in Khmer to construct coeng (subscript) consonants +// Differently than indian languages, the coeng modifies the consonant that follows it, +// not the one preceding it Each consonant has two forms, the base form and the subscript form +// the base form is the normal one (using the consonants code-point), the subscript form is +// displayed when the combination coeng + consonant is encountered. +// Consonant of type 1 -> A consonant which has subscript for that only occupies space under a base consonant +// Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO) +// Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA) +// Consonant shifter -> Khmer has to series of consonants. The same dependent vowel has different sounds +// if it is attached to a consonant of the first series or a consonant of the second series +// Most consonants have an equivalent in the other series, but some of theme exist only in +// one series (for example SA). If we want to use the consonant SA with a vowel sound that +// can only be done with a vowel sound that corresponds to a vowel accompanying a consonant +// of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN +// x17C9 y x17CA. TRIISAP changes a first series consonant to second series sound and +// MUSIKATOAN a second series consonant to have a first series vowel sound. +// Consonant shifter are both normally supercript marks, but, when they are followed by a +// superscript, they change shape and take the form of subscript dependent vowel SRA U. +// If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they +// should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should +// be placed after the coeng consonant. +// Dependent vowel -> In khmer dependent vowels can be placed above, below, before or after the base +// Each vowel has its own position. Only one vowel per syllable is allowed. +// Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are +// Allowed in a syllable. +// +// +// order is important here! This order must be the same that is found in each horizontal +// line in the statetable for Khmer (see khmerStateTable) . +// +enum KhmerCharClassValues { + CC_RESERVED = 0, + CC_CONSONANT = 1, // Consonant of type 1 or independent vowel + CC_CONSONANT2 = 2, // Consonant of type 2 + CC_CONSONANT3 = 3, // Consonant of type 3 + CC_ZERO_WIDTH_NJ_MARK = 4, // Zero Width non joiner character (0x200C) + CC_CONSONANT_SHIFTER = 5, + CC_ROBAT = 6, // Khmer special diacritic accent -treated differently in state table + CC_COENG = 7, // Subscript consonant combining character + CC_DEPENDENT_VOWEL = 8, + CC_SIGN_ABOVE = 9, + CC_SIGN_AFTER = 10, + CC_ZERO_WIDTH_J_MARK = 11, // Zero width joiner character + CC_COUNT = 12 // This is the number of character classes +}; + + +enum KhmerCharClassFlags { + CF_CLASS_MASK = 0x0000FFFF, + + CF_CONSONANT = 0x01000000, // flag to speed up comparing + CF_SPLIT_VOWEL = 0x02000000, // flag for a split vowel -> the first part is added in front of the syllable + CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable + CF_COENG = 0x08000000, // flag to speed up comparing + CF_SHIFTER = 0x10000000, // flag to speed up comparing + CF_ABOVE_VOWEL = 0x20000000, // flag to speed up comparing + + // position flags + CF_POS_BEFORE = 0x00080000, + CF_POS_BELOW = 0x00040000, + CF_POS_ABOVE = 0x00020000, + CF_POS_AFTER = 0x00010000, + CF_POS_MASK = 0x000f0000 +}; + + +// Characters that get refered to by name +enum KhmerChar { + C_SIGN_ZWNJ = 0x200C, + C_SIGN_ZWJ = 0x200D, + C_DOTTED_CIRCLE = 0x25CC, + C_RO = 0x179A, + C_VOWEL_AA = 0x17B6, + C_SIGN_NIKAHIT = 0x17C6, + C_VOWEL_E = 0x17C1, + C_COENG = 0x17D2 +}; + + +// simple classes, they are used in the statetable (in this file) to control the length of a syllable +// they are also used to know where a character should be placed (location in reference to the base character) +// and also to know if a character, when independently displayed, should be displayed with a dotted-circle to +// indicate error in syllable construction +// +enum { + _xx = CC_RESERVED, + _sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE, + _sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER, + _c1 = CC_CONSONANT | CF_CONSONANT, + _c2 = CC_CONSONANT2 | CF_CONSONANT, + _c3 = CC_CONSONANT3 | CF_CONSONANT, + _rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE, + _cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER, + _dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE, + _db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE, + _da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL, + _dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE, + _co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE, + + // split vowel + _va = _da | CF_SPLIT_VOWEL, + _vr = _dr | CF_SPLIT_VOWEL +}; + + +// Character class: a character class value +// ORed with character class flags. +// +typedef unsigned long KhmerCharClass; + + +// Character class tables +// _xx character does not combine into syllable, such as numbers, puntuation marks, non-Khmer signs... +// _sa Sign placed above the base +// _sp Sign placed after the base +// _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants) +// _c2 Consonant of type 2 (only RO) +// _c3 Consonant of type 3 +// _rb Khmer sign robat u17CC. combining mark for subscript consonants +// _cd Consonant-shifter +// _dl Dependent vowel placed before the base (left of the base) +// _db Dependent vowel placed below the base +// _da Dependent vowel placed above the base +// _dr Dependent vowel placed behind the base (right of the base) +// _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following +// it to create a subscript consonant or independent vowel +// _va Khmer split vowel in wich the first part is before the base and the second one above the base +// _vr Khmer split vowel in wich the first part is before the base and the second one behind (right of) the base +// +static const KhmerCharClass khmerCharClasses[] = { + _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, // 1780 - 178F + _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, // 1790 - 179F + _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, // 17A0 - 17AF + _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, // 17B0 - 17BF + _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, // 17C0 - 17CF + _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx // 17D0 - 17DF +}; + +// this enum must reflect the range of khmerCharClasses +enum KhmerCharClassesRange { + KhmerFirstChar = 0x1780, + KhmerLastChar = 0x17df +}; + +// Below we define how a character in the input string is either in the khmerCharClasses table +// (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear +// within the syllable, but are not in the table) we also get their type back, or an unknown object +// in which case we get _xx (CC_RESERVED) back +// +static inline KhmerCharClass getKhmerCharClass(const QChar &uc) +{ + if (uc.unicode() == C_SIGN_ZWJ) { + return CC_ZERO_WIDTH_J_MARK; + } + + if (uc.unicode() == C_SIGN_ZWNJ) { + return CC_ZERO_WIDTH_NJ_MARK; + } + + if (uc.unicode() < KhmerFirstChar || uc.unicode() > KhmerLastChar) { + return CC_RESERVED; + } + + return khmerCharClasses[uc.unicode() - KhmerFirstChar]; +} + + +// The stateTable is used to calculate the end (the length) of a well +// formed Khmer Syllable. +// +// Each horizontal line is ordered exactly the same way as the values in KhmerClassTable +// CharClassValues. This coincidence of values allows the follow up of the table. +// +// Each line corresponds to a state, which does not necessarily need to be a type +// of component... for example, state 2 is a base, with is always a first character +// in the syllable, but the state could be produced a consonant of any type when +// it is the first character that is analysed (in ground state). +// +// Differentiating 3 types of consonants is necessary in order to +// forbid the use of certain combinations, such as having a second +// coeng after a coeng RO, +// The inexistent possibility of having a type 3 after another type 3 is permitted, +// eliminating it would very much complicate the table, and it does not create typing +// problems, as the case above. +// +// The table is quite complex, in order to limit the number of coeng consonants +// to 2 (by means of the table). +// +// There a peculiarity, as far as Unicode is concerned: +// - The consonant-shifter is considered in two possible different +// locations, the one considered in Unicode 3.0 and the one considered in +// Unicode 4.0. (there is a backwards compatibility problem in this standard). +// +// +// xx independent character, such as a number, punctuation sign or non-khmer char +// +// c1 Khmer consonant of type 1 or an independent vowel +// that is, a letter in which the subscript for is only under the +// base, not taking any space to the right or to the left +// +// c2 Khmer consonant of type 2, the coeng form takes space under +// and to the left of the base (only RO is of this type) +// +// c3 Khmer consonant of type 3. Its subscript form takes space under +// and to the right of the base. +// +// cs Khmer consonant shifter +// +// rb Khmer robat +// +// co coeng character (u17D2) +// +// dv dependent vowel (including split vowels, they are treated in the same way). +// even if dv is not defined above, the component that is really tested for is +// KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels +// +// zwj Zero Width joiner +// +// zwnj Zero width non joiner +// +// sa above sign +// +// sp post sign +// +// there are lines with equal content but for an easier understanding +// (and maybe change in the future) we did not join them +// +static const signed char khmerStateTable[][CC_COUNT] = +{ + // xx c1 c2 c3 zwnj cs rb co dv sa sp zwj + { 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, // 0 - ground state + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state (or sign to the right of the syllable) + {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, // 2 - Base consonant + {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, // 3 - First ZWNJ before a register shifter It can only be followed by a shifter or a vowel + {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, // 4 - First register shifter + {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, // 5 - Robat + {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - First Coeng + {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, // 7 - First consonant of type 1 after coeng + {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, // 8 - First consonant of type 2 after coeng + {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, // 9 - First consonant or type 3 after ceong + {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 10 - Second Coeng (no register shifter before) + {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, // 11 - Second coeng consonant (or ind. vowel) no register shifter before + {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, // 12 - Second ZWNJ before a register shifter + {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, // 13 - Second register shifter + {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, // 14 - ZWJ before vowel + {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, // 15 - ZWNJ before vowel + {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, // 16 - dependent vowel + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, // 17 - sign above + {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, // 18 - ZWJ after vowel + {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, // 19 - Third coeng + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, // 20 - dependent vowel after a Robat +}; + + +// #define KHMER_DEBUG +#ifdef KHMER_DEBUG +#define KHDEBUG qDebug +#else +#define KHDEBUG if(0) qDebug +#endif + +// Given an input string of characters and a location in which to start looking +// calculate, using the state table, which one is the last character of the syllable +// that starts in the starting position. +// +static inline int khmer_nextSyllableBoundary(const QString &s, int start, int end, bool *invalid) +{ + *invalid = FALSE; + const QChar *uc = s.unicode() + start; + int state = 0; + int pos = start; + + while (pos < end) { + KhmerCharClass charClass = getKhmerCharClass(*uc); + if (pos == start) { + *invalid = (charClass > 0) && ! (charClass & CF_CONSONANT); + } + state = khmerStateTable[state][charClass & CF_CLASS_MASK]; + + KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state, + charClass, uc->unicode() ); + + if (state < 0) { + break; + } + ++uc; + ++pos; + } + return pos; +} + + +#ifndef QT_NO_XFTFREETYPE +static const QOpenType::Features khmer_features[] = { + { FT_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty }, + { FT_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty }, + { FT_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty }, + { FT_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty }, + { FT_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty }, + { FT_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty }, + { FT_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty }, + { FT_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty }, + { FT_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty }, + { 0, 0 } +}; +#endif + + +static bool khmer_shape_syllable(QOpenType *openType, QShaperItem *item) +{ +#ifndef QT_NO_XFTFREETYPE + if (openType) + openType->selectScript(QFont::Khmer, khmer_features); +#endif + // according to the specs this is the max length one can get + // ### the real value should be smaller + assert(item->length < 13); + + KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length, + item->string->mid(item->from, item->length).utf8().data()); + + int len = 0; + int syllableEnd = item->from + item->length; + unsigned short reordered[16]; + unsigned char properties[16]; + enum { + AboveForm = 0x01, + PreForm = 0x02, + PostForm = 0x04, + BelowForm = 0x08 + }; + memset(properties, 0, 16*sizeof(unsigned char)); + +#ifdef KHMER_DEBUG + qDebug("original:"); + for (int i = from; i < syllableEnd; i++) { + qDebug(" %d: %4x", i, string[i].unicode()); + } +#endif + + // write a pre vowel or the pre part of a split vowel first + // and look out for coeng + ro. RO is the only vowel of type 2, and + // therefore the only one that requires saving space before the base. + // + int coengRo = -1; // There is no Coeng Ro, if found this value will change + int i; + for (i = item->from; i < syllableEnd; i += 1) { + KhmerCharClass charClass = getKhmerCharClass(item->string->at(i)); + + // if a split vowel, write the pre part. In Khmer the pre part + // is the same for all split vowels, same glyph as pre vowel C_VOWEL_E + if (charClass & CF_SPLIT_VOWEL) { + reordered[len] = C_VOWEL_E; + properties[len] = PreForm; + ++len; + break; // there can be only one vowel + } + // if a vowel with pos before write it out + if (charClass & CF_POS_BEFORE) { + reordered[len] = item->string->at(i).unicode(); + properties[len] = PreForm; + ++len; + break; // there can be only one vowel + } + // look for coeng + ro and remember position + // works because coeng + ro is always in front of a vowel (if there is a vowel) + // and because CC_CONSONANT2 is enough to identify it, as it is the only consonant + // with this flag + if ( (charClass & CF_COENG) && (i + 1 < syllableEnd) && + ( (getKhmerCharClass(item->string->at(i+1)) & CF_CLASS_MASK) == CC_CONSONANT2) ) { + coengRo = i; + } + } + + // write coeng + ro if found + if (coengRo > -1) { + reordered[len] = C_COENG; + properties[len] = PreForm; + ++len; + reordered[len] = C_RO; + properties[len] = PreForm; + ++len; + } + + // shall we add a dotted circle? + // If in the position in which the base should be (first char in the string) there is + // a character that has the Dotted circle flag (a character that cannot be a base) + // then write a dotted circle + if (getKhmerCharClass(item->string->at(item->from)) & CF_DOTTED_CIRCLE) { + reordered[len] = C_DOTTED_CIRCLE; + ++len; + } + + // copy what is left to the output, skipping before vowels and + // coeng Ro if they are present + for (i = item->from; i < syllableEnd; i += 1) { + QChar uc = item->string->at(i); + KhmerCharClass charClass = getKhmerCharClass(uc); + + // skip a before vowel, it was already processed + if (charClass & CF_POS_BEFORE) { + continue; + } + + // skip coeng + ro, it was already processed + if (i == coengRo) { + i += 1; + continue; + } + + switch (charClass & CF_POS_MASK) + { + case CF_POS_ABOVE : + reordered[len] = uc.unicode(); + properties[len] = AboveForm; + ++len; + break; + + case CF_POS_AFTER : + reordered[len] = uc.unicode(); + properties[len] = PostForm; + ++len; + break; + + case CF_POS_BELOW : + reordered[len] = uc.unicode(); + properties[len] = BelowForm; + ++len; + break; + + default: + // assign the correct flags to a coeng consonant + // Consonants of type 3 are taged as Post forms and those type 1 as below forms + if ( (charClass & CF_COENG) && i + 1 < syllableEnd ) { + unsigned char property = (getKhmerCharClass(item->string->at(i+1)) & CF_CLASS_MASK) == CC_CONSONANT3 ? + PostForm : BelowForm; + reordered[len] = uc.unicode(); + properties[len] = property; + ++len; + i += 1; + reordered[len] = item->string->at(i).unicode(); + properties[len] = property; + ++len; + break; + } + + // if a shifter is followed by an above vowel change the shifter to below form, + // an above vowel can have two possible positions i + 1 or i + 3 + // (position i+1 corresponds to unicode 3, position i+3 to Unicode 4) + // and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two + // different positions, right after the shifter or after a vowel (Unicode 4) + if ( (charClass & CF_SHIFTER) && (i + 1 < syllableEnd) ) { + if (getKhmerCharClass(item->string->at(i+1)) & CF_ABOVE_VOWEL ) { + reordered[len] = uc.unicode(); + properties[len] = BelowForm; + ++len; + break; + } + if (i + 2 < syllableEnd && + (item->string->at(i+1).unicode() == C_VOWEL_AA) && + (item->string->at(i+2).unicode() == C_SIGN_NIKAHIT) ) + { + reordered[len] = uc.unicode(); + properties[len] = BelowForm; + ++len; + break; + } + if (i + 3 < syllableEnd && (getKhmerCharClass(item->string->at(i+3)) & CF_ABOVE_VOWEL) ) { + reordered[len] = uc.unicode(); + properties[len] = BelowForm; + ++len; + break; + } + if (i + 4 < syllableEnd && + (item->string->at(i+3).unicode() == C_VOWEL_AA) && + (item->string->at(i+4).unicode() == C_SIGN_NIKAHIT) ) + { + reordered[len] = uc.unicode(); + properties[len] = BelowForm; + ++len; + break; + } + } + + // default - any other characters + reordered[len] = uc.unicode(); + ++len; + break; + } // switch + } // for + + if (item->font->stringToCMap((const QChar *)reordered, len, item->glyphs, item->advances, + &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError) + return FALSE; + + KHDEBUG("after shaping: len=%d", len); + for (i = 0; i < len; i++) { + item->attributes[i].mark = FALSE; + item->attributes[i].clusterStart = FALSE; + item->attributes[i].justification = 0; + item->attributes[i].zeroWidth = FALSE; + KHDEBUG(" %d: %4x property=%x", i, reordered[i], properties[i]); + } + + // now we have the syllable in the right order, and can start running it through open type. + +#ifndef QT_NO_XFTFREETYPE + if (openType) { + unsigned short logClusters[16]; + for (int i = 0; i < len; ++i) + logClusters[i] = i; + + uint where[16]; + + for (int i = 0; i < len; ++i) { + where[i] = ~(PreSubstProperty + | BelowSubstProperty + | AboveSubstProperty + | PostSubstProperty + | CligProperty + | PositioningProperties); + if (properties[i] == PreForm) + where[i] &= ~PreFormProperty; + else if (properties[i] == BelowForm) + where[i] &= ~BelowFormProperty; + else if (properties[i] == AboveForm) + where[i] &= ~AboveFormProperty; + else if (properties[i] == PostForm) + where[i] &= ~PostFormProperty; + } + + openType->shape(item, where); + if (!openType->positionAndAdd(item, FALSE)) + return FALSE; + } else +#endif + { + KHDEBUG("Not using openType"); + Q_UNUSED(openType); + } + + item->attributes[0].clusterStart = TRUE; + return TRUE; +} + +static bool khmer_shape(QShaperItem *item) +{ + assert(item->script == QFont::Khmer); + +#ifndef QT_NO_XFTFREETYPE + QOpenType *openType = item->font->openType(); + if (openType && !openType->supportsScript(item->script)) + openType = 0; +#else + QOpenType *openType = 0; +#endif + unsigned short *logClusters = item->log_clusters; + + QShaperItem syllable = *item; + int first_glyph = 0; + + int sstart = item->from; + int end = sstart + item->length; + KHDEBUG("khmer_shape: from %d length %d", item->from, item->length); + while (sstart < end) { + bool invalid; + int send = khmer_nextSyllableBoundary(*item->string, sstart, end, &invalid); + KHDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart, + invalid ? "TRUE" : "FALSE"); + syllable.from = sstart; + syllable.length = send-sstart; + syllable.glyphs = item->glyphs + first_glyph; + syllable.offsets = item->offsets + first_glyph; + syllable.advances = item->advances + first_glyph; + syllable.attributes = item->attributes + first_glyph; + syllable.num_glyphs = item->num_glyphs - first_glyph; + if (!khmer_shape_syllable(openType, &syllable)) { + KHDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs); + item->num_glyphs += syllable.num_glyphs; + return FALSE; + } + item->has_positioning |= syllable.has_positioning; + + // fix logcluster array + KHDEBUG("syllable:"); + int i; + for (i = first_glyph; i < first_glyph + syllable.num_glyphs; ++i) + KHDEBUG(" %d -> glyph %x", i, item->glyphs[i]); + KHDEBUG(" logclusters:"); + for (i = sstart; i < send; ++i) { + KHDEBUG(" %d -> glyph %d", i, first_glyph); + logClusters[i-item->from] = first_glyph; + } + sstart = send; + first_glyph += syllable.num_glyphs; + } + item->num_glyphs = first_glyph; + return TRUE; +} + +static void khmer_attributes( int script, const QString &text, int from, int len, QCharAttributes *attributes ) +{ + Q_UNUSED(script); + + int end = from + len; + const QChar *uc = text.unicode() + from; + attributes += from; + int i = 0; + while ( i < len ) { + bool invalid; + int boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from; + + attributes[i].charStop = TRUE; + + if ( boundary > len-1 ) boundary = len; + i++; + while ( i < boundary ) { + attributes[i].charStop = FALSE; + ++uc; + ++i; + } + assert( i == boundary ); + } +} + +// -------------------------------------------------------------------------------------------------------------------------------------------- +// +// Myanmar +// +// -------------------------------------------------------------------------------------------------------------------------------------------- + +enum MymrCharClassValues +{ + Mymr_CC_RESERVED = 0, + Mymr_CC_CONSONANT = 1, /* Consonant of type 1, that has subscript form */ + Mymr_CC_CONSONANT2 = 2, /* Consonant of type 2, that has no subscript form */ + Mymr_CC_NGA = 3, /* Consonant NGA */ + Mymr_CC_YA = 4, /* Consonant YA */ + Mymr_CC_RA = 5, /* Consonant RA */ + Mymr_CC_WA = 6, /* Consonant WA */ + Mymr_CC_HA = 7, /* Consonant HA */ + Mymr_CC_IND_VOWEL = 8, /* Independent vowel */ + Mymr_CC_ZERO_WIDTH_NJ_MARK = 9, /* Zero Width non joiner character (0x200C) */ + Mymr_CC_VIRAMA = 10, /* Subscript consonant combining character */ + Mymr_CC_PRE_VOWEL = 11, /* Dependent vowel, prebase (Vowel e) */ + Mymr_CC_BELOW_VOWEL = 12, /* Dependent vowel, prebase (Vowel u, uu) */ + Mymr_CC_ABOVE_VOWEL = 13, /* Dependent vowel, prebase (Vowel i, ii, ai) */ + Mymr_CC_POST_VOWEL = 14, /* Dependent vowel, prebase (Vowel aa) */ + Mymr_CC_SIGN_ABOVE = 15, + Mymr_CC_SIGN_BELOW = 16, + Mymr_CC_SIGN_AFTER = 17, + Mymr_CC_ZERO_WIDTH_J_MARK = 18, /* Zero width joiner character */ + Mymr_CC_COUNT = 19 /* This is the number of character classes */ +}; + +enum MymrCharClassFlags +{ + Mymr_CF_CLASS_MASK = 0x0000FFFF, + + Mymr_CF_CONSONANT = 0x01000000, /* flag to speed up comparing */ + Mymr_CF_MEDIAL = 0x02000000, /* flag to speed up comparing */ + Mymr_CF_IND_VOWEL = 0x04000000, /* flag to speed up comparing */ + Mymr_CF_DEP_VOWEL = 0x08000000, /* flag to speed up comparing */ + Mymr_CF_DOTTED_CIRCLE = 0x10000000, /* add a dotted circle if a character with this flag is the first in a syllable */ + Mymr_CF_VIRAMA = 0x20000000, /* flag to speed up comparing */ + + /* position flags */ + Mymr_CF_POS_BEFORE = 0x00080000, + Mymr_CF_POS_BELOW = 0x00040000, + Mymr_CF_POS_ABOVE = 0x00020000, + Mymr_CF_POS_AFTER = 0x00010000, + Mymr_CF_POS_MASK = 0x000f0000, + + Mymr_CF_AFTER_KINZI = 0x00100000 +}; + +/* Characters that get refrered to by name */ +enum MymrChar +{ + Mymr_C_SIGN_ZWNJ = 0x200C, + Mymr_C_SIGN_ZWJ = 0x200D, + Mymr_C_DOTTED_CIRCLE = 0x25CC, + Mymr_C_RA = 0x101B, + Mymr_C_YA = 0x101A, + Mymr_C_NGA = 0x1004, + Mymr_C_VOWEL_E = 0x1031, + Mymr_C_VIRAMA = 0x1039 +}; + +enum +{ + Mymr_xx = Mymr_CC_RESERVED, + Mymr_c1 = Mymr_CC_CONSONANT | Mymr_CF_CONSONANT | Mymr_CF_POS_BELOW, + Mymr_c2 = Mymr_CC_CONSONANT2 | Mymr_CF_CONSONANT, + Mymr_ng = Mymr_CC_NGA | Mymr_CF_CONSONANT | Mymr_CF_POS_ABOVE, + Mymr_ya = Mymr_CC_YA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_AFTER | Mymr_CF_AFTER_KINZI, + Mymr_ra = Mymr_CC_RA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BEFORE, + Mymr_wa = Mymr_CC_WA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BELOW, + Mymr_ha = Mymr_CC_HA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BELOW, + Mymr_id = Mymr_CC_IND_VOWEL | Mymr_CF_IND_VOWEL, + Mymr_vi = Mymr_CC_VIRAMA | Mymr_CF_VIRAMA | Mymr_CF_POS_ABOVE | Mymr_CF_DOTTED_CIRCLE, + Mymr_dl = Mymr_CC_PRE_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_BEFORE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI, + Mymr_db = Mymr_CC_BELOW_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_BELOW | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI, + Mymr_da = Mymr_CC_ABOVE_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_ABOVE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI, + Mymr_dr = Mymr_CC_POST_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_AFTER | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI, + Mymr_sa = Mymr_CC_SIGN_ABOVE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_POS_ABOVE | Mymr_CF_AFTER_KINZI, + Mymr_sb = Mymr_CC_SIGN_BELOW | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_POS_BELOW | Mymr_CF_AFTER_KINZI, + Mymr_sp = Mymr_CC_SIGN_AFTER | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI +}; + + +typedef int MymrCharClass; + + +static const MymrCharClass mymrCharClasses[] = +{ + Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_ng, Mymr_c1, Mymr_c1, Mymr_c1, + Mymr_c1, Mymr_c1, Mymr_c2, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, /* 1000 - 100F */ + Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, + Mymr_c1, Mymr_c1, Mymr_ya, Mymr_ra, Mymr_c1, Mymr_wa, Mymr_c1, Mymr_ha, /* 1010 - 101F */ + Mymr_c2, Mymr_c2, Mymr_xx, Mymr_id, Mymr_id, Mymr_id, Mymr_id, Mymr_id, + Mymr_xx, Mymr_id, Mymr_id, Mymr_xx, Mymr_dr, Mymr_da, Mymr_da, Mymr_db, /* 1020 - 102F */ + Mymr_db, Mymr_dl, Mymr_da, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_sa, Mymr_sb, + Mymr_sp, Mymr_vi, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1030 - 103F */ + Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, + Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1040 - 104F */ + Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, + Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1050 - 105F */ +}; + +static MymrCharClass +getMyanmarCharClass (const QChar &ch) +{ + if (ch.unicode() == Mymr_C_SIGN_ZWJ) + return Mymr_CC_ZERO_WIDTH_J_MARK; + + if (ch.unicode() == Mymr_C_SIGN_ZWNJ) + return Mymr_CC_ZERO_WIDTH_NJ_MARK; + + if (ch.unicode() < 0x1000 || ch.unicode() > 0x105f) + return Mymr_CC_RESERVED; + + return mymrCharClasses[ch.unicode() - 0x1000]; +} + +static const signed char mymrStateTable[][Mymr_CC_COUNT] = +{ +// xx c1, c2 ng ya ra wa ha id zwnj vi dl db da dr sa sb sp zwj + { 1, 4, 4, 2, 4, 4, 4, 4, 24, 1, 27, 17, 18, 19, 20, 21, 1, 1, 4}, // 0 - ground state + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state (or sp to the right of the syllable) + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 17, 18, 19, 20, 21, -1, -1, 4}, // 2 - NGA + {-1, 4, 4, 4, 4, 4, 4, 4, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 3 - Virama after NGA + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, 17, 18, 19, 20, 21, 1, 1, -1}, // 4 - Base consonant + {-2, 6, -2, -2, 7, 8, 9, 10, -2, 23, -2, -2, -2, -2, -2, -2, -2, -2, -2}, // 5 - First virama + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 25, 17, 18, 19, 20, 21, -1, -1, -1}, // 6 - c1 after virama + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, // 7 - ya after virama + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, // 8 - ra after virama + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, // 9 - wa after virama + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, // 10 - ha after virama + {-1, -1, -1, -1, 7, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 11 - Virama after NGA+zwj + {-2, -2, -2, -2, -2, -2, 13, 14, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, // 12 - Second virama + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 17, 18, 19, 20, 21, -1, -1, -1}, // 13 - wa after virama + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, // 14 - ha after virama + {-2, -2, -2, -2, -2, -2, -2, 16, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, // 15 - Third virama + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, // 16 - ha after virama + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 20, 21, 1, 1, -1}, // 17 - dl, Dependent vowel e + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, -1, 21, 1, 1, -1}, // 18 - db, Dependent vowel u,uu + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, -1}, // 19 - da, Dependent vowel i,ii,ai + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, 1, 1, -1}, // 20 - dr, Dependent vowel aa + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1}, // 21 - sa, Sign anusvara + {-1, -1, -1, -1, -1, -1, -1, -1, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 22 - atha + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1}, // 23 - zwnj for atha + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, // 24 - Independent vowel + {-2, -2, -2, -2, 26, 26, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, // 25 - Virama after subscript consonant + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, 1, -1}, // 26 - ra/ya after subscript consonant + virama + {-1, 6, -1, -1, 7, 8, 9, 10, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 27 - Virama after ground state +// exit state -2 is for invalid order of medials and combination of invalids +// with virama where virama should treat as start of next syllable +}; + + + +// #define MYANMAR_DEBUG +#ifdef MYANMAR_DEBUG +#define MMDEBUG qDebug +#else +#define MMDEBUG if(0) qDebug +#endif + +// Given an input string of characters and a location in which to start looking +// calculate, using the state table, which one is the last character of the syllable +// that starts in the starting position. +// +static inline int myanmar_nextSyllableBoundary(const QString &s, int start, int end, bool *invalid) +{ + *invalid = FALSE; + const QChar *uc = s.unicode() + start; + int state = 0; + int pos = start; + + while (pos < end) { + MymrCharClass charClass = getMyanmarCharClass(*uc); + state = mymrStateTable[state][charClass & Mymr_CF_CLASS_MASK]; + if (pos == start) + *invalid = charClass & Mymr_CF_DOTTED_CIRCLE; + + MMDEBUG("state[%d]=%d class=%8x (uc=%4x)", pos - start, state, charClass, uc->unicode() ); + + if (state < 0) { + if (state < -1) + --pos; + break; + } + ++uc; + ++pos; + } + return pos; +} + + +#ifndef QT_NO_XFTFREETYPE +// ###### might have to change order of above and below forms and substitutions, +// but according to Unicode below comes before above +static const QOpenType::Features myanmar_features[] = { + { FT_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty }, + { FT_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty }, + { FT_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty }, + { FT_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty }, + { FT_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty }, + { FT_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty }, + { FT_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty }, + { FT_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty }, + { FT_MAKE_TAG( 'r', 'l', 'i', 'g' ), CligProperty }, // Myanmar1 uses this instead of the other features + { 0, 0 } +}; +#endif + + +// Visual order before shaping should be: +// +// [Vowel Mark E] +// [Virama + Medial Ra] +// [Base] +// [Virama + Consonant] +// [Nga + Virama] (Kinzi) ### should probably come before post forms (medial ya) +// [Vowels] +// [Marks] +// +// This means that we can keep the logical order apart from having to +// move the pre vowel, medial ra and kinzi + +static bool myanmar_shape_syllable(QOpenType *openType, QShaperItem *item, bool invalid) +{ +#ifndef QT_NO_XFTFREETYPE + if (openType) + openType->selectScript(QFont::Myanmar, myanmar_features); +#endif + // according to the table the max length of a syllable should be around 14 chars + assert(item->length < 32); + + MMDEBUG("\nsyllable from %d len %d, str='%s'", item->from, item->length, + item->string->mid(item->from, item->length).utf8().data()); + + const QChar *uc = item->string->unicode() + item->from; +#ifdef MYANMAR_DEBUG + qDebug("original:"); + for (int i = 0; i < item->length; i++) { + qDebug(" %d: %4x", i, uc[i].unicode()); + } +#endif + int vowel_e = -1; + int kinzi = -1; + int medial_ra = -1; + int base = -1; + int i; + for (i = 0; i < item->length; ++i) { + ushort chr = uc[i].unicode(); + + if (chr == Mymr_C_VOWEL_E) { + vowel_e = i; + continue; + } + if (i == 0 + && chr == Mymr_C_NGA + && i + 2 < item->length + && uc[i+1].unicode() == Mymr_C_VIRAMA) { + int mc = getMyanmarCharClass(uc[i+2]); + //MMDEBUG("maybe kinzi: mc=%x", mc); + if ((mc & Mymr_CF_CONSONANT) == Mymr_CF_CONSONANT) { + kinzi = i; + continue; + } + } + if (base >= 0 + && chr == Mymr_C_VIRAMA + && i + 1 < item->length + && uc[i+1].unicode() == Mymr_C_RA) { + medial_ra = i; + continue; + } + if (base < 0) + base = i; + } + + MMDEBUG("\n base=%d, vowel_e=%d, kinzi=%d, medial_ra=%d", base, vowel_e, kinzi, medial_ra); + int len = 0; + unsigned short reordered[32]; + unsigned char properties[32]; + enum { + AboveForm = 0x01, + PreForm = 0x02, + PostForm = 0x04, + BelowForm = 0x08 + }; + memset(properties, 0, 32*sizeof(unsigned char)); + + // write vowel_e if found + if (vowel_e >= 0) { + reordered[0] = Mymr_C_VOWEL_E; + len = 1; + } + // write medial_ra + if (medial_ra >= 0) { + reordered[len] = Mymr_C_VIRAMA; + reordered[len+1] = Mymr_C_RA; + properties[len] = PreForm; + properties[len+1] = PreForm; + len += 2; + } + + // shall we add a dotted circle? + // If in the position in which the base should be (first char in the string) there is + // a character that has the Dotted circle flag (a character that cannot be a base) + // then write a dotted circle + if (invalid) { + reordered[len] = C_DOTTED_CIRCLE; + ++len; + } + + bool lastWasVirama = FALSE; + int basePos = -1; + // copy the rest of the syllable to the output, inserting the kinzi + // at the correct place + for (i = 0; i < item->length; ++i) { + if (i == vowel_e) + continue; + if (i == medial_ra || i == kinzi) { + ++i; + continue; + } + + ushort chr = uc[i].unicode(); + MymrCharClass cc = getMyanmarCharClass(uc[i]); + if (kinzi >= 0 && i > base && (cc & Mymr_CF_AFTER_KINZI)) { + reordered[len] = Mymr_C_NGA; + reordered[len+1] = Mymr_C_VIRAMA; + properties[len-1] = AboveForm; + properties[len] = AboveForm; + len += 2; + kinzi = -1; + } + + if (lastWasVirama) { + int prop = 0; + switch(cc & Mymr_CF_POS_MASK) { + case Mymr_CF_POS_BEFORE: + prop = PreForm; + break; + case Mymr_CF_POS_BELOW: + prop = BelowForm; + break; + case Mymr_CF_POS_ABOVE: + prop = AboveForm; + break; + case Mymr_CF_POS_AFTER: + prop = PostForm; + break; + default: + break; + } + properties[len-1] = prop; + properties[len] = prop; + if(basePos >= 0 && basePos == len-2) + properties[len-2] = prop; + } + lastWasVirama = (chr == Mymr_C_VIRAMA); + if(i == base) + basePos = len; + + if ((chr != Mymr_C_SIGN_ZWNJ && chr != Mymr_C_SIGN_ZWJ) || !len) { + reordered[len] = chr; + ++len; + } + } + if (kinzi >= 0) { + reordered[len] = Mymr_C_NGA; + reordered[len+1] = Mymr_C_VIRAMA; + properties[len] = AboveForm; + properties[len+1] = AboveForm; + len += 2; + } + + if (item->font->stringToCMap((const QChar *)reordered, len, item->glyphs, item->advances, + &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError) + return FALSE; + + MMDEBUG("after shaping: len=%d", len); + for (i = 0; i < len; i++) { + item->attributes[i].mark = FALSE; + item->attributes[i].clusterStart = FALSE; + item->attributes[i].justification = 0; + item->attributes[i].zeroWidth = FALSE; + MMDEBUG(" %d: %4x property=%x", i, reordered[i], properties[i]); + } + + // now we have the syllable in the right order, and can start running it through open type. + +#ifndef QT_NO_XFTFREETYPE + if (openType) { + unsigned short logClusters[32]; + for (int i = 0; i < len; ++i) + logClusters[i] = i; + + uint where[32]; + + for (int i = 0; i < len; ++i) { + where[i] = ~(PreSubstProperty + | BelowSubstProperty + | AboveSubstProperty + | PostSubstProperty + | CligProperty + | PositioningProperties); + if (properties[i] == PreForm) + where[i] &= ~PreFormProperty; + else if (properties[i] == BelowForm) + where[i] &= ~BelowFormProperty; + else if (properties[i] == AboveForm) + where[i] &= ~AboveFormProperty; + else if (properties[i] == PostForm) + where[i] &= ~PostFormProperty; + } + + openType->shape(item, where); + if (!openType->positionAndAdd(item, FALSE)) + return FALSE; + } else +#endif + { + MMDEBUG("Not using openType"); + Q_UNUSED(openType); + } + + item->attributes[0].clusterStart = TRUE; + return TRUE; +} + +static bool myanmar_shape(QShaperItem *item) +{ + assert(item->script == QFont::Myanmar); + +#ifndef QT_NO_XFTFREETYPE + QOpenType *openType = item->font->openType(); + if (openType && !openType->supportsScript(item->script)) + openType = 0; +#else + QOpenType *openType = 0; +#endif + unsigned short *logClusters = item->log_clusters; + + QShaperItem syllable = *item; + int first_glyph = 0; + + int sstart = item->from; + int end = sstart + item->length; + MMDEBUG("myanmar_shape: from %d length %d", item->from, item->length); + while (sstart < end) { + bool invalid; + int send = myanmar_nextSyllableBoundary(*item->string, sstart, end, &invalid); + MMDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart, + invalid ? "TRUE" : "FALSE"); + syllable.from = sstart; + syllable.length = send-sstart; + syllable.glyphs = item->glyphs + first_glyph; + syllable.offsets = item->offsets + first_glyph; + syllable.advances = item->advances + first_glyph; + syllable.attributes = item->attributes + first_glyph; + syllable.num_glyphs = item->num_glyphs - first_glyph; + if (!myanmar_shape_syllable(openType, &syllable, invalid)) { + MMDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs); + item->num_glyphs += syllable.num_glyphs; + return FALSE; + } + item->has_positioning |= syllable.has_positioning; + + // fix logcluster array + MMDEBUG("syllable:"); + int i; + for (i = first_glyph; i < first_glyph + syllable.num_glyphs; ++i) + MMDEBUG(" %d -> glyph %x", i, item->glyphs[i]); + MMDEBUG(" logclusters:"); + for (i = sstart; i < send; ++i) { + MMDEBUG(" %d -> glyph %d", i, first_glyph); + logClusters[i-item->from] = first_glyph; + } + sstart = send; + first_glyph += syllable.num_glyphs; + } + item->num_glyphs = first_glyph; + return TRUE; +} + +static void myanmar_attributes( int script, const QString &text, int from, int len, QCharAttributes *attributes ) +{ + Q_UNUSED(script); + + int end = from + len; + const QChar *uc = text.unicode() + from; + attributes += from; + int i = 0; + while ( i < len ) { + bool invalid; + int boundary = myanmar_nextSyllableBoundary( text, from+i, end, &invalid ) - from; + + attributes[i].charStop = TRUE; + attributes[i].softBreak = TRUE; + + if ( boundary > len-1 ) boundary = len; + i++; + while ( i < boundary ) { + attributes[i].charStop = FALSE; + attributes[i].softBreak = FALSE; + ++uc; + ++i; + } + assert( i == boundary ); + } +} + +// -------------------------------------------------------------------------------------------------------------------------------------------- +// +// Hangul +// +// -------------------------------------------------------------------------------------------------------------------------------------------- + +// Hangul is a syllable based script. Unicode reserves a large range +// for precomposed hangul, where syllables are already precomposed to +// their final glyph shape. In addition, a so called jamo range is +// defined, that can be used to express old Hangul. Modern hangul +// syllables can also be expressed as jamo, and should be composed +// into syllables. The operation is rather simple and mathematical. + +// Every hangul jamo is classified as being either a Leading consonant +// (L), and intermediat Vowel (V) or a trailing consonant (T). Modern +// hangul syllables (the ones in the precomposed area can be of type +// LV or LVT. +// +// Syllable breaks do _not_ occur between: +// +// L L, V or precomposed +// V, LV V, T +// LVT, T T +// +// A standard syllable is of the form L+V+T*. The above rules allow +// nonstandard syllables L*V*T*. To transform them into standard +// syllables fill characers L_f and V_f can be inserted. + +enum { + Hangul_SBase = 0xac00, + Hangul_LBase = 0x1100, + Hangul_VBase = 0x1161, + Hangul_TBase = 0x11a7, + Hangul_SCount = 11172, + Hangul_LCount = 19, + Hangul_VCount = 21, + Hangul_TCount = 28, + Hangul_NCount = 21*28 +}; + +static inline bool hangul_isPrecomposed(unsigned short uc) { + return (uc >= Hangul_SBase && uc < Hangul_SBase + Hangul_SCount); +} + +static inline bool hangul_isLV(unsigned short uc) { + return ((uc - Hangul_SBase) % Hangul_TCount == 0); +} + +enum HangulType { + L, + V, + T, + LV, + LVT, + X +}; + +static inline HangulType hangul_type(unsigned short uc) { + if (uc > Hangul_SBase && uc < Hangul_SBase + Hangul_SCount) + return hangul_isLV(uc) ? LV : LVT; + if (uc < Hangul_LBase || uc > 0x11ff) + return X; + if (uc < Hangul_VBase) + return L; + if (uc < Hangul_TBase) + return V; + return T; +} + +static int hangul_nextSyllableBoundary(const QString &s, int start, int end) +{ + const QChar *uc = s.unicode() + start; + + HangulType state = hangul_type(uc->unicode()); + int pos = 1; + + while (pos < end - start) { + HangulType newState = hangul_type(uc[pos].unicode()); + switch(newState) { + case X: + goto finish; + case L: + case V: + case T: + if (state > newState) + goto finish; + state = newState; + break; + case LV: + if (state > L) + goto finish; + state = V; + break; + case LVT: + if (state > L) + goto finish; + state = T; + } + ++pos; + } + + finish: + return start+pos; +} + +#ifndef QT_NO_XFTFREETYPE +static const QOpenType::Features hangul_features [] = { + { FT_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty }, + { FT_MAKE_TAG('l', 'j', 'm', 'o'), CcmpProperty }, + { FT_MAKE_TAG('j', 'j', 'm', 'o'), CcmpProperty }, + { FT_MAKE_TAG('t', 'j', 'm', 'o'), CcmpProperty }, + { 0, 0 } +}; +#endif + +static bool hangul_shape_syllable(QOpenType *openType, QShaperItem *item) +{ + Q_UNUSED(openType) + const QChar *ch = item->string->unicode() + item->from; + + int i; + unsigned short composed = 0; + // see if we can compose the syllable into a modern hangul + if (item->length == 2) { + int LIndex = ch[0].unicode() - Hangul_LBase; + int VIndex = ch[1].unicode() - Hangul_VBase; + if (LIndex >= 0 && LIndex < Hangul_LCount && + VIndex >= 0 && VIndex < Hangul_VCount) + composed = (LIndex * Hangul_VCount + VIndex) * Hangul_TCount + Hangul_SBase; + } else if (item->length == 3) { + int LIndex = ch[0].unicode() - Hangul_LBase; + int VIndex = ch[1].unicode() - Hangul_VBase; + int TIndex = ch[2].unicode() - Hangul_TBase; + if (LIndex >= 0 && LIndex < Hangul_LCount && + VIndex >= 0 && VIndex < Hangul_VCount && + TIndex >= 0 && TIndex < Hangul_TCount) + composed = (LIndex * Hangul_VCount + VIndex) * Hangul_TCount + TIndex + Hangul_SBase; + } + + + int len = item->length; + QChar c(composed); + + // ### icc says 'chars' is unused + // const QChar *chars = ch; + + // if we have a modern hangul use the composed form + if (composed) { + // chars = &c; + len = 1; + } + + if (item->font->stringToCMap(ch, len, item->glyphs, item->advances, + &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError) + return FALSE; + for (i = 0; i < len; i++) { + item->attributes[i].mark = FALSE; + item->attributes[i].clusterStart = FALSE; + item->attributes[i].justification = 0; + item->attributes[i].zeroWidth = FALSE; + IDEBUG(" %d: %4x", i, ch[i].unicode()); + } + +#ifndef QT_NO_XFTFREETYPE + if (openType && !composed) { + + QVarLengthArray<unsigned short> logClusters(len); + for (i = 0; i < len; ++i) + logClusters[i] = i; + item->log_clusters = logClusters.data(); + + openType->shape(item); + if (!openType->positionAndAdd(item, FALSE)) + return FALSE; + + } +#endif + + item->attributes[0].clusterStart = TRUE; + return TRUE; +} + +static bool hangul_shape(QShaperItem *item) +{ + Q_ASSERT(item->script == QFont::Hangul); + + const QChar *uc = item->string->unicode() + item->from; + + bool allPrecomposed = TRUE; + for (int i = 0; i < item->length; ++i) { + if (!hangul_isPrecomposed(uc[i].unicode())) { + allPrecomposed = FALSE; + break; + } + } + + if (!allPrecomposed) { +#ifndef QT_NO_XFTFREETYPE + QOpenType *openType = item->font->openType(); + if (openType && !openType->supportsScript(item->script)) + openType = 0; + if (openType) + openType->selectScript(QFont::Hangul, hangul_features); +#else + QOpenType *openType = 0; +#endif + + unsigned short *logClusters = item->log_clusters; + + QShaperItem syllable = *item; + int first_glyph = 0; + + int sstart = item->from; + int end = sstart + item->length; + while (sstart < end) { + int send = hangul_nextSyllableBoundary(*(item->string), sstart, end); + + syllable.from = sstart; + syllable.length = send-sstart; + syllable.glyphs = item->glyphs + first_glyph; + syllable.offsets = item->offsets + first_glyph; + syllable.advances = item->advances + first_glyph; + syllable.attributes = item->attributes + first_glyph; + syllable.num_glyphs = item->num_glyphs - first_glyph; + if (!hangul_shape_syllable(openType, &syllable)) { + item->num_glyphs += syllable.num_glyphs; + return FALSE; + } + item->has_positioning |= syllable.has_positioning; + // fix logcluster array + for (int i = sstart; i < send; ++i) + logClusters[i-item->from] = first_glyph; + sstart = send; + first_glyph += syllable.num_glyphs; + } + item->num_glyphs = first_glyph; + return TRUE; + } + + return basic_shape(item); +} + +static void hangul_attributes(int script, const QString &text, int from, int len, QCharAttributes *attributes) +{ + Q_UNUSED(script); + + int end = from + len; + const QChar *uc = text.unicode() + from; + attributes += from; + int i = 0; + while (i < len) { + int boundary = hangul_nextSyllableBoundary(text, from+i, end) - from; + + attributes[i].charStop = TRUE; + + if (boundary > len-1) boundary = len; + i++; + while (i < boundary) { + attributes[i].charStop = FALSE; + ++uc; + ++i; + } + assert(i == boundary); + } +} + +// ----------------------------------------------------------------------------------------------- +// +// The script engine jump table +// +// ----------------------------------------------------------------------------------------------- + +const q_scriptEngine scriptEngines[] = { + // Latin, + { basic_shape, 0 }, + // Greek, + { basic_shape, 0 }, + // Cyrillic, + { basic_shape, 0 }, + // Armenian, + { basic_shape, 0 }, + // Georgian, + { basic_shape, 0 }, + // Runic, + { basic_shape, 0 }, + // Ogham, + { basic_shape, 0 }, + // SpacingModifiers, + { basic_shape, 0 }, + // CombiningMarks, + { basic_shape, 0 }, + + // // Middle Eastern Scripts + // Hebrew, + { hebrew_shape, 0 }, + // Arabic, + { arabic_shape, 0 }, + // Syriac, + { syriac_shape, 0 }, + // Thaana, + { thaana_shape, 0 }, + + // // South and Southeast Asian Scripts + // Devanagari, + { indic_shape, indic_attributes }, + // Bengali, + { indic_shape, indic_attributes }, + // Gurmukhi, + { indic_shape, indic_attributes }, + // Gujarati, + { indic_shape, indic_attributes }, + // Oriya, + { indic_shape, indic_attributes }, + // Tamil, + { indic_shape, indic_attributes }, + // Telugu, + { indic_shape, indic_attributes }, + // Kannada, + { indic_shape, indic_attributes }, + // Malayalam, + { indic_shape, indic_attributes }, + // Sinhala, + { indic_shape, indic_attributes }, + // Thai, + { basic_shape, thai_attributes }, + // Lao, + { basic_shape, thai_attributes }, + // Tibetan, + { tibetan_shape, tibetan_attributes }, + // Myanmar, + { myanmar_shape, myanmar_attributes }, + // Khmer, + { khmer_shape, khmer_attributes }, + + // // East Asian Scripts + // Han, + { basic_shape, 0 }, + // Hiragana, + { basic_shape, 0 }, + // Katakana, + { basic_shape, 0 }, + // Hangul, + { hangul_shape, hangul_attributes }, + // Bopomofo, + { basic_shape, 0 }, + // Yi, + { basic_shape, 0 }, + + // // Additional Scripts + // Ethiopic, + { basic_shape, 0 }, + // Cherokee, + { basic_shape, 0 }, + // CanadianAboriginal, + { basic_shape, 0 }, + // Mongolian, + { basic_shape, 0 }, + + // // Symbols + // CurrencySymbols, + { basic_shape, 0 }, + // LetterlikeSymbols, + { basic_shape, 0 }, + // NumberForms, + { basic_shape, 0 }, + // MathematicalOperators, + { basic_shape, 0 }, + // TechnicalSymbols, + { basic_shape, 0 }, + // GeometricSymbols, + { basic_shape, 0 }, + // MiscellaneousSymbols, + { basic_shape, 0 }, + // EnclosedAndSquare, + { basic_shape, 0 }, + // Braille, + { basic_shape, 0 }, + + // Unicode, + { basic_shape, 0 }, + //Tagalog, + { basic_shape, 0 }, + //Hanunoo, + { basic_shape, 0 }, + //Buhid, + { basic_shape, 0 }, + //Tagbanwa, + { basic_shape, 0 }, + // KatakanaHalfWidth + { basic_shape, 0 }, + // Limbu + { basic_shape, 0 }, + // TaiLe + { basic_shape, 0 } +}; |