diff options
Diffstat (limited to 'tdehtml/rendering/break_lines.h')
-rw-r--r-- | tdehtml/rendering/break_lines.h | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/tdehtml/rendering/break_lines.h b/tdehtml/rendering/break_lines.h new file mode 100644 index 000000000..34b733121 --- /dev/null +++ b/tdehtml/rendering/break_lines.h @@ -0,0 +1,163 @@ +#ifndef BREAK_LINES_H +#define BREAK_LINES_H + +#include <tqstring.h> + +namespace tdehtml { + + /* + array of unicode codes where breaking shouldn't occur. + (in sorted order because of using with binary search) + these are currently for Japanese, though simply adding + Korean, Chinese ones should work as well + */ + /* + dontbreakbefore[] contains characters not covered by TQChar::Punctuation_Close that shouldn't be broken before. + chars included in TQChar::Punctuation_Close are listed below.(look at UAX #14) + - 3001 ideographic comma + - 3002 ideographic full stop + - FE50 small comma + - FF52 small full stop + - FF0C fullwidth comma + - FF0E fullwidth full stop + - FF61 halfwidth ideographic full stop + - FF64 halfwidth ideographic comma + these character is commented out. + */ + const ushort dontbreakbefore[] = { + //0x3001, //ideographic comma + //0x3002, //ideographic full stop + 0x3005, //ideographic iteration mark + 0x3009, //right angle bracket + 0x300b, //right double angle bracket + 0x300d, //right corner bracket + 0x300f, //right white corner bracket + 0x3011, //right black lenticular bracket + 0x3015, //right tortoise shell bracket + 0x3041, //small a hiragana + 0x3043, //small i hiragana + 0x3045, //small u hiragana + 0x3047, //small e hiragana + 0x3049, //small o hiragana + 0x3063, //small tsu hiragana + 0x3083, //small ya hiragana + 0x3085, //small yu hiragana + 0x3087, //small yo hiragana + 0x308E, //small wa hiragana + 0x309B, //jap voiced sound mark + 0x309C, //jap semi-voiced sound mark + 0x309D, //jap iteration mark hiragana + 0x309E, //jap voiced iteration mark hiragana + 0x30A1, //small a katakana + 0x30A3, //small i katakana + 0x30A5, //small u katakana + 0x30A7, //small e katakana + 0x30A9, //small o katakana + 0x30C3, //small tsu katakana + 0x30E3, //small ya katakana + 0x30E5, //small yu katakana + 0x30E7, //small yo katakana + 0x30EE, //small wa katakana + 0x30F5, //small ka katakana + 0x30F6, //small ke katakana + 0x30FC, //jap prolonged sound mark + 0x30FD, //jap iteration mark katakana + 0x30FE, //jap voiced iteration mark katakana + //0xFE50, //small comma + //0xFF52, //small full stop + 0xFF01, //fullwidth exclamation mark + 0xFF09, //fullwidth right parenthesis + //0xFF0C, //fullwidth comma + 0xFF0D, //fullwidth hypen-minus + //0xFF0E, //fullwidth full stop + 0xFF1F, //fullwidth question mark + 0xFF3D, //fullwidth right square bracket + 0xFF5D, //fullwidth right curly bracket + //0xFF61, //halfwidth ideographic full stop + 0xFF63, //halfwidth right corner bracket + //0xFF64, //halfwidth ideographic comma + 0xFF67, //halfwidth katakana letter small a + 0xFF68, //halfwidth katakana letter small i + 0xFF69, //halfwidth katakana letter small u + 0xFF6a, //halfwidth katakana letter small e + 0xFF6b, //halfwidth katakana letter small o + 0xFF6c, //halfwidth katakana letter small ya + 0xFF6d, //halfwidth katakana letter small yu + 0xFF6e, //halfwidth katakana letter small yo + 0xFF6f, //halfwidth katakana letter small tu + 0xFF70 //halfwidth katakana-hiragana prolonged sound mark + }; + + // characters that aren't covered by TQChar::Punctuation_Open + const ushort dontbreakafter[] = { + 0x3012, //postal mark + 0xFF03, //full width pound mark + 0xFF04, //full width dollar sign + 0xFF20, //full width @ + 0xFFE1, //full width british pound sign + 0xFFE5 //full width yen sign + }; + + inline bool break_bsearch( const ushort* arr, const ushort val ) { + int left = 0; + int right = (sizeof(arr) / sizeof(ushort)) - 1; + + while (1) { + if (left == right) + return val != arr[left]; + + int i = (left + right) >> 1; + if ( val == arr[i] ) + return false; + if ( val < arr[i] ) + right = i; + else + left = i + 1; + } + } + + bool isBreakableThai( const TQChar *string, const int pos, const int len); + void cleanup_thaibreaks(); + + inline bool isBreakable( const TQChar *str, const int pos, int len ) + { + const TQChar *c = str+pos; + unsigned short ch = c->unicode(); + if ( ch > 0xff ) { + // not latin1, need to do more sophisticated checks for asian fonts + unsigned char row = c->row(); + if ( row == 0x0e ) { + // 0e00 - 0e7f == Thai + if ( c->cell() < 0x80 ) { + // consult libthai + return isBreakableThai(str, pos, len); + } else + return false; + } + if ( row > 0x2d && row < 0xfb || row == 0x11 ) { + /* asian line breaking. */ + if ( pos == 0 ) + return false; // never break before first character + + // check for simple punctuation cases + TQChar::Category cat = c->category(); + if ( cat == TQChar::Punctuation_Close || + cat == TQChar::Punctuation_Other || + (str+(pos-1))->category() == TQChar::Punctuation_Open ) + return false; + + // do binary search in dontbreak[] + return break_bsearch(dontbreakbefore, c->unicode()) && + break_bsearch(dontbreakafter, (str+(pos-1))->unicode()); + } else // no asian font + return c->isSpace(); + } else { + if ( ch == ' ' || ch == '\n' ) + return true; + } + return false; + } + +} + +#endif |