summaryrefslogtreecommitdiffstats
path: root/tools/linguist/lupdate/numberh.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/linguist/lupdate/numberh.cpp')
-rw-r--r--tools/linguist/lupdate/numberh.cpp248
1 files changed, 248 insertions, 0 deletions
diff --git a/tools/linguist/lupdate/numberh.cpp b/tools/linguist/lupdate/numberh.cpp
new file mode 100644
index 000000000..787c9a4e1
--- /dev/null
+++ b/tools/linguist/lupdate/numberh.cpp
@@ -0,0 +1,248 @@
+/**********************************************************************
+** Copyright (C) 2000-2008 Trolltech ASA. All rights reserved.
+**
+** This file is part of TQt Linguist.
+**
+** This file may be used under the terms of the GNU General
+** Public License versions 2.0 or 3.0 as published by the Free
+** Software Foundation and appearing in the files LICENSE.GPL2
+** and LICENSE.GPL3 included in the packaging of this file.
+** Alternatively you may (at your option) use any later version
+** of the GNU General Public License if such license has been
+** publicly approved by Trolltech ASA (or its successors, if any)
+** and the KDE Free TQt Foundation.
+**
+** Please review the following information to ensure GNU General
+** Public Licensing retquirements will be met:
+** http://trolltech.com/products/qt/licenses/licensing/opensource/.
+** If you are unsure which license is appropriate for your use, please
+** review the following information:
+** http://trolltech.com/products/qt/licenses/licensing/licensingoverview
+** or contact the sales department at sales@trolltech.com.
+**
+** Licensees holding valid TQt Commercial licenses may use this file in
+** accordance with the TQt Commercial License Agreement provided with
+** the Software.
+**
+** This file is provided "AS IS" with NO WARRANTY OF ANY KIND,
+** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR
+** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted
+** herein.
+**
+**********************************************************************/
+
+#include <metatranslator.h>
+
+#include <qmemarray.h>
+#include <qcstring.h>
+#include <qmap.h>
+#include <qstringlist.h>
+
+#include <ctype.h>
+
+typedef TQMap<TQCString, MetaTranslatorMessage> TMM;
+typedef TQValueList<MetaTranslatorMessage> TML;
+
+static bool isDigitFriendly( int c )
+{
+ return ispunct((uchar)c) || isspace((uchar)c);
+}
+
+static int numberLength( const char *s )
+{
+ int i = 0;
+
+ if ( isdigit((uchar)s[0]) ) {
+ do {
+ i++;
+ } while (isdigit((uchar)s[i]) ||
+ (isDigitFriendly(s[i]) &&
+ (isdigit((uchar)s[i + 1]) ||
+ (isDigitFriendly(s[i + 1]) && isdigit((uchar)s[i + 2])))));
+ }
+ return i;
+}
+
+/*
+ Returns a version of 'key' where all numbers have been replaced by zeroes. If
+ there were none, returns "".
+*/
+static TQCString zeroKey( const char *key )
+{
+ TQCString zeroed( (int)strlen(key) + 1 );
+ char *z = zeroed.data();
+ int i = 0, j = 0;
+ int len;
+ bool metSomething = FALSE;
+
+ while ( key[i] != '\0' ) {
+ len = numberLength( key + i );
+ if ( len > 0 ) {
+ i += len;
+ z[j++] = '0';
+ metSomething = TRUE;
+ } else {
+ z[j++] = key[i++];
+ }
+ }
+ z[j] = '\0';
+
+ if ( metSomething )
+ return zeroed;
+ else
+ return "";
+}
+
+static TQString translationAttempt( const TQString& oldTranslation,
+ const char *oldSource,
+ const char *newSource )
+{
+ int p = zeroKey( oldSource ).contains( '0' );
+ int oldSourceLen = qstrlen( oldSource );
+ TQString attempt;
+ TQStringList oldNumbers;
+ TQStringList newNumbers;
+ TQMemArray<bool> met( p );
+ TQMemArray<int> matchedYet( p );
+ int i, j;
+ int k = 0, ell, best;
+ int m, n;
+ int pass;
+
+ /*
+ This algorithm is hard to follow, so we'll consider an example
+ all along: oldTranslation is "XeT 3.0", oldSource is "TeX 3.0"
+ and newSource is "XeT 3.1".
+
+ First, we set up two tables: oldNumbers and newNumbers. In our
+ example, oldNumber[0] is "3.0" and newNumber[0] is "3.1".
+ */
+ for ( i = 0, j = 0; i < oldSourceLen; i++, j++ ) {
+ m = numberLength( oldSource + i );
+ n = numberLength( newSource + j );
+ if ( m > 0 ) {
+ oldNumbers.append( TQCString(oldSource + i, m + 1) );
+ newNumbers.append( TQCString(newSource + j, n + 1) );
+ i += m;
+ j += n;
+ met[k] = FALSE;
+ matchedYet[k] = 0;
+ k++;
+ }
+ }
+
+ /*
+ We now go over the old translation, "XeT 3.0", one letter at a
+ time, looking for numbers found in oldNumbers. Whenever such a
+ number is met, it is replaced with its newNumber equivalent. In
+ our example, the "3.0" of "XeT 3.0" becomes "3.1".
+ */
+ for ( i = 0; i < (int) oldTranslation.length(); i++ ) {
+ attempt += oldTranslation[i];
+ for ( k = 0; k < p; k++ ) {
+ if ( oldTranslation[i] == oldNumbers[k][matchedYet[k]] )
+ matchedYet[k]++;
+ else
+ matchedYet[k] = 0;
+ }
+
+ /*
+ Let's find out if the last character ended a match. We make
+ two passes over the data. In the first pass, we try to
+ match only numbers that weren't matched yet; if that fails,
+ the second pass does the trick. This is useful in some
+ suspicious cases, flagged below.
+ */
+ for ( pass = 0; pass < 2; pass++ ) {
+ best = p; // an impossible value
+ for ( k = 0; k < p; k++ ) {
+ if ( (!met[k] || pass > 0) &&
+ matchedYet[k] == (int) oldNumbers[k].length() &&
+ numberLength(oldTranslation.latin1() + (i + 1) -
+ matchedYet[k]) == matchedYet[k] ) {
+ // the longer the better
+ if ( best == p || matchedYet[k] > matchedYet[best] )
+ best = k;
+ }
+ }
+ if ( best != p ) {
+ attempt.truncate( attempt.length() - matchedYet[best] );
+ attempt += newNumbers[best];
+ met[best] = TRUE;
+ for ( k = 0; k < p; k++ )
+ matchedYet[k] = 0;
+ break;
+ }
+ }
+ }
+
+ /*
+ We flag two kinds of suspicious cases. They are identified as
+ such with comments such as "{2000?}" at the end.
+
+ Example of the first kind: old source text "TeX 3.0" translated
+ as "XeT 2.0" is flagged "TeX 2.0 {3.0?}", no matter what the
+ new text is.
+ */
+ for ( k = 0; k < p; k++ ) {
+ if ( !met[k] )
+ attempt += TQString( " {" ) + newNumbers[k] + TQString( "?}" );
+ }
+
+ /*
+ Example of the second kind: "1 of 1" translated as "1 af 1",
+ with new source text "1 of 2", generates "1 af 2 {1 or 2?}"
+ because it's not clear which of "1 af 2" and "2 af 1" is right.
+ */
+ for ( k = 0; k < p; k++ ) {
+ for ( ell = 0; ell < p; ell++ ) {
+ if ( k != ell && oldNumbers[k] == oldNumbers[ell] &&
+ newNumbers[k] < newNumbers[ell] )
+ attempt += TQString( " {" ) + newNumbers[k] + TQString( " or " ) +
+ newNumbers[ell] + TQString( "?}" );
+ }
+ }
+ return attempt;
+}
+
+/*
+ Augments a MetaTranslator with translations easily derived from
+ similar existing (probably obsolete) translations.
+
+ For example, if "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1"
+ has no translation, "XeT 3.1" is added to the translator and is
+ marked Unfinished.
+*/
+void applyNumberHeuristic( MetaTranslator *tor, bool verbose )
+{
+ TMM translated, untranslated;
+ TMM::Iterator t, u;
+ TML all = tor->messages();
+ TML::Iterator it;
+ int inserted = 0;
+
+ for ( it = all.begin(); it != all.end(); ++it ) {
+ if ( (*it).type() == MetaTranslatorMessage::Unfinished ) {
+ if ( (*it).translation().isEmpty() )
+ untranslated.insert(TQCString((*it).context()) + "\n" + (*it).sourceText() + "\n"
+ + (*it).comment(), *it);
+ } else if ( !(*it).translation().isEmpty() ) {
+ translated.insert( zeroKey((*it).sourceText()), *it );
+ }
+ }
+
+ for ( u = untranslated.begin(); u != untranslated.end(); ++u ) {
+ t = translated.find( zeroKey((*u).sourceText()) );
+ if ( t != translated.end() && !t.key().isEmpty() &&
+ qstrcmp((*t).sourceText(), (*u).sourceText()) != 0 ) {
+ MetaTranslatorMessage m( *u );
+ m.setTranslation(translationAttempt((*t).translation(), (*t).sourceText(),
+ (*u).sourceText()));
+ tor->insert( m );
+ inserted++;
+ }
+ }
+ if ( verbose && inserted != 0 )
+ fprintf( stderr, " number heuristic provided %d translation%s\n",
+ inserted, inserted == 1 ? "" : "s" );
+}