diff options
Diffstat (limited to 'dilos/tdesdk/debian/desktop-i18n/msgsplit')
-rw-r--r-- | dilos/tdesdk/debian/desktop-i18n/msgsplit | 168 |
1 files changed, 168 insertions, 0 deletions
#! /usr/bin/env python
"""Re-wrap the message strings of gettext PO files in place.

Every file named on the command line is read as UTF-8.  The quoted strings
that follow a ``msgid``, ``msgstr``, ``msgctxt``, ``msgid_plural`` or
``msgstr[N]`` keyword are joined and written out again through splitit().
Blank lines and lines starting with ``#`` are copied through unchanged.
The result is written to ``<file>.new``, which is then renamed over the
original file.

Runs on Python 2.6+ and Python 3.
"""

import io
import os
import sys

# TODO: currently the 78 chars are *without* the quotes, while for Gettext it is *with* the quotes
# FIXME: it seems possible to get lines bigger than 80 characters.
max_length = 78

# A message is always broken just before one of these tags.
wrap_before = ['<h1>', '<h2>', '<h3>', '<h4>', '<h5>', '<h6>', '<p>', '<br>', '<br/>',
               '<ol>', '<ul>', '<li>', '<table>', '<th>', '<tr>', '<td>', '<center>',
               '<blockquote>', '<pre>', '<hr>', '<hr/>']

### TODO: try to support any charset, not only UTF-8 (so that it can be used outside TDE)


class _MsgsplitError(Exception):
    """Fatal problem with an input file; str() is the message to print."""


def splitit(start, message, outfile):
    """Write one PO entry field to *outfile*, wrapping the message text.

    start   -- keyword prefix including its trailing space (e.g. u"msgid "),
               or an empty string when there is no keyword to write.
    message -- the unquoted message text; escape sequences such as the two
               characters backslash + "n" are still in their escaped form.
    outfile -- text stream; only its write() method is used.

    If *start* is non-empty and prefix plus message are shorter than
    max_length and the message holds no escaped newline, a single
    ``start"message"`` line is written.  Otherwise ``start""`` is written
    (when *start* is non-empty) followed by one quoted line per chunk.  A
    chunk ends after an escaped newline, before a tag from wrap_before, or
    once the scan index exceeds max_length.
    """
    if start:
        if len(message) + len(start) < max_length and u'\\n' not in message:
            outfile.write(u'%s"%s"\n' % (start, message))
            return
        outfile.write(u'%s""\n' % start)

    block_tags = tuple(wrap_before)
    index = 0
    # Position of the last '>', ' ' and ',' seen in the current chunk.
    # 0 doubles as "none seen", so a candidate at index 0 is never used.
    last_brace = last_space = last_comma = 0
    while index < len(message):
        char = message[index]
        cut = None  # number of leading characters to emit as one line
        if (char == u'n' and index > 0 and message[index - 1] == u'\\'
                and (index < 2 or message[index - 2] != u'\\')):
            # Escaped newline (not preceded by another backslash): the line
            # ends right after it.
            cut = index + 1
        elif char == u'>':
            last_brace = index
        elif char == u' ':
            last_space = index
        elif char == u',':
            last_comma = index
        elif char == u'<' and index > 0 and message.startswith(block_tags, index):
            # Break before the tag; the tag then starts the next chunk.
            cut = index

        if cut is None and index > max_length:
            # Too long: prefer breaking after a late '>', then after the
            # last space, then after the last comma.
            if last_brace > 50:
                index = last_brace
                # Keep the spaces that follow the '>' on the same line.
                while index < len(message) - 1 and message[index + 1] == u' ':
                    index += 1
            elif last_space != 0:
                index = last_space
            elif last_comma != 0:
                index = last_comma
            else:
                # No break candidate: cut here, but step back over
                # backslashes so an escape sequence is not split.
                while index > 0 and message[index] == u'\\':
                    index -= 1
            cut = index + 1

        if cut is None:
            index += 1
            continue

        outfile.write(u'"%s"\n' % message[:cut])
        message = message[cut:]
        index = 0
        last_brace = last_space = last_comma = 0

    if message:
        outfile.write(u'"%s"\n' % message)


def _unquote(rest):
    """Return the text between the quotes of *rest*; ValueError if unquoted."""
    rest = rest.lstrip()
    if len(rest) < 2 or rest[0] != u'"' or rest[-1] != u'"':
        raise ValueError("parsing error in line")
    return rest[1:-1]


def _parse_keyword_line(line):
    """Split a stripped keyword line into (start, text).

    *start* is the keyword with its trailing space, *text* the unquoted
    string after it.  Raises ValueError carrying the error description when
    the line is not a recognised keyword line.
    """
    for keyword in (u"msgid ", u"msgstr ", u"msgctxt ", u"msgid_plural "):
        if line.startswith(keyword):
            return keyword, _unquote(line[len(keyword):])

    if not line.startswith(u"msgstr["):
        raise ValueError("parsing error in line")

    if line[8:10] == u"] ":
        # For most languages, there will be only one digit
        if not line[7].isdigit():
            raise ValueError("not-a-digit error for mgstr[] in line")
        end = 10
    else:
        end = 7  # The first digit is at position 7
        while end < len(line) and line[end].isdigit():
            end += 1
        if end == 7 or line[end:end + 2] != u"] ":
            raise ValueError("parse error after msgstr[ in line")
        end += 2  # skip ] and the space
    return line[:end], _unquote(line[end:])


def _rewrap_stream(orig_file, new_file, path):
    """Copy PO text from *orig_file* to *new_file*, re-wrapping messages.

    *path* is only used in error messages.  Raises _MsgsplitError on a line
    that cannot be parsed.
    """
    start = u''
    last = u''
    for lineno, raw in enumerate(orig_file, 1):
        line = raw.strip()
        if not line or raw[0] == u'#':
            # Blank (or whitespace-only) line or comment: flush the pending
            # field and copy the line through untouched.
            splitit(start, last, new_file)
            start = u''
            last = u''
            new_file.write(raw)
            continue
        if line[0] == u'"' and line[-1:] == u'"':
            # Continuation string of the current field.
            last += line[1:-1]
            continue
        # new message
        splitit(start, last, new_file)
        try:
            start, last = _parse_keyword_line(line)
        except ValueError as err:
            raise _MsgsplitError("%s %s %d" % (path, err, lineno))
    splitit(start, last, new_file)


def _rewrap_file(path):
    """Re-wrap the PO file *path* in place via a ``.new`` temporary file.

    The temporary file is removed again if anything goes wrong, so a failed
    run leaves the original file untouched and no stray ``.new`` file.
    """
    tmp_path = path + ".new"
    finished = False
    try:
        with io.open(path, 'r', encoding='utf-8') as orig_file:
            with io.open(tmp_path, 'w', encoding='utf-8') as new_file:
                try:
                    _rewrap_stream(orig_file, new_file, path)
                except UnicodeError:
                    raise _MsgsplitError("%s is not valid UTF-8" % path)
        finished = True
    finally:
        if not finished and os.path.exists(tmp_path):
            os.remove(tmp_path)
    os.rename(tmp_path, path)


### TODO: even in the case of a parse error, the script could try to process the next file(s) instead of exiting.
def main(args=None):
    """Re-wrap every PO file in *args* (default: sys.argv[1:]).

    On the first file that cannot be decoded or parsed, the error is
    printed and the process exits with status 1.
    """
    if args is None:
        args = sys.argv[1:]
    for path in args:
        try:
            _rewrap_file(path)
        except _MsgsplitError as err:
            print(err)
            sys.exit(1)


if __name__ == "__main__":
    main()

# kate: space-indent on; indent-width 4; replace-tabs on;