1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
|
#! /usr/bin/env python
import sys, string, codecs, os
# TODO: currently the 78 chars are *without* the quotes, while for Gettext it is *with* the quotes
# FIXME: it seems possible to get lines bigger than 80 characters.
max_length = 78

wrap_before = ['<h1>', '<h2>', '<h3>', '<h4>', '<h5>', '<h6>', '<p>', '<br>', '<br/>',
               '<ol>', '<ul>', '<li>', '<table>', '<th>', '<tr>', '<td>', '<center>',
               '<blockquote>', '<pre>', '<hr>', '<hr/>']


def _backslashes_before(message, index):
    """Count the consecutive backslashes that end right before message[index]."""
    count = 0
    while count < index and message[index - 1 - count] == '\\':
        count += 1
    return count


def _find_newline_escape(message):
    """Return the index of the first newline escape in message, or -1.

    A newline escape is a backslash followed by 'n' where that backslash is
    not itself escaped, i.e. the 'n' is preceded by an odd number of
    backslashes.  An escaped backslash followed by a plain 'n' does not count.
    """
    pos = message.find('\\n')
    while pos != -1:
        if _backslashes_before(message, pos + 1) % 2 == 1:
            return pos
        pos = message.find('\\n', pos + 1)
    return -1


def _write_quoted(outfile, text):
    """Write text as one double-quoted line, UTF-8 encoded."""
    outfile.write(('"%s"\n' % text).encode('utf-8'))


### TODO: try to support any charset, not only UTF-8 (so that it can be used outside TDE)
def splitit(start, message, outfile):
    """Write one PO field to outfile, re-wrapped into quoted lines.

    start   -- keyword prefix including its trailing space (e.g. "msgid ");
               may be empty, in which case no keyword line is written.
    message -- text of the field without the surrounding quotes, with its
               escape sequences left as they appear in the PO file.
    outfile -- object with a write() method; it receives UTF-8 encoded data.

    If start is non-empty and len(start) + len(message) is below max_length
    and the message holds no newline escape, everything goes on a single
    line.  Otherwise the keyword line carries an empty string and the text
    follows in quoted chunks, which are broken
      * after every newline escape,
      * before any tag listed in wrap_before (unless it starts the chunk),
      * once the scan position passes max_length: after the last '>' if it
        lies beyond position 50 (trailing spaces stay on that line), else
        after the last space, else after the last comma, else at the current
        position, backing up so a chunk does not end right at a backslash.

    Lengths are counted without the quotes, and a chunk is only cut once the
    position is already past max_length, so emitted lines can be somewhat
    longer than max_length.
    """
    if start:
        if len(message) + len(start) < max_length and \
           _find_newline_escape(message) == -1:
            outfile.write(('%s"%s"\n' % (start, message)).encode('utf-8'))
            return
        # Multi-line form: keyword followed by an empty string.
        outfile.write(('%s""\n' % start).encode('utf-8'))

    index = 0
    # Candidate break positions inside the current chunk; 0 means "none yet".
    last_brace = 0
    last_space = 0
    last_comma = 0
    while index < len(message):
        char = message[index]
        cut = -1  # number of leading characters to emit now, -1 for none

        if char == 'n' and _backslashes_before(message, index) % 2 == 1:
            # End of a newline escape: the line ends right after it.
            cut = index + 1
        elif char == '>':
            last_brace = index
        elif char == ' ':
            last_space = index
        elif char == ',':
            last_comma = index
        elif char == '<' and index > 0:
            # Block-level tags start a new line of their own.
            for tag in wrap_before:
                if message.startswith(tag, index):
                    cut = index
                    break

        if cut == -1 and index > max_length:
            if last_brace > 50:
                end = last_brace
                # Keep the spaces that follow the tag on the same line.
                while end < len(message) - 1 and message[end + 1] == ' ':
                    end += 1
            elif last_space != 0:
                end = last_space
            elif last_comma != 0:
                end = last_comma
            else:
                # No natural break point: cut here, but not on a backslash.
                end = index
                while end > 0 and message[end] == '\\':
                    end -= 1
            cut = end + 1

        if cut != -1:
            _write_quoted(outfile, message[:cut])
            message = message[cut:]
            index = 0
            last_brace = 0
            last_space = 0
            last_comma = 0
            continue
        index += 1

    if message:
        _write_quoted(outfile, message)
if sys.hexversion >= 0x02030000:
    # We have Python 2.3 or better
    open_type = "rU" # Open for read with "Universal Newline Support"
else:
    # We have a Python older than 2.3
    open_type = "r" # Normal open for read

# Keywords that are directly followed by their quoted string.
_simple_keywords = ("msgid ", "msgstr ", "msgctxt ", "msgid_plural ")

# Driver: every file named on the command line is read line by line, each
# field (keyword plus its continuation strings) is collected and handed to
# splitit(), and the result is written to "<file>.new", which replaces the
# original once the whole file has been processed.  Blank lines and lines
# starting with '#' are copied through unchanged.  On an error a message is
# printed to standard output and the script exits with status 1; the
# partially written ".new" file is not removed.
### TODO: even in the case of a parse error, the script could try to process the next file(s) instead of exiting.
for file in sys.argv[1:]:
    orig_file = open(file, open_type)
    new_file = open(file + ".new", 'w')
    try:
        last = ''   # text of the field collected so far (without quotes)
        start = ''  # keyword prefix of that field, '' when none is pending
        index = 0   # current line number, used in error messages
        while 1: # python 2.1 has no True ;)
            line = orig_file.readline()
            index += 1
            if not line:
                break
            if line.strip() == '' or line[0] == '#':
                # Blank (or whitespace-only) line or comment: flush the
                # pending field and copy the line as it is.
                splitit(start, last, new_file)
                start = ''
                last = ''
                new_file.write(line)
                continue
            try:
                line = unicode(line, 'utf-8').strip()
            except UnicodeError:
                # The line cannot be parsed or re-encoded reliably, so stop
                # here instead of carrying on with undecoded data.
                sys.stdout.write("%s UTF-8 decoding error in line %d\n" % (file, index))
                sys.exit(1)
            if line[:1] == '"' and line[-1:] == '"':
                # Continuation string of the current field.
                last += line[1:-1]
                continue
            # new message
            splitit(start, last, new_file)
            keyword = ''
            for candidate in _simple_keywords:
                if line.startswith(candidate):
                    keyword = candidate
                    break
            if keyword:
                start = keyword
                # Drop the closing quote, the blanks after the keyword and
                # the opening quote.
                last = line[len(keyword):-1].lstrip()[1:]
            elif line.startswith("msgstr["):
                # For most languages, there will be only one digit
                if line[8:10] == "] ":
                    if line[7].isdigit():
                        start = line[:10]
                        last = line[10:-1].lstrip()[1:]
                    else:
                        sys.stdout.write("%s not-a-digit error for mgstr[] in line %d\n" % (file, index))
                        sys.exit(1)
                else:
                    posdigit = 7 # The first digit is at position 7
                    # Slices (not indexing) so that a truncated line ends up
                    # in the parse error below instead of an IndexError.
                    while line[posdigit:posdigit + 1].isdigit():
                        posdigit += 1
                    if posdigit > 7 and line[posdigit:posdigit + 2] == "] ":
                        posdigit += 2 # skip ] and the space
                        start = line[:posdigit]
                        last = line[posdigit:-1].lstrip()[1:]
                    else:
                        sys.stdout.write("%s parse error after msgstr[ in line %d\n" % (file, index))
                        sys.exit(1)
            else:
                sys.stdout.write("%s parsing error in line %d\n" % (file, index))
                sys.exit(1)
        # Flush the field that was still pending at end of file.
        splitit(start, last, new_file)
    finally:
        # Runs for normal completion, sys.exit() and unexpected errors alike.
        orig_file.close()
        new_file.close()
    # Only reached when the whole file was processed without error.
    os.rename(file + ".new", file)
# kate: space-indent off; indent-width 8; replace-tabs off;
|