1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
"""Module for parsing Qt .qm files.

@note: Based on documentation from Gettext's .qm implementation (see write-qt.c) and on
observation of the output of lrelease.
@note: Certain deprecated section tags are not implemented. Parsing stops at such a tag and
the missing tag is printed. They are easy to implement and should follow the structure of
tag 03 (Translation). We could find no examples that use these tags, so we would rather leave
them unimplemented until we actually have test data.
@note: Many .qm files cannot be parsed because they do not contain the source text. We assume
that, since a hash table is used to look up the data, the source text is not actually needed.
It seems, however, that Qt4's lrelease includes all data in the resulting .qm file.
@todo: We can only parse, not create, a .qm file. The main issue is that we need to implement
the hashing algorithm (which seems to be identical to the Gettext hash algorithm). Unlike
Gettext, it seems that the hash is required, but that has not been validated.
@todo: The code parses files correctly, but it could be cleaned up to be more readable,
especially the part that breaks the file into sections.
"""
41
42 from translate.storage import base
43 from translate.misc.multistring import multistring
44 import codecs
45 import struct
46 import sys
47
# The 16-byte signature at the start of every .qm file, as four big-endian unsigned
# 32-bit words (the layout produced by struct.unpack(">4L", ...)).  Written without the
# Python 2-only "L" suffix so the literal is valid on both Python 2 and Python 3.
QM_MAGIC_NUMBER = (0x3CB86418, 0xCAEF9C95, 0xCD211CBF, 0x60A1BDDD)
49
def qmunpack(qmfile='Example.qm'):
    """Helper to unpack Qt .qm files into a Python string.

    Reads C{qmfile} as raw bytes and prints every byte as a C{\\xNN} escape on a single
    line, so that the output can be pasted into a Python string literal.

    NOTE(review): the C{def} line is missing from the listing this was recovered from;
    the function name and the default file name were reconstructed -- confirm them
    against the original file.

    @param qmfile: path of the .qm file to dump.
    """
    # A .qm file is binary data: read it in binary mode so that no newline translation
    # or text decoding is applied, and let the context manager close the file even if
    # read() fails.
    with open(qmfile, 'rb') as f:
        s = f.read()
    # bytearray yields integer byte values on both Python 2 (str) and Python 3 (bytes).
    print(("\\x%02x" * len(s)) % tuple(bytearray(s)))
56
class qmunit(base.TranslationUnit):
    """A class representing a .qm translation message."""
    # NOTE(review): the listing this was recovered from skips the two original lines
    # that follow the docstring; whatever members they defined are not reproduced here
    # -- confirm against the original file.
61
class qmfile(base.TranslationStore):
    """A class representing a .qm file."""
    UnitClass = qmunit

    # NOTE(review): the listing this was recovered from skips the original lines between
    # ``UnitClass`` and ``__str__`` (presumably the constructor) and the ``def`` lines of
    # both methods below.  The method headers were reconstructed from their bodies;
    # confirm them, and restore the skipped lines, from the original file.

    def __str__(self):
        """Output a string representation of the .qm data file"""
        # Writing .qm files is not implemented, so there is nothing to serialise.
        return ""

    def parse(self, input):
        """Parse a .qm file and add one unit per message to this store.

        The data consists of a 16-byte magic number followed by sections, each made of
        a one-byte tag, a 32-bit big-endian length and that many bytes of payload.
        Only the messages section (tag 0x69) is interpreted.  If the file has no
        messages section, no units are added.

        @param input: the raw contents of a .qm file, or a file-like object with a
        C{read()} method.  A file-like object is read completely and then closed.
        @raise ValueError: if the data does not start with the .qm magic number, or if
        a message ends without both a source text and a translation (the old .qm
        format, which does not store the source text).
        @raise struct.error: if a section or a record is truncated.
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            qmsrc = input.read()
            input.close()
            input = qmsrc

        # Data shorter than the magic number cannot be a .qm file either; report it
        # with the same error instead of letting struct fail on the short slice.
        if len(input) < 16 or struct.unpack(">4L", input[:16]) != QM_MAGIC_NUMBER:
            raise ValueError("This is not a .qm file")

        # --- Pass 1: walk the section table to locate the messages section. ---
        sectionheader = 5  # one tag byte + 32-bit big-endian payload length
        messages_start = messages_end = None
        startsection = 16
        while startsection < len(input):
            section_type, length = struct.unpack(
                ">bL", input[startsection:startsection + sectionheader])
            body_start = startsection + sectionheader
            body_end = body_start + length
            if section_type in (0x42, 0x69, 0x2f):  # hashes, messages, contexts
                # Known sections must be complete.  A bounds check replaces unpacking
                # the whole payload into a throwaway tuple of bytes.
                if body_end > len(input):
                    raise struct.error(
                        "section 0x%02x is truncated: expected %d bytes, found %d"
                        % (section_type, length, len(input) - body_start))
                if section_type == 0x69:
                    messages_start, messages_end = body_start, body_end
            startsection = body_end

        if messages_start is None:
            # No messages section: there is nothing to add to the store.
            return

        # --- Pass 2: decode the records of the messages section. ---
        pos = messages_start
        source = target = None
        while pos < messages_end:
            subsection, = struct.unpack(">b", input[pos:pos + 1])
            pos += 1
            if subsection == 0x01:  # End: the current message is complete
                if source is None or target is None:
                    raise ValueError("Old .qm format with no source defined")
                newunit = self.addsourceunit(source)
                newunit.target = target
                source = target = None
                continue

            # Every other record starts with a signed 32-bit value.  It is read even
            # for the hash record (where it is not a length) so that a record cut off
            # at this point is reported as a struct.error.
            length, = struct.unpack(">l", input[pos:pos + 4])
            if subsection == 0x03:  # Translation (UTF-16 big-endian)
                if length != -1:
                    raw, = struct.unpack(">%ds" % length, input[pos + 4:pos + 4 + length])
                    string = codecs.utf_16_be_decode(raw)[0]
                    if target:
                        # A further translation of the same message: a plural form.
                        target.strings.append(string)
                    else:
                        target = multistring(string)
                    pos = pos + 4 + length
                else:
                    # A length of -1 marks a translation with no text.
                    target = ""
                    pos = pos + 4
            elif subsection == 0x06:  # SourceText
                source = input[pos + 4:pos + 4 + length].decode('iso-8859-1')
                pos = pos + 4 + length
            elif subsection in (0x07, 0x08):  # Context, Comment
                # Not stored on the unit; only stepped over.
                pos = pos + 4 + length
            elif subsection == 0x05:  # Hash: a fixed four-byte value
                pos = pos + 4
            else:
                # Deprecated or unknown record: report it and stop parsing, keeping
                # the units added so far.
                subsection_name = {
                    0x02: "SourceText16",
                    0x04: "Context16",
                }.get(subsection, "Unkown")
                sys.stderr.write("Unimplemented: %s %s\n" % (subsection, subsection_name))
                return
165