Package translate :: Package storage :: Module mo
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.mo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2007 Zuza Software Foundation 
  5  # 
  6  # the function "__str__" was derived from Python v2.4 
  7  #       (Tools/i18n/msgfmt.py - function "generate"): 
  8  #   Written by Martin v. Lowis <loewis@informatik.hu-berlin.de> 
  9  #   Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation. 
 10  #   All rights reserved. 
 11  #   original license: Python Software Foundation (version 2) 
 12  #  
 13  # 
 14  # This file is part of translate. 
 15  # 
 16  # translate is free software; you can redistribute it and/or modify 
 17  # it under the terms of the GNU General Public License as published by 
 18  # the Free Software Foundation; either version 2 of the License, or 
 19  # (at your option) any later version. 
 20  #  
 21  # translate is distributed in the hope that it will be useful, 
 22  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 23  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 24  # GNU General Public License for more details. 
 25  # 
 26  # You should have received a copy of the GNU General Public License 
 27  # along with translate; if not, write to the Free Software 
 28  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 29  # 
 30   
 31  """Module for parsing Gettext .mo files for translation. 
 32   
 33  The coding of .mo files was produced from documentation in Gettext 0.16 and  
 34  from observation and testing of existing .mo files in the wild. 
 35   
 36  The class does not implement any of the hashing components of Gettext.  This  
 37  will probably make the output file slower in some instances. 
 38  """ 
 39   
 40  from translate.storage import base 
 41  from translate.storage import po 
 42  from translate.misc.multistring import multistring 
 43  import struct 
 44  import array 
 45  import re 
 46   
 47  MO_MAGIC_NUMBER = 0x950412deL 
 48   
def mounpack(filename='messages.mo'):
    """Helper to unpack Gettext MO files into a Python string

    Reads the file named by filename and prints its contents as a sequence
    of "\\xNN" escapes, one escape per byte.

    filename -- path of the .mo file to dump (default 'messages.mo')
    """
    # MO files are binary: text mode would translate line endings on some
    # platforms (and attempt to decode the data on Python 3).
    f = open(filename, 'rb')
    try:
        s = f.read()
    finally:
        # Close the file even when reading fails.
        f.close()
    # bytearray yields integer byte values on both Python 2 and Python 3.
    print("".join(["\\x%02x" % byte for byte in bytearray(s)]))
55
class mounit(base.TranslationUnit):
    """A single translation message as held in a .mo file."""

    def __init__(self, source=None):
        """Create the unit with empty context and msgid comment lists.

        The two lists are created before the base class initialiser is
        called with source.
        """
        self.msgctxt = []
        self.msgidcomments = []
        super(mounit, self).__init__(source)

    def getcontext(self):
        """Return the message context as one string, or None if unset."""
        # Still need to handle KDE comments
        context_parts = self.msgctxt
        if context_parts is not None:
            return "".join(context_parts)
        return None

    def isheader(self):
        """Tell whether this unit is the header entry (empty source)."""
        unit_source = self.source
        return unit_source == ""

    def istranslatable(self):
        """Tell whether this unit can be translated (non-empty source)."""
        if self.source:
            return True
        return False
77
class mofile(base.TranslationStore):
    """A class representing a .mo file."""
    UnitClass = mounit

    def __init__(self, inputfile=None, unitclass=mounit):
        """Create a store, optionally loading it from inputfile.

        inputfile -- when not None, handed to self.parsestring()
        unitclass -- unit class registered for this store (default mounit)
        """
        self.UnitClass = unitclass
        base.TranslationStore.__init__(self, unitclass=unitclass)
        self.units = []
        self.filename = ''
        if inputfile is not None:
            # NOTE(review): parsestring() is inherited and not visible here;
            # confirm it fills this instance rather than returning a new store.
            self.parsestring(inputfile)

    def __str__(self):
        """Output a string representation of the MO data file

        Units without a target are left out.  Sources are encoded as UTF-8
        and written in sorted order; no hash table is generated.
        """
        # check the header of this file for the copyright note of this function
        MESSAGES = {}
        for unit in self.units:
            if isinstance(unit.source, multistring):
                # plural forms of the source are separated by NUL
                source = "".join(unit.msgidcomments) + "\0".join(unit.source.strings)
            else:
                source = "".join(unit.msgidcomments) + unit.source
            if unit.msgctxt:
                # the context is stored in front of the source, separated by EOT
                source = "".join(unit.msgctxt) + "\x04" + source
            if isinstance(unit.target, multistring):
                target = "\0".join(unit.target.strings)
            else:
                target = unit.target
            if unit.target:
                MESSAGES[source.encode("utf-8")] = target
        # the keys are sorted in the .mo file
        keys = sorted(MESSAGES)
        offsets = []
        idparts = []
        strparts = []
        # Running sizes of the key and value tables built so far; tracking
        # them avoids re-concatenating the whole table for every entry.
        idsize = strsize = 0
        for key in keys:
            # For each string, we need size and file offset.  Each string is NUL
            # terminated; the NUL does not count into the size.
            # TODO: We don't do any encoding detection from the PO Header
            string = MESSAGES[key]  # key is already encoded for use as a dictionary key
            if isinstance(string, unicode):
                string = string.encode('utf-8')
            offsets.append((idsize, len(key), strsize, len(string)))
            idparts.append(key + '\0')
            strparts.append(string + '\0')
            idsize += len(key) + 1
            strsize += len(string) + 1
        ids = ''.join(idparts)
        strs = ''.join(strparts)
        # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
        # the keys start right after the index tables.
        keystart = 7*4+16*len(keys)
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets.extend([l1, o1+keystart])
            voffsets.extend([l2, o2+valuestart])
        offsets = koffsets + voffsets
        output = struct.pack("Iiiiiii",
                             MO_MAGIC_NUMBER,   # Magic
                             0,                 # Version
                             len(keys),         # # of entries
                             7*4,               # start of key index
                             7*4+len(keys)*8,   # start of value index
                             0, 0)              # size and offset of hash table
        output = output + array.array("i", offsets).tostring()
        output = output + ids
        output = output + strs
        return output

    def parse(self, input):
        """parses the given file or file source string

        input -- the MO data as a string, or an object with a read() method;
                 such an object is read completely and then closed.  If it
                 has a name attribute, that becomes self.filename.

        A unit is added to the store for every entry found.

        Raises ValueError when the data is too short to hold an MO header,
        does not start with the MO magic number in either byte order, or
        declares a version greater than 1.
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            mosrc = input.read()
            input.close()
            input = mosrc
        headersize = 7*4
        # Empty or truncated data cannot be unpacked; report it the same way
        # as any other non-MO input instead of leaking a struct.error.
        if len(input) < headersize:
            raise ValueError("This is not an MO file")
        # The magic number reveals the byte order the file was written in.
        little, = struct.unpack("<L", input[:4])
        big, = struct.unpack(">L", input[:4])
        if little == MO_MAGIC_NUMBER:
            endian = "<"
        elif big == MO_MAGIC_NUMBER:
            endian = ">"
        else:
            raise ValueError("This is not an MO file")
        magic, version, lenkeys, startkey, startvalue, sizehash, offsethash = \
            struct.unpack("%sLiiiiii" % endian, input[:headersize])
        if version > 1:
            raise ValueError("Unable to process MO files with versions > 1.  This is a %d version MO file" % version)
        encoding = 'UTF-8'
        entryformat = "%sii" % endian
        for i in range(lenkeys):
            # Each index entry is a (length, offset) pair of 32-bit integers.
            nextkey = startkey+(i*2*4)
            nextvalue = startvalue+(i*2*4)
            klength, koffset = struct.unpack(entryformat, input[nextkey:nextkey+(2*4)])
            vlength, voffset = struct.unpack(entryformat, input[nextvalue:nextvalue+(2*4)])
            source = input[koffset:koffset+klength]
            rawtarget = input[voffset:voffset+vlength]
            context = None
            if "\x04" in source:
                # Split on the first EOT only, so that a further EOT byte in
                # the key cannot break the two-way unpacking.
                context, source = source.split("\x04", 1)
            # Still need to handle KDE comments
            source = multistring(source.split("\0"), encoding=encoding)
            if source == "":
                # The entry with the empty source is the header; the charset it
                # declares is used for decoding from this entry onwards.
                charset = re.search("charset=([^\\s]+)", rawtarget)
                if charset:
                    encoding = po.encodingToUse(charset.group(1))
            target = multistring(rawtarget.split("\0"), encoding=encoding)
            # NOTE(review): units are always created as mounit, not
            # self.UnitClass -- confirm whether a custom unitclass should apply.
            newunit = mounit(source)
            newunit.settarget(target)
            if context is not None:
                newunit.msgctxt.append(context)
            self.addunit(newunit)
193