Package translate :: Package tools :: Module pogrep
[hide private]
[frames] | no frames]

Source Code for Module translate.tools.pogrep

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2002-2008 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """Grep XLIFF, Gettext PO and TMX localization files 
 23   
 24  Matches are output to snippet files of the same type which can then be reviewed  
 25  and later merged using pomerge 
 26   
 27  See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and 
 28  usage instructions 
 29  """ 
 30   
 31  from translate.storage import factory 
 32  from translate.misc import optrecurse 
 33  from translate.misc.multistring import multistring 
 34  from translate.lang import data 
 35  import re 
 36  import locale 
 37   
class GrepFilter:
    """Filter that keeps the units of a translation store matching a search string.

    The search string is matched, either as a plain substring or as a regular
    expression, against the parts of each unit selected by ``searchparts``.
    """

    def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False, invertmatch=False, accelchar=None, encoding='utf-8', includeheader=False):
        """Configure the filter.

        searchstring  -- text (or regular expression) to look for; byte strings
                         are decoded using ``encoding``
        searchparts   -- collection naming the unit parts to search: 'source',
                         'target', 'notes', 'locations' (the older names
                         'msgid', 'msgstr' and 'comment' are also accepted);
                         when empty or None, source and target are searched
        ignorecase    -- compare lower-cased text
        useregexp     -- treat ``searchstring`` as a regular expression
        invertmatch   -- select the strings that do NOT match
        accelchar     -- accelerator marker to strip from text before matching
        encoding      -- encoding used to decode a byte-string ``searchstring``
        includeheader -- make filterfile() put a header unit in front of its
                         output
        """
        if isinstance(searchstring, unicode):
            self.searchstring = searchstring
        else:
            self.searchstring = searchstring.decode(encoding)
        self.searchstring = data.normalize(self.searchstring)
        if searchparts:
            # For now we still support the old terminology, except for the old 'source'
            # which has a new meaning now.
            self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
            self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
            self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
            self.search_locations = 'locations' in searchparts
        else:
            self.search_source = True
            self.search_target = True
            self.search_notes = False
            self.search_locations = False
        self.ignorecase = ignorecase
        if self.ignorecase:
            # NOTE(review): in regexp mode this lower-cases the pattern text
            # itself, so case-sensitive escapes are altered (e.g. \S becomes
            # \s).  Kept as-is for backward compatibility.
            self.searchstring = self.searchstring.lower()
        self.useregexp = useregexp
        if self.useregexp:
            self.searchpattern = re.compile(self.searchstring)
        self.invertmatch = invertmatch
        self.accelchar = accelchar
        self.includeheader = includeheader

    def matches(self, teststr):
        """Return True if ``teststr`` is selected by the search settings.

        The text is normalized, lower-cased when ignoring case and stripped of
        accelerator markers before it is compared; the result is negated when
        ``invertmatch`` is set.
        """
        teststr = data.normalize(teststr)
        if self.ignorecase:
            teststr = teststr.lower()
        if self.accelchar:
            # Literal replacement: the accelerator is a plain character, not a
            # regular expression.  A doubled accelerator is turned into a
            # placeholder before the single markers are removed.
            teststr = teststr.replace(self.accelchar + self.accelchar, "#")
            teststr = teststr.replace(self.accelchar, "")
        if self.useregexp:
            found = self.searchpattern.search(teststr) is not None
        else:
            found = self.searchstring in teststr
        if self.invertmatch:
            found = not found
        return found

    def _anymatch(self, text):
        """Return True if ``text``, or any string of a multistring, matches."""
        if isinstance(text, multistring):
            strings = text.strings
        else:
            strings = [text]
        for string in strings:
            if self.matches(string):
                return True
        return False

    def filterunit(self, unit):
        """Return True if any of the selected parts of ``unit`` matches.

        Header units are never selected.  Every selected part is tried in
        turn (source, target, notes, locations); the first match wins.
        """
        if unit.isheader():
            return False
        if self.search_source and self._anymatch(unit.source):
            return True
        if self.search_target and self._anymatch(unit.target):
            return True
        # A failed match on the notes must not stop the locations from being
        # searched, so only return early on success.
        if self.search_notes and self.matches(unit.getnotes()):
            return True
        if self.search_locations and self.matches(u" ".join(unit.getlocations())):
            return True
        return False

    def filterfile(self, thefile):
        """Return a new store of the same type holding the matching units.

        When ``includeheader`` is set and at least one unit matched, a header
        unit is inserted first: the header of ``thefile`` if its first unit is
        one, otherwise a newly made header.
        """
        thenewfile = type(thefile)()
        for unit in thefile.units:
            if self.filterunit(unit):
                thenewfile.addunit(unit)
        # Compare the number of units, not the list itself, against zero.
        if self.includeheader and len(thenewfile.units) > 0:
            if thefile.units[0].isheader():
                thenewfile.units.insert(0, thefile.units[0])
            else:
                thenewfile.units.insert(0, thenewfile.makeheader())
        return thenewfile
123
class GrepOptionParser(optrecurse.RecursiveOptionParser):
    """Recursive option parser that takes the search string as a bare argument."""

    def parse_args(self, args=None, values=None):
        """Parse the command line, mapping freestanding args to search string, input and output.

        The first freestanding argument is the search string.  Remaining
        freestanding arguments fill in the input and output when these were
        not given as options; anything left over is reported as an error.
        """
        (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values)
        # some intelligence as to what reasonable people might give on the command line
        if args:
            options.searchstring, args = args[0], args[1:]
        else:
            self.error("At least one argument must be given for the search string")
        if args and not options.input:
            if options.output:
                # Output already known: every remaining argument is an input.
                options.input, args = args, []
            else:
                # Keep the last argument aside as the output candidate.
                options.input, args = args[:-1], args[-1:]
        if args and not options.output:
            options.output, args = args[-1], args[:-1]
        if args:
            self.error("You have used an invalid combination of --input, --output and freestanding args")
        single_input = isinstance(options.input, list) and len(options.input) == 1
        if single_input:
            options.input = options.input[0]
        return (options, args)

    def set_usage(self, usage=None):
        """Set the usage string; without one, build it from each option's usage string."""
        if usage is not None:
            super(GrepOptionParser, self).set_usage(usage)
            return
        option_usages = [self.getusagestring(option) for option in self.option_list]
        self.usage = "%prog searchstring " + " ".join(option_usages)

    def run(self):
        """Parse the arguments, attach a GrepFilter to the options and process recursively."""
        (options, args) = self.parse_args()
        options.inputformats = self.inputformats
        options.outputoptions = self.outputoptions
        options.checkfilter = GrepFilter(
            options.searchstring,
            options.searchparts,
            options.ignorecase,
            options.useregexp,
            options.invertmatch,
            options.accelchar,
            locale.getpreferredencoding(),
            options.includeheader,
        )
        self.usepsyco(options)
        self.recursiveprocess(options)
166
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """Filter inputfile with checkfilter and write the matches to outputfile.

    Returns False, writing nothing, when the filtered store is empty;
    True otherwise.  templatefile is not used.
    """
    source_store = factory.getobject(inputfile)
    matched_store = checkfilter.filterfile(source_store)
    if not matched_store.isempty():
        outputfile.write(str(matched_store))
        return True
    return False
175
def cmdlineparser():
    """Create the pogrep option parser with its formats and options registered."""
    # Every supported extension maps to itself as the output extension.
    extensions = ("po", "pot", "xliff", "xlf", "xlff", "tmx")
    formats = dict([(extension, (extension, rungrep)) for extension in extensions])
    # NOTE(review): the None key is presumably the fallback for input without
    # a recognised extension -- confirm against optrecurse.
    formats[None] = ("po", rungrep)

    grepparser = GrepOptionParser(formats)
    searchable = ["source", "target", "notes", "locations", "msgid", "msgstr", "comment"]
    accelerators = ["&", "_", "~"]

    grepparser.add_option(
        "", "--search", dest="searchparts",
        action="append", type="choice", choices=searchable,
        metavar="SEARCHPARTS",
        help="searches the given parts (source, target, notes and locations)")
    grepparser.add_option(
        "-I", "--ignore-case", dest="ignorecase",
        action="store_true", default=False,
        help="ignore case distinctions")
    grepparser.add_option(
        "-e", "--regexp", dest="useregexp",
        action="store_true", default=False,
        help="use regular expression matching")
    grepparser.add_option(
        "-v", "--invert-match", dest="invertmatch",
        action="store_true", default=False,
        help="select non-matching lines")
    grepparser.add_option(
        "", "--accelerator", dest="accelchar",
        action="store", type="choice", choices=accelerators,
        metavar="ACCELERATOR",
        help="ignores the given accelerator when matching")
    grepparser.add_option(
        "", "--header", dest="includeheader",
        action="store_true", default=False,
        help="include a PO header in the output")

    grepparser.set_usage()
    grepparser.passthrough.append('checkfilter')
    grepparser.description = __doc__
    return grepparser
201
def main():
    """Build the command line parser and run it."""
    cmdlineparser().run()


# Run the tool when the module is executed as a script.
if __name__ == '__main__':
    main()