1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """classes that hold units of comma-separated values (.csv) files (csvunit)
23 or entire files (csvfile) for use with localisation
24 """
25
try:
    import csv
except ImportError:
    # Only a failed import should trigger the fallback; a bare ``except``
    # would also hide KeyboardInterrupt, SystemExit and genuine errors.
    # When the interpreter has no usable csv module, use translate.misc.csv.
    from translate.misc import csv
32
33 from translate.misc import sparse
34 from translate.storage import base
35
def __init__(self, fileobj, fieldnames):
    """Read *fileobj* completely and tokenize its contents.

    fileobj: object with a read() method; its full content is kept in
        self.contents.
    fieldnames: stored unchanged as self.fieldnames.

    The parser is set up with comma and newline as its default tokens,
    carriage return as whitespace, string escaping switched off and the
    double quote as the only quote character. self.tokenpos starts at 0.
    """
    self.fieldnames = fieldnames
    self.contents = fileobj.read()

    # Configure the tokenizer fully before running it over the contents.
    tokenizer = sparse.SimpleParser(defaulttokenlist=[",", "\n"],
                                    whitespacechars="\r")
    tokenizer.stringescaping = 0
    tokenizer.quotechars = '"'
    self.parser = tokenizer

    self.tokens = tokenizer.tokenize(self.contents)
    self.tokenpos = 0
45
48
55
# NOTE(review): the def line for this body is not present in this listing;
# it raises StopIteration at end of input, so it is presumably the iterator's
# next() method -- confirm against the full file.
# Purpose: consume the tokens of one row starting at self.tokenpos and return
# a dict mapping each name in self.fieldnames to that row's field value.
57 lentokens = len(self.tokens)
# Skip any newline tokens in front of the row (blank lines).
58 while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
59 self.tokenpos += 1
# Nothing left after skipping newlines: signal the end of iteration.
60 if self.tokenpos >= lentokens:
61 raise StopIteration()
# Collect every token up to (not including) the next newline token.
62 thistokens = []
63 while self.tokenpos < lentokens and self.tokens[self.tokenpos] != "\n":
64 thistokens.append(self.tokens[self.tokenpos])
65 self.tokenpos += 1
# Consume the newline token(s) that terminate this row.
66 while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
67 self.tokenpos += 1
68 fields = []
69
# Fold the row's tokens into fields; fieldparts counts how many tokens were
# concatenated into the field currently being built.
70 currentfield = ''
71 fieldparts = 0
72 for token in thistokens:
73 if token == ',':
74
# A comma closes the current field. Only a field made of exactly one token
# goes through self.getvalue (helper not shown here; presumably it unquotes
# the token -- verify). Empty or multi-token fields are stored as built.
75 if fieldparts == 1:
76 currentfield = self.getvalue(currentfield)
77 fields.append(currentfield)
78 currentfield = ''
79 fieldparts = 0
80 else:
81 currentfield += token
82 fieldparts += 1
83
# Flush the last field of the row. When the row ends in a comma fieldparts
# is 0 here, so no extra empty field is appended.
84 if fieldparts:
85 if fieldparts == 1:
86 currentfield = self.getvalue(currentfield)
87 fields.append(currentfield)
# Map fields onto self.fieldnames by position: names without a matching field
# get "", and fields beyond the known names are dropped.
88 values = {}
89 for fieldnum in range(len(self.fieldnames)):
90 if fieldnum >= len(fields):
91 values[self.fieldnames[fieldnum]] = ""
92 else:
93 values[self.fieldnames[fieldnum]] = fields[fieldnum]
94 return values
95
96 -class csvunit(base.TranslationUnit):
97 spreadsheetescapes = [("+", "\\+"), ("-", "\\-"), ("=", "\\="), ("'", "\\'")]
103
112
121
130
def todict(self, encoding='utf-8'):
    """Return the unit as a dict with keys 'comment', 'source', 'target'.

    The source and target are first passed through
    self.add_spreadsheet_escapes. Any of the three values that is a
    unicode object is then encoded using *encoding*; other values are
    returned unchanged.
    """
    comment = self.comment
    source = self.source
    target = self.target
    source, target = self.add_spreadsheet_escapes(source, target)

    def encoded(value):
        # Only unicode objects are converted; everything else passes through.
        if isinstance(value, unicode):
            return value.encode(encoding)
        return value

    result = {}
    result['comment'] = encoded(comment)
    result['source'] = encoded(source)
    result['target'] = encoded(target)
    return result
141
142 -class csvfile(base.TranslationStore):
143 """This class represents a .csv file with various lines.
144 The default format contains three columns: comments, source, target"""
145 UnitClass = csvunit
def __init__(self, inputfile=None, fieldnames=None):
    """Create a csv store, optionally loading it from *inputfile*.

    inputfile: optional file-like object. When given, it is read in full,
        closed, and its content is passed to self.parse(). Its ``name``
        attribute, if any, becomes self.filename (otherwise '').
    fieldnames: optional column names, either a sequence or one
        comma-separated string (each name is stripped of surrounding
        whitespace). Defaults to ['comment', 'source', 'target'].
    """
    base.TranslationStore.__init__(self, unitclass=self.UnitClass)
    self.units = []
    if fieldnames is None:
        fieldnames = ['comment', 'source', 'target']
    elif isinstance(fieldnames, basestring):
        fieldnames = [fieldname.strip() for fieldname in fieldnames.split(",")]
    self.fieldnames = fieldnames
    self.filename = getattr(inputfile, 'name', '')
    if inputfile is not None:
        try:
            csvsrc = inputfile.read()
        finally:
            # Close the file even when read() raises, so the handle is
            # never leaked; parsing happens only after a successful read.
            inputfile.close()
        self.parse(csvsrc)
160
161 - def parse(self, csvsrc):
168
175
184
185
if __name__ == '__main__':
    # Script mode: parse CSV from standard input into an empty store and
    # write the store's string form to standard output.
    import sys
    store = csvfile()
    stdin_text = sys.stdin.read()
    store.parse(stdin_text)
    serialized = str(store)
    sys.stdout.write(serialized)
191