Package translate :: Package lang :: Module data
[hide private]
[frames | no frames]

Source Code for Module translate.lang.data

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2007 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """This module stores information and functionality that relates to plurals.""" 
 23   
 24  import unicodedata 
 25   
 26  # The key is the language code, which may contain country codes and modifiers. 
 27  # The value is a tuple: (Full name in English, nplurals, plural equation) 
 28   
 29  languages = { 
 30  'af': ('Afrikaans', 2, '(n != 1)'), 
 31  'ak': ('Akan', 2, 'n > 1'), 
 32  'ar': ('Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n>=3 && n<=10 ? 3 : n>=11 && n<=99 ? 4 : 5'), 
 33  'az': ('Azerbaijani', 2, '(n != 1)'), 
 34  'be': ('Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 35  'bg': ('Bulgarian', 2, '(n != 1)'), 
 36  'bn': ('Bengali', 2, '(n != 1)'), 
 37  'bo': ('Tibetan', 1, '0'), 
 38  'bs': ('Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 39  'ca': ('Catalan', 2, '(n != 1)'), 
 40  'cs': ('Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'), 
 41  'cy': ('Welsh', 2, '(n==2) ? 1 : 0'), 
 42  'da': ('Danish', 2, '(n != 1)'), 
 43  'de': ('German', 2, '(n != 1)'), 
 44  'dz': ('Dzongkha', 1, '0'), 
 45  'el': ('Greek', 2, '(n != 1)'), 
 46  'en': ('English', 2, '(n != 1)'), 
 47  'en_UK': ('English (United Kingdom)', 2, '(n != 1)'), 
 48  'en_ZA': ('English (South Africa)', 2, '(n != 1)'), 
 49  'eo': ('Esperanto', 2, '(n != 1)'), 
 50  'es': ('Spanish', 2, '(n != 1)'), 
 51  'et': ('Estonian', 2, '(n != 1)'), 
 52  'eu': ('Basque', 2, '(n != 1)'), 
 53  'fa': ('Persian', 1, '0'), 
 54  'fi': ('Finnish', 2, '(n != 1)'), 
 55  'fo': ('Faroese', 2, '(n != 1)'), 
 56  'fr': ('French', 2, '(n > 1)'), 
 57  'fur': ('Friulian', 2, '(n != 1)'), 
 58  'fy': ('Frisian', 2, '(n != 1)'), 
 59  'ga': ('Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'), 
 60  'gl': ('Galician', 2, '(n != 1)'), 
 61  'gu': ('Gujarati', 2, '(n != 1)'), 
 62  'he': ('Hebrew', 2, '(n != 1)'), 
 63  'hi': ('Hindi', 2, '(n != 1)'), 
 64  'hr': ('Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 65  'hu': ('Hungarian', 2, '(n != 1)'), 
 66  'id': ('Indonesian', 1, '0'), 
 67  'is': ('Icelandic', 2, '(n != 1)'), 
 68  'it': ('Italian', 2, '(n != 1)'), 
 69  'ja': ('Japanese', 1, '0'), 
 70  'ka': ('Georgian', 1, '0'), 
 71  'km': ('Khmer', 1, '0'), 
 72  'ko': ('Korean', 1, '0'), 
 73  'ku': ('Kurdish', 2, '(n != 1)'), 
 74  'lb': ('Letzeburgesch', 2, '(n != 1)'), 
 75  'ln': ('Lingala', 2, '(n > 1)'), 
 76  'lt': ('Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 77  'lv': ('Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'), 
 78  'mg': ('Malagasy', 2, '(n > 1)'), 
 79  'mn': ('Mongolian', 2, '(n != 1)'), 
 80  'mr': ('Marathi', 2, '(n != 1)'), 
 81  'ms': ('Malay', 1, '0'), 
 82  'mt': ('Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'), 
 83  'nah': ('Nahuatl', 2, '(n != 1)'), 
 84  'nb': ('Norwegian Bokmal', 2, '(n != 1)'), 
 85  'ne': ('Nepali', 2, '(n != 1)'), 
 86  'nl': ('Dutch', 2, '(n != 1)'), 
 87  'nn': ('Norwegian Nynorsk', 2, '(n != 1)'), 
 88  'nso': ('Northern Sotho', 2, '(n > 1)'), 
 89  'or': ('Oriya', 2, '(n != 1)'), 
 90  'pa': ('Punjabi', 2, '(n != 1)'), 
 91  'pap': ('Papiamento', 2, '(n != 1)'), 
 92  'pl': ('Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 93  'pt': ('Portugese', 2, '(n != 1)'), 
 94  'pt_BR': ('Portugese (Brazil)', 2, '(n > 1)'), 
 95  'ro': ('Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2);'), 
 96  'ru': ('Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 97  'sk': ('Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'), 
 98  'sl': ('Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'), 
 99  'sq': ('Albanian', 2, '(n != 1)'), 
100  'sr': ('Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
101  'sv': ('Swedish', 2, '(n != 1)'), 
102  'ta': ('Tamil', 2, '(n != 1)'), 
103  'th': ('Thai', 1, '0'), 
104  'tk': ('Turkmen', 2, '(n != 1)'), 
105  'tr': ('Turkish', 1, '0'), 
106  'uk': ('Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
107  'vi': ('Vietnamese',1 , '0'), 
108  'wa': ('Walloon', 2, '(n > 1)'), 
109  # Chinese is difficult because the main divide is on script, not really  
110  # country. Simplified Chinese is used mostly in China, Singapore and Malaysia. 
111  # Traditional Chinese is used mostly in Hong Kong, Taiwan and Macau. 
112  'zh_CN': ('Chinese (China)', 1, '0'), 
113  'zh_HK': ('Chinese (Hong Kong)', 1, '0'), 
114  'zh_TW': ('Chinese (Taiwan)', 1, '0'), 
115  } 
116   
def simplercode(code):
    """Return a simpler form of the given language code.

    The rightmost "@" modifier is stripped if there is one (script variants
    such as sr@Latn or gez_ER@abegede); otherwise everything from the
    rightmost "_" onwards is stripped (a country code, for example).

    @param code: The language code to simplify
    @return: The shortened code; code itself if it is empty or None; None
        if the code contains neither "@" nor "_"
    """
    # Check http://www.rfc-editor.org/rfc/bcp/bcp47.txt for possible extra issues
    # http://www.rfc-editor.org/rfc/rfc4646.txt
    # http://www.w3.org/International/articles/language-tags/
    if not code:
        return code

    # Separators are tried in order, so a modifier is dropped before a
    # country code is.
    for separator in ("@", "_"):
        position = code.rfind(separator)
        if position >= 0:
            return code[:position]

    # Nothing left to strip.
    return None
import gettext
import re

# Matches a name of the form "language (country)": group 1 is the run of
# characters before the bracket (no whitespace or "("), group 2 is the text
# inside the brackets.
dialectre = re.compile(r"([^(\s]+)\s*\(([^)]+)\)")

# Caches of gettext functions keyed by language code: iso639 is filled by
# gettext_lang() and iso3166 by gettext_country().
iso639, iso3166 = {}, {}
def tr_lang(langcode):
    """Gives a function that can translate a language name, even in the form
    "language (country)", into the language with iso code langcode.

    @param langcode: The code of the language to translate names into
    @return: A function that takes a name and returns its translation
    """
    translate_language = gettext_lang(langcode)
    translate_country = gettext_country(langcode)

    def handlelanguage(name):
        parts = dialectre.match(name)
        if parts is None:
            # A plain language name without a bracketed country part.
            return translate_language(name)
        # Translate the language and the country separately, then rebuild
        # the "language (country)" form.
        return u"%s (%s)" % (translate_language(parts.group(1)),
                             translate_country(parts.group(2)))

    return handlelanguage
def gettext_lang(langcode):
    """Returns a gettext function to translate language names into the given
    language.

    The function comes from the 'iso_639' gettext domain and is cached in
    iso639, so the catalogue is only looked up once per language code.
    Because fallback=True is passed, gettext.translation() returns a null
    translation object instead of raising when no catalogue is found.

    @param langcode: The code of the language to translate names into
    @return: A function that takes a message and returns its translation
    """
    if langcode not in iso639:
        t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        try:
            # Python 2: ugettext is the variant that returns unicode.
            iso639[langcode] = t.ugettext
        except AttributeError:
            # Python 3 translation objects only provide gettext.
            iso639[langcode] = t.gettext
    return iso639[langcode]
169
def gettext_country(langcode):
    """Returns a gettext function to translate country names into the given
    language.

    The function comes from the 'iso_3166' gettext domain and is cached in
    iso3166, so the catalogue is only looked up once per language code.
    Because fallback=True is passed, gettext.translation() returns a null
    translation object instead of raising when no catalogue is found.

    @param langcode: The code of the language to translate names into
    @return: A function that takes a message and returns its translation
    """
    if langcode not in iso3166:
        t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        try:
            # Python 2: ugettext is the variant that returns unicode.
            iso3166[langcode] = t.ugettext
        except AttributeError:
            # Python 3 translation objects only provide gettext.
            iso3166[langcode] = t.gettext
    return iso3166[langcode]
177
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form

    @param string: The string to be normalized
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string
    """
    normalized = unicodedata.normalize(normal_form, string)
    return normalized
186
def forceunicode(string):
    """Helper method to ensure that the parameter becomes unicode if not yet

    Byte strings are decoded using their "encoding" attribute if they have
    one, and UTF-8 otherwise. The result is passed through normalize().

    @param string: A byte string, a unicode string or None
    @return: The normalized unicode string, or None if string is None
    """
    if string is None:
        return None
    # bytes is the same type as str on Python 2 (2.6+), so this matches the
    # historical check there; on Python 3 only real byte strings need
    # decoding, since text strings have no decode() method.
    if isinstance(string, bytes):
        encoding = getattr(string, "encoding", "utf-8")
        string = string.decode(encoding)
    # NOTE(review): the source listing lost its indentation; normalization is
    # assumed to apply to every non-None value, not only to decoded byte
    # strings - confirm against the original file.
    return normalize(string)
196