You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

update-translations.py 6.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. #!/usr/bin/python
  2. # Copyright (c) 2014 Wladimir J. van der Laan
  3. # Distributed under the MIT/X11 software license, see the accompanying
  4. # file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5. '''
  6. Run this script from the root of the repository to update all translations from
  7. transifex.
  8. It will do the following automatically:
  9. - fetch all translations using the tx tool
  10. - post-process them into valid and committable format
  11. - remove invalid control characters
  12. - remove location tags (makes diffs less noisy)
  13. TODO:
  14. - auto-add new translations to the build system according to the translation process
  15. '''
  16. from __future__ import division, print_function
  17. import subprocess
  18. import re
  19. import sys
  20. import os
  21. import io
  22. import xml.etree.ElementTree as ET
  23. # Name of transifex tool
  24. TX = 'tx'
  25. # Name of source language file
  26. SOURCE_LANG = 'bitcoin_en.ts'
  27. # Directory with locale files
  28. LOCALE_DIR = 'src/qt/locale'
  29. def check_at_repository_root():
  30. if not os.path.exists('.git'):
  31. print('No .git directory found')
  32. print('Execute this script at the root of the repository', file=sys.stderr)
  33. exit(1)
  34. def fetch_all_translations():
  35. if subprocess.call([TX, 'pull', '-f']):
  36. print('Error while fetching translations', file=sys.stderr)
  37. exit(1)
  38. def find_format_specifiers(s):
  39. '''Find all format specifiers in a string.'''
  40. pos = 0
  41. specifiers = []
  42. while True:
  43. percent = s.find('%', pos)
  44. if percent < 0:
  45. break
  46. specifiers.append(s[percent+1])
  47. pos = percent+2
  48. return specifiers
  49. def split_format_specifiers(specifiers):
  50. '''Split format specifiers between numeric (Qt) and others (strprintf)'''
  51. numeric = []
  52. other = []
  53. for s in specifiers:
  54. if s in {'1','2','3','4','5','6','7','8','9'}:
  55. numeric.append(s)
  56. else:
  57. other.append(s)
  58. # numeric (Qt) can be present in any order, others (strprintf) must be in specified order
  59. return set(numeric),other
  60. def sanitize_string(s):
  61. '''Sanitize string for printing'''
  62. return s.replace('\n',' ')
  63. def check_format_specifiers(source, translation, errors):
  64. source_f = split_format_specifiers(find_format_specifiers(source))
  65. # assert that no source messages contain both Qt and strprintf format specifiers
  66. # if this fails, go change the source as this is hacky and confusing!
  67. assert(not(source_f[0] and source_f[1]))
  68. try:
  69. translation_f = split_format_specifiers(find_format_specifiers(translation))
  70. except IndexError:
  71. errors.append("Parse error in translation '%s'" % sanitize_string(translation))
  72. return False
  73. else:
  74. if source_f != translation_f:
  75. errors.append("Mismatch between '%s' and '%s'" % (sanitize_string(source), sanitize_string(translation)))
  76. return False
  77. return True
  78. def all_ts_files(suffix=''):
  79. for filename in os.listdir(LOCALE_DIR):
  80. # process only language files, and do not process source language
  81. if not filename.endswith('.ts'+suffix) or filename == SOURCE_LANG+suffix:
  82. continue
  83. if suffix: # remove provided suffix
  84. filename = filename[0:-len(suffix)]
  85. filepath = os.path.join(LOCALE_DIR, filename)
  86. yield(filename, filepath)
  87. FIX_RE = re.compile(b'[\x00-\x09\x0b\x0c\x0e-\x1f]')
  88. def remove_invalid_characters(s):
  89. '''Remove invalid characters from translation string'''
  90. return FIX_RE.sub(b'', s)
  91. # Override cdata escape function to make our output match Qt's (optional, just for cleaner diffs for
  92. # comparison, disable by default)
  93. _orig_escape_cdata = None
  94. def escape_cdata(text):
  95. text = _orig_escape_cdata(text)
  96. text = text.replace("'", '&apos;')
  97. text = text.replace('"', '&quot;')
  98. return text
  99. def postprocess_translations(reduce_diff_hacks=False):
  100. print('Checking and postprocessing...')
  101. if reduce_diff_hacks:
  102. global _orig_escape_cdata
  103. _orig_escape_cdata = ET._escape_cdata
  104. ET._escape_cdata = escape_cdata
  105. for (filename,filepath) in all_ts_files():
  106. os.rename(filepath, filepath+'.orig')
  107. have_errors = False
  108. for (filename,filepath) in all_ts_files('.orig'):
  109. # pre-fixups to cope with transifex output
  110. parser = ET.XMLParser(encoding='utf-8') # need to override encoding because 'utf8' is not understood only 'utf-8'
  111. with open(filepath + '.orig', 'rb') as f:
  112. data = f.read()
  113. # remove control characters; this must be done over the entire file otherwise the XML parser will fail
  114. data = remove_invalid_characters(data)
  115. tree = ET.parse(io.BytesIO(data), parser=parser)
  116. # iterate over all messages in file
  117. root = tree.getroot()
  118. for context in root.findall('context'):
  119. for message in context.findall('message'):
  120. numerus = message.get('numerus') == 'yes'
  121. source = message.find('source').text
  122. translation_node = message.find('translation')
  123. # pick all numerusforms
  124. if numerus:
  125. translations = [i.text for i in translation_node.findall('numerusform')]
  126. else:
  127. translations = [translation_node.text]
  128. for translation in translations:
  129. if translation is None:
  130. continue
  131. errors = []
  132. valid = check_format_specifiers(source, translation, errors)
  133. for error in errors:
  134. print('%s: %s' % (filename, error))
  135. if not valid: # set type to unfinished and clear string if invalid
  136. translation_node.clear()
  137. translation_node.set('type', 'unfinished')
  138. have_errors = True
  139. # Remove location tags
  140. for location in message.findall('location'):
  141. message.remove(location)
  142. # Remove entire message if it is an unfinished translation
  143. if translation_node.get('type') == 'unfinished':
  144. context.remove(message)
  145. # write fixed-up tree
  146. # if diff reduction requested, replace some XML to 'sanitize' to qt formatting
  147. if reduce_diff_hacks:
  148. out = io.BytesIO()
  149. tree.write(out, encoding='utf-8')
  150. out = out.getvalue()
  151. out = out.replace(b' />', b'/>')
  152. with open(filepath, 'wb') as f:
  153. f.write(out)
  154. else:
  155. tree.write(filepath, encoding='utf-8')
  156. return have_errors
  157. if __name__ == '__main__':
  158. check_at_repository_root()
  159. fetch_all_translations()
  160. postprocess_translations()