msgfmt.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. #! /usr/bin/env python3
  2. # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
  3. """Generate binary message catalog from textual translation description.
  4. This program converts a textual Uniforum-style message catalog (.po file) into
  5. a binary GNU catalog (.mo file). This is essentially the same function as the
  6. GNU msgfmt program, however, it is a simpler implementation.
  7. Usage: msgfmt.py [OPTIONS] filename.po
  8. Options:
  9. -o file
  10. --output-file=file
  11. Specify the output file to write to. If omitted, output will go to a
  12. file named filename.mo (based off the input file name).
  13. -h
  14. --help
  15. Print this message and exit.
  16. -V
  17. --version
  18. Display version information and exit.
  19. """
  20. import os
  21. import sys
  22. import ast
  23. import getopt
  24. import struct
  25. import array
  26. from email.parser import HeaderParser
# Version string printed by the -V / --version command-line option.
__version__ = "1.1"
# Catalog under construction: add() stores msgid -> msgstr here and
# generate() serializes it.  make() passes both as encoded bytes.
MESSAGES = {}
  29. def usage(code, msg=''):
  30. print(__doc__, file=sys.stderr)
  31. if msg:
  32. print(msg, file=sys.stderr)
  33. sys.exit(code)
  34. def add(id, str, fuzzy):
  35. "Add a non-fuzzy translation to the dictionary."
  36. global MESSAGES
  37. if not fuzzy and str:
  38. MESSAGES[id] = str
  39. def generate():
  40. "Return the generated output."
  41. global MESSAGES
  42. # the keys are sorted in the .mo file
  43. keys = sorted(MESSAGES.keys())
  44. offsets = []
  45. ids = strs = b''
  46. for id in keys:
  47. # For each string, we need size and file offset. Each string is NUL
  48. # terminated; the NUL does not count into the size.
  49. offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
  50. ids += id + b'\0'
  51. strs += MESSAGES[id] + b'\0'
  52. output = ''
  53. # The header is 7 32-bit unsigned integers. We don't use hash tables, so
  54. # the keys start right after the index tables.
  55. # translated string.
  56. keystart = 7*4+16*len(keys)
  57. # and the values start after the keys
  58. valuestart = keystart + len(ids)
  59. koffsets = []
  60. voffsets = []
  61. # The string table first has the list of keys, then the list of values.
  62. # Each entry has first the size of the string, then the file offset.
  63. for o1, l1, o2, l2 in offsets:
  64. koffsets += [l1, o1+keystart]
  65. voffsets += [l2, o2+valuestart]
  66. offsets = koffsets + voffsets
  67. output = struct.pack("Iiiiiii",
  68. 0x950412de, # Magic
  69. 0, # Version
  70. len(keys), # # of entries
  71. 7*4, # start of key index
  72. 7*4+len(keys)*8, # start of value index
  73. 0, 0) # size and offset of hash table
  74. output += array.array("i", offsets).tobytes()
  75. output += ids
  76. output += strs
  77. return output
  78. def make(filename, outfile):
  79. ID = 1
  80. STR = 2
  81. # Compute .mo name from .po name and arguments
  82. if filename.endswith('.po'):
  83. infile = filename
  84. else:
  85. infile = filename + '.po'
  86. if outfile is None:
  87. outfile = os.path.splitext(infile)[0] + '.mo'
  88. try:
  89. with open(infile, 'rb') as f:
  90. lines = f.readlines()
  91. except IOError as msg:
  92. print(msg, file=sys.stderr)
  93. sys.exit(1)
  94. section = None
  95. fuzzy = 0
  96. # Start off assuming Latin-1, so everything decodes without failure,
  97. # until we know the exact encoding
  98. encoding = 'latin-1'
  99. # Parse the catalog
  100. lno = 0
  101. for l in lines:
  102. l = l.decode(encoding)
  103. lno += 1
  104. # If we get a comment line after a msgstr, this is a new entry
  105. if l[0] == '#' and section == STR:
  106. add(msgid, msgstr, fuzzy)
  107. section = None
  108. fuzzy = 0
  109. # Record a fuzzy mark
  110. if l[:2] == '#,' and 'fuzzy' in l:
  111. fuzzy = 1
  112. # Skip comments
  113. if l[0] == '#':
  114. continue
  115. # Now we are in a msgid section, output previous section
  116. if l.startswith('msgid') and not l.startswith('msgid_plural'):
  117. if section == STR:
  118. add(msgid, msgstr, fuzzy)
  119. if not msgid:
  120. # See whether there is an encoding declaration
  121. p = HeaderParser()
  122. charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
  123. if charset:
  124. encoding = charset
  125. section = ID
  126. l = l[5:]
  127. msgid = msgstr = b''
  128. is_plural = False
  129. # This is a message with plural forms
  130. elif l.startswith('msgid_plural'):
  131. if section != ID:
  132. print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
  133. file=sys.stderr)
  134. sys.exit(1)
  135. l = l[12:]
  136. msgid += b'\0' # separator of singular and plural
  137. is_plural = True
  138. # Now we are in a msgstr section
  139. elif l.startswith('msgstr'):
  140. section = STR
  141. if l.startswith('msgstr['):
  142. if not is_plural:
  143. print('plural without msgid_plural on %s:%d' % (infile, lno),
  144. file=sys.stderr)
  145. sys.exit(1)
  146. l = l.split(']', 1)[1]
  147. if msgstr:
  148. msgstr += b'\0' # Separator of the various plural forms
  149. else:
  150. if is_plural:
  151. print('indexed msgstr required for plural on %s:%d' % (infile, lno),
  152. file=sys.stderr)
  153. sys.exit(1)
  154. l = l[6:]
  155. # Skip empty lines
  156. l = l.strip()
  157. if not l:
  158. continue
  159. l = ast.literal_eval(l)
  160. if section == ID:
  161. msgid += l.encode(encoding)
  162. elif section == STR:
  163. msgstr += l.encode(encoding)
  164. else:
  165. print('Syntax error on %s:%d' % (infile, lno), \
  166. 'before:', file=sys.stderr)
  167. print(l, file=sys.stderr)
  168. sys.exit(1)
  169. # Add last entry
  170. if section == STR:
  171. add(msgid, msgstr, fuzzy)
  172. # Compute output
  173. output = generate()
  174. try:
  175. with open(outfile,"wb") as f:
  176. f.write(output)
  177. except IOError as msg:
  178. print(msg, file=sys.stderr)
  179. def main():
  180. try:
  181. opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
  182. ['help', 'version', 'output-file='])
  183. except getopt.error as msg:
  184. usage(1, msg)
  185. outfile = None
  186. # parse options
  187. for opt, arg in opts:
  188. if opt in ('-h', '--help'):
  189. usage(0)
  190. elif opt in ('-V', '--version'):
  191. print("msgfmt.py", __version__)
  192. sys.exit(0)
  193. elif opt in ('-o', '--output-file'):
  194. outfile = arg
  195. # do it
  196. if not args:
  197. print('No input file given', file=sys.stderr)
  198. print("Try `msgfmt --help' for more information.", file=sys.stderr)
  199. return
  200. for filename in args:
  201. make(filename, outfile)
  202. if __name__ == '__main__':
  203. main()