| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238 |
- #! /usr/bin/env python3
- # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
- """Generate binary message catalog from textual translation description.
- This program converts a textual Uniforum-style message catalog (.po file) into
- a binary GNU catalog (.mo file). This is essentially the same function as the
- GNU msgfmt program, however, it is a simpler implementation.
- Usage: msgfmt.py [OPTIONS] filename.po
- Options:
- -o file
- --output-file=file
- Specify the output file to write to. If omitted, output will go to a
- file named filename.mo (based off the input file name).
- -h
- --help
- Print this message and exit.
- -V
- --version
- Display version information and exit.
- """
- import os
- import sys
- import ast
- import getopt
- import struct
- import array
- from email.parser import HeaderParser
# Version string reported by the -V / --version command-line option.
__version__ = "1.1"

# Catalog being built: maps encoded msgid bytes to encoded msgstr bytes.
# add() inserts entries and generate() serializes the whole mapping.
MESSAGES = {}
def usage(code, msg=''):
    """Print the module help text to stderr and exit with status *code*.

    When *msg* is truthy it is printed on its own line after the help text.
    """
    texts = [__doc__]
    if msg:
        texts.append(msg)
    for text in texts:
        print(text, file=sys.stderr)
    sys.exit(code)
def add(id, str, fuzzy):
    """Store the translation *str* for *id* in MESSAGES.

    Fuzzy entries and entries with an empty translation are ignored.
    """
    if fuzzy or not str:
        return
    MESSAGES[id] = str
def generate():
    """Return the binary .mo catalog for the entries currently in MESSAGES.

    The result is a bytes object laid out as:

    * a header of seven 32-bit integers (magic, version, entry count,
      offset of the key index, offset of the value index, and a zero
      size/offset pair because no hash table is written);
    * the key index followed by the value index, each holding one
      (length, file offset) pair per entry;
    * all keys, then all values, each terminated by a NUL byte.

    Integers are packed in the platform's native byte order.
    """
    # The keys are sorted in the .mo file.
    keys = sorted(MESSAGES)
    values = [MESSAGES[key] for key in keys]

    # Each string is NUL terminated; the NUL does not count into the size
    # recorded in the index, but it does occupy space in the file.
    ids = b''.join(key + b'\0' for key in keys)
    strs = b''.join(value + b'\0' for value in values)

    # The header is 7 32-bit integers.  We don't use hash tables, so the
    # keys start right after the two index tables (2 * 8 bytes per entry),
    # and the values start after the keys.
    keystart = 7 * 4 + 16 * len(keys)
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    key_pos = value_pos = 0
    for key, value in zip(keys, values):
        koffsets += [len(key), key_pos + keystart]
        voffsets += [len(value), value_pos + valuestart]
        key_pos += len(key) + 1
        value_pos += len(value) + 1

    header = struct.pack("Iiiiiii",
                         0x950412de,             # Magic
                         0,                      # Version
                         len(keys),              # Number of entries
                         7 * 4,                  # Start of key index
                         7 * 4 + len(keys) * 8,  # Start of value index
                         0, 0)                   # Size and offset of hash table
    index = array.array("i", koffsets + voffsets).tobytes()
    return b''.join((header, index, ids, strs))
def make(filename, outfile):
    """Compile the .po catalog *filename* and write the .mo data to *outfile*.

    '.po' is appended to *filename* when it does not already end with it.
    If *outfile* is None, the output path is the input path with its
    extension replaced by '.mo'.

    The process exits with status 1 when the input cannot be read or when
    a structural error is found in the catalog.  A failure to write the
    output file is only reported on stderr.

    NOTE: parsed entries are stored through add() into the module-level
    MESSAGES dict, and nothing in this function empties it, so entries from
    an earlier make() call in the same process are also part of the output
    produced here.
    """
    ID = 1
    STR = 2

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = None
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, raw in enumerate(lines, 1):
        line = raw.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if line.startswith('#') and section == STR:
            add(msgid, msgstr, fuzzy)
            section = None
            fuzzy = 0
        # Record a fuzzy mark
        if line.startswith('#,') and 'fuzzy' in line:
            fuzzy = 1
        # Skip comments
        if line.startswith('#'):
            continue
        # Now we are in a msgid section, output previous section
        if line.startswith('msgid') and not line.startswith('msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                if not msgid:
                    # See whether there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
                # The fuzzy mark belongs to the entry that was just stored.
                # No comment line separated it from this msgid, so the new
                # entry carries no flags of its own and must not inherit it.
                fuzzy = 0
            section = ID
            line = line[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif line.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            line = line[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            section = STR
            if line.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                line = line.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                line = line[6:]
        # Skip empty lines
        line = line.strip()
        if not line:
            continue
        # What remains is a quoted string literal; literal_eval unquotes it
        # and resolves escape sequences without executing arbitrary code.
        text = ast.literal_eval(line)
        if section == ID:
            msgid += text.encode(encoding)
        elif section == STR:
            msgstr += text.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(text, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)
def main():
    """Parse the command line and compile every .po file named on it.

    Recognized options are -h/--help, -V/--version and -o/--output-file.
    When no input file is given, a hint is printed to stderr and the
    function returns without compiling anything.
    """
    try:
        options, filenames = getopt.getopt(
            sys.argv[1:], 'hVo:', ['help', 'version', 'output-file='])
    except getopt.error as err:
        usage(1, err)

    # parse options
    outfile = None
    for flag, value in options:
        if flag in ('-o', '--output-file'):
            outfile = value
        elif flag in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif flag in ('-h', '--help'):
            usage(0)

    # do it
    if filenames:
        for name in filenames:
            make(name, outfile)
        return

    print('No input file given', file=sys.stderr)
    print("Try `msgfmt --help' for more information.", file=sys.stderr)
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
|