beremiz: comparison svghmi/i18n.py

equal deleted inserted replaced

-:b2aa31f3a0ce
+:b5017dd5c049
 import subprocess
 import time
 import ast
 import wx
 import re
+from email.parser import HeaderParser
 # to have it for python 2, had to install
 # https://pypi.org/project/pycountry/18.12.8/
 # python2 -m pip install pycountry==18.12.8 --user
 import pycountry
 from dialogs import MessageBoxOnce
+from POULibrary import UserAddressedException
 cmd_parser = re.compile(r'(?:"([^"]+)"\s*|([^\s]+)\s*)?')
 def open_pofile(pofile):
 """ Opens PO file with POEdit """
 """ Converts XML tree from 'extract_i18n' templates into a list of tuples """
 messages = []
 for msg in msgs:
 messages.append((
-"\n".join([line.text for line in msg]),
+b"\n".join([line.text.encode() for line in msg]),
-msg.get("label"), msg.get("id")))
+msg.get("label").encode(), msg.get("id").encode()))
 return messages
 def SaveCatalog(fname, messages):
 """ Save messages given as list of tupple (msg,label,id) in POT file """
 w = POTWriter()
 w.ImportMessages(messages)
-with open(fname, 'w') as POT_file:
+with open(fname, 'wb') as POT_file:
 w.write(POT_file)
 def GetPoFiles(dirpath):
 po_files = [fname for fname in os.listdir(dirpath) if fname.endswith(".po")]
 po_files.sort()
 """ Read all PO files from a directory and return a list of (langcode, translation_dict) tuples """
 translations = []
 for translation_name, po_path in GetPoFiles(dirpath):
 r = POReader()
-with open(po_path, 'r') as PO_file:
+r.read(po_path)
-r.read(PO_file)
+translations.append((translation_name, r.get_messages()))
-translations.append((translation_name, r.get_messages()))
 return translations
 def MatchTranslations(translations, messages, errcallback):
 """
 Matches translations against original message catalog,
 msgsroot = etree.SubElement(result, "messages")
 for msgid, msgs in translated_messages:
 msgidel = etree.SubElement(msgsroot, "msgid")
 for msg in msgs:
 msgel = etree.SubElement(msgidel, "msg")
-for line in msg.split("\n"):
+for line in msg.split(b"\n"):
 lineel = etree.SubElement(msgel, "line")
-lineel.text = escape(line.encode("utf-8")).decode("utf-8")
+lineel.text = escape(line).decode()
 return result
+# Code below is based on :
+#  cpython/Tools/i18n/pygettext.py
-locpfx = '#:svghmi.svg:'
+#  cpython/Tools/i18n/msgfmt.py
+locpfx = b'#:svghmi.svg:'
 pot_header = '''\
 # SOME DESCRIPTIVE TITLE.
 # Copyright (C) YEAR ORGANIZATION
 # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
 "Generated-By: SVGHMI 1.0\\n"
 '''
 escapes = []
-def make_escapes(pass_iso8859):
+def make_escapes():
 global escapes
-escapes = [chr(i) for i in range(256)]
+escapes = [b"\%03o" % i for i in range(128)]
-if pass_iso8859:
+for i in range(32, 127):
-# Allow iso-8859 characters to pass through so that e.g. 'msgid
+escapes[i] = bytes([i])
-# "Höhe"' would result not result in 'msgid "H\366he"'.  Otherwise we
+escapes[ord('\\')] = b'\\\\'
-# escape any character outside the 32..126 range.
+escapes[ord('\t')] = b'\\t'
-mod = 128
+escapes[ord('\r')] = b'\\r'
-else:
+escapes[ord('\n')] = b'\\n'
-mod = 256
+escapes[ord('\"')] = b'\\"'
-for i in range(mod):
-if not(32 <= i <= 126):
+make_escapes()
-escapes[i] = "\\%03o" % i
-escapes[ord('\\')] = '\\\\'
-escapes[ord('\t')] = '\\t'
-escapes[ord('\r')] = '\\r'
-escapes[ord('\n')] = '\\n'
-escapes[ord('\"')] = '\\"'
-make_escapes(pass_iso8859 = True)
-EMPTYSTRING = ''
 def escape(s):
-global escapes
+l = [escapes[c] if c < 128 else bytes([c]) for c in s]
-s = list(s)
+return b''.join(l)
-for i in range(len(s)):
+#return bytes([escapes[c] if c < 128 else c for c in s])
-s[i] = escapes[ord(s[i])]
-return EMPTYSTRING.join(s)
 def normalize(s):
 # This converts the various Python string types into a format that is
 # appropriate for .po files, namely much closer to C style.
-lines = s.split('\n')
+lines = s.split(b'\n')
 if len(lines) == 1:
-s = '"' + escape(s) + '"'
+s = b'"' + escape(s) + b'"'
 else:
 if not lines[-1]:
 del lines[-1]
-lines[-1] = lines[-1] + '\n'
+lines[-1] = lines[-1] + b'\n'
 for i in range(len(lines)):
 lines[i] = escape(lines[i])
-lineterm = '\\n"\n"'
+lineterm = b'\\n"\n"'
-s = '""\n"' + lineterm.join(lines) + '"'
+s = b'""\n"' + lineterm.join(lines) + b'"'
 return s
 class POTWriter:
 def __init__(self):
 self.__messages = {}
 def ImportMessages(self, msgs):
 for  msg, label, svgid in msgs:
-self.addentry(msg.encode("utf-8"), label, svgid)
+self.addentry(msg, label, svgid)
 def addentry(self, msg, label, svgid):
 entry = (label, svgid)
 self.__messages.setdefault(msg, set()).add(entry)
 def write(self, fp):
-timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
+timestamp = time.strftime('%Y-%m-%d %H:%M%z')
-print(pot_header % {'time': timestamp}, file=fp)
+header = pot_header % {'time': timestamp}
+fp.write(header.encode())
 reverse = {}
-for k, v in list(self.__messages.items()):
+for k, v in self.__messages.items():
 keys = list(v)
 keys.sort()
 reverse.setdefault(tuple(keys), []).append((k, v))
-rkeys = list(reverse.keys())
+rkeys = sorted(reverse.keys())
-rkeys.sort()
 for rkey in rkeys:
 rentries = reverse[rkey]
 rentries.sort()
 for k, v in rentries:
 v = list(v)
 v.sort()
 locline = locpfx
 for label, svgid in v:
-d = {'label': label, 'svgid': svgid}
+d = {b'label': label, b'svgid': svgid}
-s = _(' %(label)s:%(svgid)s') % d
+s = b' %(label)s:%(svgid)s' % d
 if len(locline) + len(s) <= 78:
 locline = locline + s
 else:
-print(locline, file=fp)
+fp.write(locline)
 locline = locpfx + s
 if len(locline) > len(locpfx):
-print(locline, file=fp)
+fp.write(locline)
-print('msgid', normalize(k), file=fp)
+fp.write(b'msgid '+normalize(k))
-print('msgstr ""\n', file=fp)
+fp.write(b'msgstr ""\n')
 class POReader:
 def __init__(self):
 self.__messages = {}
 def get_messages(self):
 return self.__messages
-def add(self, msgid, msgstr, fuzzy):
+def add(self, ctxt, msgid, msgstr, fuzzy):
 "Add a non-fuzzy translation to the dictionary."
 if not fuzzy and msgstr and msgid:
-self.__messages[msgid.decode('utf-8')] = msgstr.decode('utf-8')
+if ctxt is None:
+self.__messages[msgid] = msgstr
-def read(self, fp):
+else:
+self.__messages[b"%b\x04%b" % (ctxt, id)] = str
+def read(self, infile):
 ID = 1
 STR = 2
+CTXT = 3
-lines = fp.readlines()
-section = None
+with open(infile, 'rb') as f:
+lines = f.readlines()
+section = msgctxt = None
 fuzzy = 0
+# Start off assuming Latin-1, so everything decodes without failure,
+# until we know the exact encoding
+encoding = 'latin-1'
 # Parse the catalog
 lno = 0
 for l in lines:
+l = l.decode(encoding)
 lno += 1
 # If we get a comment line after a msgstr, this is a new entry
 if l[0] == '#' and section == STR:
-self.add(msgid, msgstr, fuzzy)
+self.add(msgctxt, msgid, msgstr, fuzzy)
-section = None
+section = msgctxt = None
 fuzzy = 0
 # Record a fuzzy mark
 if l[:2] == '#,' and 'fuzzy' in l:
 fuzzy = 1
 # Skip comments
 if l[0] == '#':
 continue
-# Now we are in a msgid section, output previous section
+# Now we are in a msgid or msgctxt section, output previous section
-if l.startswith('msgid') and not l.startswith('msgid_plural'):
+if l.startswith('msgctxt'):
 if section == STR:
-self.add(msgid, msgstr, fuzzy)
+self.add(msgctxt, msgid, msgstr, fuzzy)
+section = CTXT
+l = l[7:]
+msgctxt = b''
+elif l.startswith('msgid') and not l.startswith('msgid_plural'):
+if section == STR:
+self.add(msgctxt, msgid, msgstr, fuzzy)
+if not msgid:
+# See whether there is an encoding declaration
+p = HeaderParser()
+charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
+if charset:
+encoding = charset
 section = ID
 l = l[5:]
-msgid = msgstr = ''
+msgid = msgstr = b''
 is_plural = False
 # This is a message with plural forms
 elif l.startswith('msgid_plural'):
 if section != ID:
-print('msgid_plural not preceded by msgid on %s:%d' %\
+raise UserAddressedException(
-(infile, lno), file=sys.stderr)
+'msgid_plural not preceded by msgid on %s:%d' % (infile, lno))
-sys.exit(1)
 l = l[12:]
-msgid += '\0' # separator of singular and plural
+msgid += b'\0' # separator of singular and plural
 is_plural = True
 # Now we are in a msgstr section
 elif l.startswith('msgstr'):
 section = STR
 if l.startswith('msgstr['):
 if not is_plural:
-print('plural without msgid_plural on %s:%d' %\
+raise UserAddressedException(
-(infile, lno), file=sys.stderr)
+'plural without msgid_plural on %s:%d' % (infile, lno))
-sys.exit(1)
 l = l.split(']', 1)[1]
 if msgstr:
-msgstr += '\0' # Separator of the various plural forms
+msgstr += b'\0' # Separator of the various plural forms
 else:
 if is_plural:
-print('indexed msgstr required for plural on  %s:%d' %\
+raise UserAddressedException(
-(infile, lno), file=sys.stderr)
+'indexed msgstr required for plural on  %s:%d' % (infile, lno))
-sys.exit(1)
 l = l[6:]
 # Skip empty lines
 l = l.strip()
 if not l:
 continue
 l = ast.literal_eval(l)
-if section == ID:
+if section == CTXT:
-msgid += l
+msgctxt += l.encode(encoding)
+elif section == ID:
+msgid += l.encode(encoding)
 elif section == STR:
-msgstr += l
+msgstr += l.encode(encoding)
 else:
-print('Syntax error on %s:%d' % (infile, lno), \
+raise UserAddressedException(
-'before:', file=sys.stderr)
+'Syntax error on %s:%d' % (infile, lno) + 'before:\n %s'%l)
-print(l, file=sys.stderr)
-sys.exit(1)
 # Add last entry
 if section == STR:
-self.add(msgid, msgstr, fuzzy)
+self.add(msgctxt, msgid, msgstr, fuzzy)

changeset 3915	b5017dd5c049
parent 3750	f62625418bff
child 3918	9f0ef23569cb