SVGHMI: fix py3 consequences on i18n
authorEdouard Tisserant <edouard.tisserant@gmail.com>
Mon, 18 Mar 2024 18:43:41 +0100
changeset 3915 b5017dd5c049
parent 3914 b2aa31f3a0ce
child 3917 43e7fd0cb88e
SVGHMI: fix py3 consequences on i18n
svghmi/i18n.py
svghmi/svghmi.py
--- a/svghmi/i18n.py	Fri Mar 15 09:20:11 2024 +0100
+++ b/svghmi/i18n.py	Mon Mar 18 18:43:41 2024 +0100
@@ -15,12 +15,14 @@
 import ast
 import wx
 import re
+from email.parser import HeaderParser
 
 # to have it for python 2, had to install 
 # https://pypi.org/project/pycountry/18.12.8/
 # python2 -m pip install pycountry==18.12.8 --user
 import pycountry
 from dialogs import MessageBoxOnce
+from POULibrary import UserAddressedException
 
 cmd_parser = re.compile(r'(?:"([^"]+)"\s*|([^\s]+)\s*)?')
 
@@ -67,8 +69,8 @@
 
     for msg in msgs:
         messages.append((
-            "\n".join([line.text for line in msg]),
-            msg.get("label"), msg.get("id")))
+            b"\n".join([line.text.encode() for line in msg]),
+            msg.get("label").encode(), msg.get("id").encode()))
 
     return messages
 
@@ -77,7 +79,7 @@
     w = POTWriter()
     w.ImportMessages(messages)
 
-    with open(fname, 'w') as POT_file:
+    with open(fname, 'wb') as POT_file:
         w.write(POT_file)
 
 def GetPoFiles(dirpath):
@@ -91,9 +93,8 @@
     translations = []
     for translation_name, po_path in GetPoFiles(dirpath):
         r = POReader()
-        with open(po_path, 'r') as PO_file:
-            r.read(PO_file)
-            translations.append((translation_name, r.get_messages()))
+        r.read(po_path)
+        translations.append((translation_name, r.get_messages()))
     return translations
 
 def MatchTranslations(translations, messages, errcallback):
@@ -155,15 +156,17 @@
         msgidel = etree.SubElement(msgsroot, "msgid")
         for msg in msgs:
             msgel = etree.SubElement(msgidel, "msg")
-            for line in msg.split("\n"):
+            for line in msg.split(b"\n"):
                 lineel = etree.SubElement(msgel, "line")
-                lineel.text = escape(line.encode("utf-8")).decode("utf-8")
+                lineel.text = escape(line).decode()
 
     return result
 
-
-
-locpfx = '#:svghmi.svg:'
+# Code below is based on :
+#  cpython/Tools/i18n/pygettext.py
+#  cpython/Tools/i18n/msgfmt.py
+
+locpfx = b'#:svghmi.svg:'
 
 pot_header = '''\
 # SOME DESCRIPTIVE TITLE.
@@ -185,75 +188,62 @@
 '''
 escapes = []
 
-def make_escapes(pass_iso8859):
+def make_escapes():
     global escapes
-    escapes = [chr(i) for i in range(256)]
-    if pass_iso8859:
-        # Allow iso-8859 characters to pass through so that e.g. 'msgid
-        # "Höhe"' would result not result in 'msgid "H\366he"'.  Otherwise we
-        # escape any character outside the 32..126 range.
-        mod = 128
-    else:
-        mod = 256
-    for i in range(mod):
-        if not(32 <= i <= 126):
-            escapes[i] = "\\%03o" % i
-    escapes[ord('\\')] = '\\\\'
-    escapes[ord('\t')] = '\\t'
-    escapes[ord('\r')] = '\\r'
-    escapes[ord('\n')] = '\\n'
-    escapes[ord('\"')] = '\\"'
-
-make_escapes(pass_iso8859 = True)
-
-EMPTYSTRING = ''
+    escapes = [b"\%03o" % i for i in range(128)]
+    for i in range(32, 127):
+        escapes[i] = bytes([i])
+    escapes[ord('\\')] = b'\\\\'
+    escapes[ord('\t')] = b'\\t'
+    escapes[ord('\r')] = b'\\r'
+    escapes[ord('\n')] = b'\\n'
+    escapes[ord('\"')] = b'\\"'
+
+make_escapes()
 
 def escape(s):
-    global escapes
-    s = list(s)
-    for i in range(len(s)):
-        s[i] = escapes[ord(s[i])]
-    return EMPTYSTRING.join(s)
+    l = [escapes[c] if c < 128 else bytes([c]) for c in s]
+    return b''.join(l)
+    #return bytes([escapes[c] if c < 128 else c for c in s])
 
 def normalize(s):
     # This converts the various Python string types into a format that is
     # appropriate for .po files, namely much closer to C style.
-    lines = s.split('\n')
+    lines = s.split(b'\n')
     if len(lines) == 1:
-        s = '"' + escape(s) + '"'
+        s = b'"' + escape(s) + b'"'
     else:
         if not lines[-1]:
             del lines[-1]
-            lines[-1] = lines[-1] + '\n'
+            lines[-1] = lines[-1] + b'\n'
         for i in range(len(lines)):
             lines[i] = escape(lines[i])
-        lineterm = '\\n"\n"'
-        s = '""\n"' + lineterm.join(lines) + '"'
+        lineterm = b'\\n"\n"'
+        s = b'""\n"' + lineterm.join(lines) + b'"'
     return s
 
-
 class POTWriter:
     def __init__(self):
         self.__messages = {}
 
     def ImportMessages(self, msgs):
         for  msg, label, svgid in msgs:
-            self.addentry(msg.encode("utf-8"), label, svgid)
+            self.addentry(msg, label, svgid)
 
     def addentry(self, msg, label, svgid):
         entry = (label, svgid)
         self.__messages.setdefault(msg, set()).add(entry)
 
     def write(self, fp):
-        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
-        print(pot_header % {'time': timestamp}, file=fp)
+        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
+        header = pot_header % {'time': timestamp}
+        fp.write(header.encode())
         reverse = {}
-        for k, v in list(self.__messages.items()):
+        for k, v in self.__messages.items():
             keys = list(v)
             keys.sort()
             reverse.setdefault(tuple(keys), []).append((k, v))
-        rkeys = list(reverse.keys())
-        rkeys.sort()
+        rkeys = sorted(reverse.keys())
         for rkey in rkeys:
             rentries = reverse[rkey]
             rentries.sort()
@@ -262,17 +252,17 @@
                 v.sort()
                 locline = locpfx
                 for label, svgid in v:
-                    d = {'label': label, 'svgid': svgid}
-                    s = _(' %(label)s:%(svgid)s') % d
+                    d = {b'label': label, b'svgid': svgid}
+                    s = b' %(label)s:%(svgid)s' % d
                     if len(locline) + len(s) <= 78:
                         locline = locline + s
                     else:
-                        print(locline, file=fp)
+                        fp.write(locline)
                         locline = locpfx + s
                 if len(locline) > len(locpfx):
-                    print(locline, file=fp)
-                print('msgid', normalize(k), file=fp)
-                print('msgstr ""\n', file=fp)
+                    fp.write(locline)
+                fp.write(b'msgid '+normalize(k))
+                fp.write(b'msgstr ""\n')
 
 
 class POReader:
@@ -282,27 +272,39 @@
     def get_messages(self):
         return self.__messages
 
-    def add(self, msgid, msgstr, fuzzy):
+    def add(self, ctxt, msgid, msgstr, fuzzy):
         "Add a non-fuzzy translation to the dictionary."
         if not fuzzy and msgstr and msgid:
-            self.__messages[msgid.decode('utf-8')] = msgstr.decode('utf-8')
-
-    def read(self, fp):
+            if ctxt is None:
+                self.__messages[msgid] = msgstr
+            else:
+                self.__messages[b"%b\x04%b" % (ctxt, id)] = str
+
+    def read(self, infile):
         ID = 1
         STR = 2
-
-        lines = fp.readlines()
-        section = None
+        CTXT = 3
+
+
+        with open(infile, 'rb') as f:
+            lines = f.readlines()
+            
+        section = msgctxt = None
         fuzzy = 0
 
+        # Start off assuming Latin-1, so everything decodes without failure,
+        # until we know the exact encoding
+        encoding = 'latin-1'
+
         # Parse the catalog
         lno = 0
         for l in lines:
+            l = l.decode(encoding)
             lno += 1
             # If we get a comment line after a msgstr, this is a new entry
             if l[0] == '#' and section == STR:
-                self.add(msgid, msgstr, fuzzy)
-                section = None
+                self.add(msgctxt, msgid, msgstr, fuzzy)
+                section = msgctxt = None
                 fuzzy = 0
             # Record a fuzzy mark
             if l[:2] == '#,' and 'fuzzy' in l:
@@ -310,56 +312,66 @@
             # Skip comments
             if l[0] == '#':
                 continue
-            # Now we are in a msgid section, output previous section
-            if l.startswith('msgid') and not l.startswith('msgid_plural'):
+            # Now we are in a msgid or msgctxt section, output previous section
+            if l.startswith('msgctxt'):
                 if section == STR:
-                    self.add(msgid, msgstr, fuzzy)
+                    self.add(msgctxt, msgid, msgstr, fuzzy)
+                section = CTXT
+                l = l[7:]
+                msgctxt = b''
+            elif l.startswith('msgid') and not l.startswith('msgid_plural'):
+                if section == STR:
+                    self.add(msgctxt, msgid, msgstr, fuzzy)
+                    if not msgid:
+                        # See whether there is an encoding declaration
+                        p = HeaderParser()
+                        charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
+                        if charset:
+                            encoding = charset
                 section = ID
                 l = l[5:]
-                msgid = msgstr = ''
+                msgid = msgstr = b''
                 is_plural = False
             # This is a message with plural forms
             elif l.startswith('msgid_plural'):
                 if section != ID:
-                    print('msgid_plural not preceded by msgid on %s:%d' %\
-                        (infile, lno), file=sys.stderr)
-                    sys.exit(1)
+                    raise UserAddressedException(
+                        'msgid_plural not preceded by msgid on %s:%d' % (infile, lno))
                 l = l[12:]
-                msgid += '\0' # separator of singular and plural
+                msgid += b'\0' # separator of singular and plural
                 is_plural = True
             # Now we are in a msgstr section
             elif l.startswith('msgstr'):
                 section = STR
                 if l.startswith('msgstr['):
                     if not is_plural:
-                        print('plural without msgid_plural on %s:%d' %\
-                            (infile, lno), file=sys.stderr)
-                        sys.exit(1)
+                        raise UserAddressedException(
+                            'plural without msgid_plural on %s:%d' % (infile, lno))
                     l = l.split(']', 1)[1]
                     if msgstr:
-                        msgstr += '\0' # Separator of the various plural forms
+                        msgstr += b'\0' # Separator of the various plural forms
                 else:
                     if is_plural:
-                        print('indexed msgstr required for plural on  %s:%d' %\
-                            (infile, lno), file=sys.stderr)
-                        sys.exit(1)
+                        raise UserAddressedException(
+                            'indexed msgstr required for plural on  %s:%d' % (infile, lno))
                     l = l[6:]
             # Skip empty lines
             l = l.strip()
             if not l:
                 continue
             l = ast.literal_eval(l)
-            if section == ID:
-                msgid += l
+            if section == CTXT:
+                msgctxt += l.encode(encoding)
+            elif section == ID:
+                msgid += l.encode(encoding)
             elif section == STR:
-                msgstr += l
+                msgstr += l.encode(encoding)
             else:
-                print('Syntax error on %s:%d' % (infile, lno), \
-                      'before:', file=sys.stderr)
-                print(l, file=sys.stderr)
-                sys.exit(1)
+                raise UserAddressedException(
+                    'Syntax error on %s:%d' % (infile, lno) + 'before:\n %s'%l)
         # Add last entry
         if section == STR:
-            self.add(msgid, msgstr, fuzzy)
-
-
+            self.add(msgctxt, msgid, msgstr, fuzzy)
+
+
+
--- a/svghmi/svghmi.py	Fri Mar 15 09:20:11 2024 +0100
+++ b/svghmi/svghmi.py	Mon Mar 18 18:43:41 2024 +0100
@@ -559,7 +559,7 @@
             hmi_tree_root._hash(hasher)
             pofiles = GetPoFiles(self.CTNPath())
             filestocheck = [svgfile] + \
-                           (list(zip(*pofiles)[1]) if pofiles else []) + \
+                           (list(list(zip(*pofiles))[1]) if pofiles else []) + \
                            self.GetFontsFiles()
 
             for filetocheck in filestocheck: