Tests: revert Tesseract to the less performant but more repeatable legacy mode. Compensate by zooming in edit_project and filtering noise out of the OCR text.
author Edouard Tisserant <edouard.tisserant@gmail.com>
Mon, 07 Aug 2023 18:17:12 +0200
changeset 3842 3922024076b3
parent 3841 02fa0f8484d7
child 3843 832bcf1b5b60
Tests: revert Tesseract to the less performant but more repeatable legacy mode. Compensate by zooming in edit_project and filtering noise out of the OCR text.
tests/ide_tests/edit_project.sikuli/edit_project.py
tests/ide_tests/sikuliberemiz.py
--- a/tests/ide_tests/edit_project.sikuli/edit_project.py	Tue Aug 01 23:07:39 2023 +0200
+++ b/tests/ide_tests/edit_project.sikuli/edit_project.py	Mon Aug 07 18:17:12 2023 +0200
@@ -23,6 +23,9 @@
 
     app.type(Key.DOWN * 10, Key.CTRL)
 
+    # Zoom in to allow OCR
+    app.type("+")
+
     app.WaitIdleUI()
 
     app.doubleClick("Hello")
--- a/tests/ide_tests/sikuliberemiz.py	Tue Aug 01 23:07:39 2023 +0200
+++ b/tests/ide_tests/sikuliberemiz.py	Mon Aug 07 18:17:12 2023 +0200
@@ -211,7 +211,11 @@
         """
         self.ocropts = sikuli.OCR.globalOptions()
         self.ocropts.dataPath(tessdata_path)
-        self.ocropts.oem(2)
+        
+        # 0 use legacy Tesseract (not so good, but repeatable)
+        # 1 use RNN Tesseract (better but non-repeatable)
+        # 2 use both
+        self.ocropts.oem(0)
         self.ocropts.smallFont()
 
         self.imgnum = 0
@@ -318,7 +322,7 @@
         for m in matches:
             mText = m.getText().encode('ascii', 'ignore')
             for arg in args:
-                if arg in mText:
+                if arg in mText or arg in mText.translate(None, "\"`'|-. "):
                     if match is None:
                         match = m
                     if mText == arg: