summaryrefslogtreecommitdiffstats
path: root/src/VBox/Devices/EFI/Firmware/BaseTools/Tests/CheckUnicodeSourceFiles.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/VBox/Devices/EFI/Firmware/BaseTools/Tests/CheckUnicodeSourceFiles.py')
-rwxr-xr-xsrc/VBox/Devices/EFI/Firmware/BaseTools/Tests/CheckUnicodeSourceFiles.py175
1 files changed, 175 insertions, 0 deletions
diff --git a/src/VBox/Devices/EFI/Firmware/BaseTools/Tests/CheckUnicodeSourceFiles.py b/src/VBox/Devices/EFI/Firmware/BaseTools/Tests/CheckUnicodeSourceFiles.py
new file mode 100755
index 00000000..effe1661
--- /dev/null
+++ b/src/VBox/Devices/EFI/Firmware/BaseTools/Tests/CheckUnicodeSourceFiles.py
@@ -0,0 +1,175 @@
+## @file
+# Unit tests for AutoGen.UniClassObject
+#
+# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
+
+##
+# Import Modules
+#
+import os
+import unittest
+
+import codecs
+
+import TestTools
+
+from Common.Misc import PathClass
+import AutoGen.UniClassObject as BtUni
+
+from Common import EdkLogger
+EdkLogger.InitializeForUnitTest()
+
+class Tests(TestTools.BaseToolsTest):
+
+ SampleData = u'''
+ #langdef en-US "English"
+ #string STR_A #language en-US "STR_A for en-US"
+ '''
+
+ def EncodeToFile(self, encoding, string=None):
+ if string is None:
+ string = self.SampleData
+ if encoding is not None:
+ data = codecs.encode(string, encoding)
+ else:
+ data = string
+ path = 'input.uni'
+ self.WriteTmpFile(path, data)
+ return PathClass(self.GetTmpFilePath(path))
+
+ def ErrorFailure(self, error, encoding, shouldPass):
+ msg = error + ' should '
+ if shouldPass:
+ msg += 'not '
+ msg += 'be generated for '
+ msg += '%s data in a .uni file' % encoding
+ self.fail(msg)
+
+ def UnicodeErrorFailure(self, encoding, shouldPass):
+ self.ErrorFailure('UnicodeError', encoding, shouldPass)
+
+ def EdkErrorFailure(self, encoding, shouldPass):
+ self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)
+
+ def CheckFile(self, encoding, shouldPass, string=None):
+ path = self.EncodeToFile(encoding, string)
+ try:
+ BtUni.UniFileClassObject([path])
+ if shouldPass:
+ return
+ except UnicodeError:
+ if not shouldPass:
+ return
+ else:
+ self.UnicodeErrorFailure(encoding, shouldPass)
+ except EdkLogger.FatalError:
+ if not shouldPass:
+ return
+ else:
+ self.EdkErrorFailure(encoding, shouldPass)
+ except Exception:
+ pass
+
+ self.EdkErrorFailure(encoding, shouldPass)
+
+ def testUtf16InUniFile(self):
+ self.CheckFile('utf_16', shouldPass=True)
+
+ def testSupplementaryPlaneUnicodeCharInUtf16File(self):
+ #
+ # Supplementary Plane characters can exist in UTF-16 files,
+ # but they are not valid UCS-2 characters.
+ #
+ # This test makes sure that BaseTools rejects these characters
+ # if seen in a .uni file.
+ #
+ data = u'''
+ #langdef en-US "English"
+ #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
+ '''
+
+ self.CheckFile('utf_16', shouldPass=False, string=data)
+
+ def testSurrogatePairUnicodeCharInUtf16File(self):
+ #
+ # Surrogate Pair code points are used in UTF-16 files to
+ # encode the Supplementary Plane characters. But, a Surrogate
+ # Pair code point which is not followed by another Surrogate
+ # Pair code point might be interpreted as a single code point
+ # with the Surrogate Pair code point.
+ #
+ # This test makes sure that BaseTools rejects these characters
+ # if seen in a .uni file.
+ #
+ data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '
+
+ self.CheckFile(encoding=None, shouldPass=False, string=data)
+
+ def testValidUtf8File(self):
+ self.CheckFile(encoding='utf_8', shouldPass=True)
+
+ def testValidUtf8FileWithBom(self):
+ #
+ # Same test as testValidUtf8File, but add the UTF-8 BOM
+ #
+ data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')
+
+ self.CheckFile(encoding=None, shouldPass=True, string=data)
+
+ def test32bitUnicodeCharInUtf8File(self):
+ data = u'''
+ #langdef en-US "English"
+ #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
+ '''
+
+ self.CheckFile('utf_16', shouldPass=False, string=data)
+
+ def test32bitUnicodeCharInUtf8File(self):
+ data = u'''
+ #langdef en-US "English"
+ #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
+ '''
+
+ self.CheckFile('utf_8', shouldPass=False, string=data)
+
+ def test32bitUnicodeCharInUtf8Comment(self):
+ data = u'''
+ // Even in comments, we reject non-UCS-2 chars: \U00010300
+ #langdef en-US "English"
+ #string STR_A #language en-US "A"
+ '''
+
+ self.CheckFile('utf_8', shouldPass=False, string=data)
+
+ def testSurrogatePairUnicodeCharInUtf8File(self):
+ #
+ # Surrogate Pair code points are used in UTF-16 files to
+ # encode the Supplementary Plane characters. In UTF-8, it is
+ # trivial to encode these code points, but they are not valid
+ # code points for characters, since they are reserved for the
+ # UTF-16 Surrogate Pairs.
+ #
+ # This test makes sure that BaseTools rejects these characters
+ # if seen in a .uni file.
+ #
+ data = b'\xed\xa0\x81'
+
+ self.CheckFile(encoding=None, shouldPass=False, string=data)
+
+ def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
+ #
+ # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
+ # the UTF-8 BOM
+ #
+ data = codecs.BOM_UTF8 + b'\xed\xa0\x81'
+
+ self.CheckFile(encoding=None, shouldPass=False, string=data)
+
+TheTestSuite = TestTools.MakeTheTestSuite(locals())
+
+if __name__ == '__main__':
+ allTests = TheTestSuite()
+ unittest.TextTestRunner().run(allTests)