diff options
Diffstat (limited to 'src/VBox/Devices/EFI/Firmware/BaseTools/Tests/CheckUnicodeSourceFiles.py')
-rwxr-xr-x | src/VBox/Devices/EFI/Firmware/BaseTools/Tests/CheckUnicodeSourceFiles.py | 175 |
1 files changed, 175 insertions, 0 deletions
## @file
# Unit tests for AutoGen.UniClassObject
#
# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
#

##
# Import Modules
#
import os
import unittest

import codecs

import TestTools

from Common.Misc import PathClass
import AutoGen.UniClassObject as BtUni

from Common import EdkLogger
EdkLogger.InitializeForUnitTest()

class Tests(TestTools.BaseToolsTest):
    """Checks which encodings/code points AutoGen.UniClassObject accepts.

    Each test writes a small .uni file into the temporary test area and
    hands it to BtUni.UniFileClassObject, asserting that the file is
    either accepted or rejected.
    """

    # Minimal .uni content used whenever a test does not supply its own.
    SampleData = u'''
        #langdef en-US "English"
        #string STR_A #language en-US "STR_A for en-US"
    '''

    def EncodeToFile(self, encoding, string=None):
        """Write test content to 'input.uni' and return its PathClass.

        encoding -- codec name used to encode `string`; None means
                    `string` is written as given (callers in this file
                    pass pre-encoded bytes in that case).
        string   -- content to write; defaults to self.SampleData.
        """
        if string is None:
            string = self.SampleData
        if encoding is not None:
            data = codecs.encode(string, encoding)
        else:
            data = string
        path = 'input.uni'
        # WriteTmpFile/GetTmpFilePath come from TestTools.BaseToolsTest.
        self.WriteTmpFile(path, data)
        return PathClass(self.GetTmpFilePath(path))

    def ErrorFailure(self, error, encoding, shouldPass):
        """Fail the current test with a message about `error`.

        The message states that `error` should (shouldPass is False) or
        should not (shouldPass is True) be generated for `encoding` data.
        """
        msg = error + ' should '
        if shouldPass:
            msg += 'not '
        msg += 'be generated for '
        msg += '%s data in a .uni file' % encoding
        self.fail(msg)

    def UnicodeErrorFailure(self, encoding, shouldPass):
        """Fail the current test, blaming a UnicodeError."""
        self.ErrorFailure('UnicodeError', encoding, shouldPass)

    def EdkErrorFailure(self, encoding, shouldPass):
        """Fail the current test, blaming an EdkLogger.FatalError."""
        self.ErrorFailure('EdkLogger.FatalError', encoding, shouldPass)

    def CheckFile(self, encoding, shouldPass, string=None):
        """Parse a generated .uni file and check accept/reject outcome.

        encoding   -- passed to EncodeToFile (None for raw bytes).
        shouldPass -- True if BtUni.UniFileClassObject is expected to
                      accept the file; False if it is expected to raise
                      UnicodeError or EdkLogger.FatalError.
        string     -- optional content; defaults to self.SampleData.
        """
        path = self.EncodeToFile(encoding, string)
        try:
            BtUni.UniFileClassObject([path])
        except UnicodeError:
            if shouldPass:
                self.UnicodeErrorFailure(encoding, shouldPass)
            return
        except EdkLogger.FatalError:
            if shouldPass:
                self.EdkErrorFailure(encoding, shouldPass)
            return
        except Exception as exc:
            # Any other exception is a test failure either way; report
            # what was actually raised instead of silently swallowing it
            # and blaming EdkLogger.FatalError.
            self.fail('Unexpected %s raised for %s data in a .uni file: %s'
                      % (type(exc).__name__, encoding, exc))

        # No exception was raised: only a failure if rejection was expected.
        if not shouldPass:
            self.EdkErrorFailure(encoding, shouldPass)

    def testUtf16InUniFile(self):
        self.CheckFile('utf_16', shouldPass=True)

    def testSupplementaryPlaneUnicodeCharInUtf16File(self):
        #
        # Supplementary Plane characters can exist in UTF-16 files,
        # but they are not valid UCS-2 characters.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf16File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. But, a Surrogate
        # Pair code point which is not followed by another Surrogate
        # Pair code point might be interpreted as a single code point
        # with the Surrogate Pair code point.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = codecs.BOM_UTF16_LE + b'//\x01\xd8 '

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testValidUtf8File(self):
        self.CheckFile(encoding='utf_8', shouldPass=True)

    def testValidUtf8FileWithBom(self):
        #
        # Same test as testValidUtf8File, but add the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + codecs.encode(self.SampleData, 'utf_8')

        self.CheckFile(encoding=None, shouldPass=True, string=data)

    def test32bitUnicodeCharInUtf16File(self):
        #
        # This method encodes its data as UTF-16 but was previously also
        # named test32bitUnicodeCharInUtf8File, so the UTF-8 test below
        # replaced it in the class namespace and it never ran. The name
        # now matches the encoding it exercises.
        #
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_16', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8File(self):
        data = u'''
            #langdef en-US "English"
            #string STR_A #language en-US "CodePoint (\U00010300) > 0xFFFF"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def test32bitUnicodeCharInUtf8Comment(self):
        data = u'''
            // Even in comments, we reject non-UCS-2 chars: \U00010300
            #langdef en-US "English"
            #string STR_A #language en-US "A"
        '''

        self.CheckFile('utf_8', shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8File(self):
        #
        # Surrogate Pair code points are used in UTF-16 files to
        # encode the Supplementary Plane characters. In UTF-8, it is
        # trivial to encode these code points, but they are not valid
        # code points for characters, since they are reserved for the
        # UTF-16 Surrogate Pairs.
        #
        # This test makes sure that BaseTools rejects these characters
        # if seen in a .uni file.
        #
        data = b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

    def testSurrogatePairUnicodeCharInUtf8FileWithBom(self):
        #
        # Same test as testSurrogatePairUnicodeCharInUtf8File, but add
        # the UTF-8 BOM
        #
        data = codecs.BOM_UTF8 + b'\xed\xa0\x81'

        self.CheckFile(encoding=None, shouldPass=False, string=data)

TheTestSuite = TestTools.MakeTheTestSuite(locals())

if __name__ == '__main__':
    allTests = TheTestSuite()
    unittest.TextTestRunner().run(allTests)