diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 01:47:29 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 01:47:29 +0000 |
commit | 0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch) | |
tree | a31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /intl/icu/source/extra | |
parent | Initial commit. (diff) | |
download | firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.tar.xz firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.zip |
Adding upstream version 115.8.0esr.upstream/115.8.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
50 files changed, 5224 insertions, 0 deletions
diff --git a/intl/icu/source/extra/Makefile.in b/intl/icu/source/extra/Makefile.in new file mode 100644 index 0000000000..2e48dba73d --- /dev/null +++ b/intl/icu/source/extra/Makefile.in @@ -0,0 +1,81 @@ +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +#****************************************************************************** +# +# Copyright (C) 1999-2011, International Business Machines +# Corporation and others. All Rights Reserved. +# +#****************************************************************************** +## Makefile.in for ICU extras +## Stephen F. Booth + +## Install directory information +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ + +top_builddir = .. + +include $(top_builddir)/icudefs.mk + +## Build directory information +subdir = extra + +## Files to remove for 'make clean' +CLEANFILES = *~ + +SUBDIRS = scrptrun uconv + +## List of phony targets +.PHONY : all all-local all-recursive install install-local \ +install-recursive clean clean-local clean-recursive distclean \ +distclean-local distclean-recursive dist dist-recursive dist-local \ +check check-recursive check-local check-exhaustive + +## Clear suffix list +.SUFFIXES : + +## List of standard targets +all: all-recursive all-local +install: install-recursive install-local +clean: clean-recursive clean-local +distclean : distclean-recursive distclean-local +dist: dist-recursive dist-local +check: all check-recursive check-local + +check-exhaustive: check + +## Recursive targets +all-recursive install-recursive clean-recursive distclean-recursive dist-recursive check-recursive: + @dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "$(MAKE)[$(MAKELEVEL)]: Making \`$$target' in \`$$subdir'"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-local"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) 
$$local_target) || exit; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) "$$target-local" || exit; \ + fi + +all-local: + +install-local: + +dist-local: + +clean-local: + test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES) + +check-local: + +distclean-local: clean-local + $(RMV) Makefile + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status diff --git a/intl/icu/source/extra/scrptrun/Makefile.in b/intl/icu/source/extra/scrptrun/Makefile.in new file mode 100644 index 0000000000..f3f89431df --- /dev/null +++ b/intl/icu/source/extra/scrptrun/Makefile.in @@ -0,0 +1,87 @@ +## Copyright (C) 2016 and later: Unicode, Inc. and others. +## License & terms of use: http://www.unicode.org/copyright.html +## Makefile.in for ICU - extra/scrptrun +## Copyright (c) 2001-2011, International Business Machines Corporation and +## others. All Rights Reserved. + +## Source directory information +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ + +top_builddir = ../.. 
+ +include $(top_builddir)/icudefs.mk + +## Build directory information +subdir = extra/scrptrun + +## Extra files to remove for 'make clean' +CLEANFILES = *~ $(DEPS) + +## Target information +TARGET = srtest$(EXEEXT) + +DEFS = @DEFS@ +CPPFLAGS = @CPPFLAGS@ -I$(top_srcdir)/common -I$(top_srcdir) +CFLAGS = @CFLAGS@ +CXXFLAGS = @CXXFLAGS@ +ENABLE_RPATH = @ENABLE_RPATH@ +ifeq ($(ENABLE_RPATH),YES) +RPATHLDFLAGS = $(LD_RPATH)$(LD_RPATH_PRE)$(libdir) +endif +LDFLAGS = @LDFLAGS@ $(RPATHLDFLAGS) +LIBS = $(LIBICUUC) $(LIB_M) + +SOURCES = $(shell cat $(srcdir)/sources.txt) +OBJECTS = $(SOURCES:.cpp=.o) + +DEPS = $(OBJECTS:.o=.d) + +## List of phony targets +.PHONY : all all-local install install-local clean clean-local \ +distclean distclean-local dist dist-local check check-local + +## Clear suffix list +.SUFFIXES : + +## List of standard targets +all: all-local +install: install-local +clean: clean-local +distclean : distclean-local +dist: dist-local +check: all check-local + +all-local: $(TARGET) + +install-local: + +dist-local: + +clean-local: + test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES) + $(RMV) $(OBJECTS) $(TARGET) + +distclean-local: clean-local + $(RMV) Makefile + +check-local: all-local + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + +$(TARGET) : $(OBJECTS) + $(LINK.cc) $(OUTOPT)$@ $^ $(LIBS) + $(POST_BUILD_STEP) + +invoke: + ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION) + +ifeq (,$(MAKECMDGOALS)) +-include $(DEPS) +else +ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),) +-include $(DEPS) +endif +endif diff --git a/intl/icu/source/extra/scrptrun/readme.html b/intl/icu/source/extra/scrptrun/readme.html new file mode 100644 index 0000000000..9968a5e85a --- /dev/null +++ b/intl/icu/source/extra/scrptrun/readme.html @@ -0,0 +1,69 @@ +<!doctype html public "-//w3c//dtd html 4.0 transitional//en"> +<html> +<head> + <meta 
http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> + <meta name="Copyright" content="Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html"> + <!-- meta name="Copyright" content="Copyright (c) 2002, International Business Machines Corporation and others. All Rights Reserved." --> + <meta name="Author" content="Eric Mader"> + <meta name="GENERATOR" content="Mozilla/4.72 [en] (Windows NT 5.0; U) [Netscape]"> + <title>ScriptRun readme</title> +</head> +<body> + +<h2> +What is scrptrun and srtest?</h2> +The ICU LayoutEngine must be called with text in a single script. scrptrun.h +and scrptrun.cpp implement the ScriptRun class, which can be used to find +runs of text that is in a single script. It uses a basic iteration interface. +<p>srtest is a little program that tests ScriptRun. You can use it as an +example of how to use ScriptRun. Here's what the output should look like: +<blockquote><font face="Courier New,Courier"><font size=-1>Script 'DEVANAGARI' +from 0 to 9.</font></font> +<br><font face="Courier New,Courier"><font size=-1>Script 'ARABIC' from +9 to 17.</font></font> +<br><font face="Courier New,Courier"><font size=-1>Script 'CYRILLIC' from +17 to 25.</font></font> +<br><font face="Courier New,Courier"><font size=-1>Script 'LATIN' from +25 to 33.</font></font> +<br><font face="Courier New,Courier"><font size=-1>Script 'HAN' from 33 +to 35.</font></font> +<br><font face="Courier New,Courier"><font size=-1>Script 'HIRAGANA' from +35 to 41.</font></font> +<br><font face="Courier New,Courier"><font size=-1>Script 'KATAKANA' from +41 to 45.</font></font> +<br><font face="Courier New,Courier"><font size=-1>Script 'DESERET' from +45 to 53.</font></font></blockquote> + +<h2> +How do I build scrptrun and srtest?</h2> +To use the ScriptRun class in a Windows application, just include scrptrun.h +and scrptrun.cpp right out of the <icu>\source\extra\scrptrun directory +into your project. 
You'll also need to add the <icu>\source\extra\scrptrun +directory to the "Additional include directories" section of the "Preprocessor" +category on the "C/C++" tab in the project settings. +<p>On UNIX systems the simplest thing to do is to just copy scrptrun.h +and scrptrun.cpp into your source directory. If you want to use them from +<icu>/source/extra/scrptrun, it's a bit trickier: the default dependency +rules don't work on source files in a different directory. You need to +add separate dependency rules for scrptrun.o and scrptrun.d. See <icu>/source/samples/layout/Makefile.in +for an example of how to do this. You'll also have to add -I$(top_srcdir)/extra/scrptrun +to your compiler flags so that the compiler can find scrptrun.h. If your +application has to build on multiple UNIX platforms, it might be difficult +to write dependency rules that will work correctly on all platforms. In +that case, you're probably better off copying the scrptrun files to your +source directory. +<p>Building srtest is easy: on Windows, build the srtest workspace in <icu>\source\extra\scrptrun. +On UNIX, change to <top-build-dir>/extra/scrptrun and run "make all". +<h2> +Notes</h2> + +<ul> +<li> +ScriptRun is based on <a href="http://www.unicode.org/unicode/reports/tr24/">Unicode +Technical Report #24</a> but the implementation is not complete. In particular +the support for paired punctuation is only a prototype. A complete implementation +will be added to ICU in the future.</li> +</ul> + +</body> +</html> diff --git a/intl/icu/source/extra/scrptrun/scrptrun.cpp b/intl/icu/source/extra/scrptrun/scrptrun.cpp new file mode 100644 index 0000000000..4fd8de1b85 --- /dev/null +++ b/intl/icu/source/extra/scrptrun/scrptrun.cpp @@ -0,0 +1,206 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * + * Copyright (C) 1999-2016, International Business Machines + * Corporation and others. All Rights Reserved. + * + ******************************************************************************* + * file name: scrptrun.cpp + * + * created on: 10/17/2001 + * created by: Eric R. Mader + */ + +#include "unicode/utypes.h" +#include "unicode/uscript.h" + +#include "cmemory.h" +#include "scrptrun.h" + +U_NAMESPACE_BEGIN + +const char ScriptRun::fgClassID=0; + +UChar32 ScriptRun::pairedChars[] = { + 0x0028, 0x0029, // ascii paired punctuation + 0x003c, 0x003e, + 0x005b, 0x005d, + 0x007b, 0x007d, + 0x00ab, 0x00bb, // guillemets + 0x2018, 0x2019, // general punctuation + 0x201c, 0x201d, + 0x2039, 0x203a, + 0x3008, 0x3009, // chinese paired punctuation + 0x300a, 0x300b, + 0x300c, 0x300d, + 0x300e, 0x300f, + 0x3010, 0x3011, + 0x3014, 0x3015, + 0x3016, 0x3017, + 0x3018, 0x3019, + 0x301a, 0x301b +}; + +const int32_t ScriptRun::pairedCharCount = UPRV_LENGTHOF(pairedChars); +const int32_t ScriptRun::pairedCharPower = 1 << highBit(pairedCharCount); +const int32_t ScriptRun::pairedCharExtra = pairedCharCount - pairedCharPower; + +int8_t ScriptRun::highBit(int32_t value) +{ + if (value <= 0) { + return -32; + } + + int8_t bit = 0; + + if (value >= 1 << 16) { + value >>= 16; + bit += 16; + } + + if (value >= 1 << 8) { + value >>= 8; + bit += 8; + } + + if (value >= 1 << 4) { + value >>= 4; + bit += 4; + } + + if (value >= 1 << 2) { + value >>= 2; + bit += 2; + } + + if (value >= 1 << 1) { + value >>= 1; + bit += 1; + } + + return bit; +} + +int32_t ScriptRun::getPairIndex(UChar32 ch) +{ + int32_t probe = pairedCharPower; + int32_t index = 0; + + if (ch >= pairedChars[pairedCharExtra]) { + index = pairedCharExtra; + } + + while (probe > (1 << 0)) { + probe >>= 1; + + if (ch >= pairedChars[index + probe]) { + index += probe; + } + 
} + + if (pairedChars[index] != ch) { + index = -1; + } + + return index; +} + +UBool ScriptRun::sameScript(int32_t scriptOne, int32_t scriptTwo) +{ + return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo; +} + +UBool ScriptRun::next() +{ + int32_t startSP = parenSP; // used to find the first new open character + UErrorCode error = U_ZERO_ERROR; + + // if we've fallen off the end of the text, we're done + if (scriptEnd >= charLimit) { + return false; + } + + scriptCode = USCRIPT_COMMON; + + for (scriptStart = scriptEnd; scriptEnd < charLimit; scriptEnd += 1) { + char16_t high = charArray[scriptEnd]; + UChar32 ch = high; + + // if the character is a high surrogate and it's not the last one + // in the text, see if it's followed by a low surrogate + if (high >= 0xD800 && high <= 0xDBFF && scriptEnd < charLimit - 1) + { + char16_t low = charArray[scriptEnd + 1]; + + // if it is followed by a low surrogate, + // consume it and form the full character + if (low >= 0xDC00 && low <= 0xDFFF) { + ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000; + scriptEnd += 1; + } + } + + UScriptCode sc = uscript_getScript(ch, &error); + int32_t pairIndex = getPairIndex(ch); + + // Paired character handling: + // + // if it's an open character, push it onto the stack. + // if it's a close character, find the matching open on the + // stack, and use that script code. Any non-matching open + // characters above it on the stack will be poped. 
+ if (pairIndex >= 0) { + if ((pairIndex & 1) == 0) { + parenStack[++parenSP].pairIndex = pairIndex; + parenStack[parenSP].scriptCode = scriptCode; + } else if (parenSP >= 0) { + int32_t pi = pairIndex & ~1; + + while (parenSP >= 0 && parenStack[parenSP].pairIndex != pi) { + parenSP -= 1; + } + + if (parenSP < startSP) { + startSP = parenSP; + } + + if (parenSP >= 0) { + sc = parenStack[parenSP].scriptCode; + } + } + } + + if (sameScript(scriptCode, sc)) { + if (scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) { + scriptCode = sc; + + // now that we have a final script code, fix any open + // characters we pushed before we knew the script code. + while (startSP < parenSP) { + parenStack[++startSP].scriptCode = scriptCode; + } + } + + // if this character is a close paired character, + // pop it from the stack + if (pairIndex >= 0 && (pairIndex & 1) != 0 && parenSP >= 0) { + parenSP -= 1; + startSP -= 1; + } + } else { + // if the run broke on a surrogate pair, + // end it before the high surrogate + if (ch >= 0x10000) { + scriptEnd -= 1; + } + + break; + } + } + + return true; +} + +U_NAMESPACE_END diff --git a/intl/icu/source/extra/scrptrun/scrptrun.h b/intl/icu/source/extra/scrptrun/scrptrun.h new file mode 100644 index 0000000000..cc07746641 --- /dev/null +++ b/intl/icu/source/extra/scrptrun/scrptrun.h @@ -0,0 +1,159 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * + * Copyright (C) 1999-2003, International Business Machines + * Corporation and others. All Rights Reserved. + * + ******************************************************************************* + * file name: scrptrun.h + * + * created on: 10/17/2001 + * created by: Eric R. 
Mader
+ */
+
+#ifndef __SCRPTRUN_H
+#define __SCRPTRUN_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/uscript.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A code point range paired with a script code.
+ * NOTE(review): not referenced by the ScriptRun code visible in this patch.
+ */
+struct ScriptRecord
+{
+    UChar32 startChar;
+    UChar32 endChar;
+    UScriptCode scriptCode;
+};
+
+/**
+ * One entry of ScriptRun's stack of pending open punctuation: the index of
+ * the open character in ScriptRun::pairedChars and the script code recorded
+ * for it.
+ */
+struct ParenStackEntry
+{
+    int32_t pairIndex;
+    UScriptCode scriptCode;
+};
+
+/**
+ * Iterates over UTF-16 text, splitting it into runs of a single script.
+ * Call next() until it returns false; after each successful call the
+ * getters describe the current run.
+ */
+class ScriptRun : public UObject {
+public:
+    /** Creates an iterator over no text (null array, start 0, length 0). */
+    ScriptRun();
+
+    /** Creates an iterator over chars[0 .. length). */
+    ScriptRun(const char16_t chars[], int32_t length);
+
+    /** Creates an iterator over chars[start .. start + length). */
+    ScriptRun(const char16_t chars[], int32_t start, int32_t length);
+
+    /** Rewinds to the start of the current range and empties the paren stack. */
+    void reset();
+
+    /** Selects a new range of the current text (start, start + count) and rewinds. */
+    void reset(int32_t start, int32_t count);
+
+    /** Selects new text and a range within it, then rewinds. */
+    void reset(const char16_t chars[], int32_t start, int32_t length);
+
+    /** Returns the index of the first code unit of the current run. */
+    int32_t getScriptStart();
+
+    /** Returns the index just past the last code unit of the current run. */
+    int32_t getScriptEnd();
+
+    /** Returns the script code of the current run. */
+    UScriptCode getScriptCode();
+
+    /** Advances to the next run; returns false when the text is exhausted. */
+    UBool next();
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual inline UClassID getDynamicClassID() const override { return getStaticClassID(); }
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
+
+private:
+
+    static UBool sameScript(int32_t scriptOne, int32_t scriptTwo);
+
+    // the text being iterated and the [charStart, charLimit) range within it
+    int32_t charStart;
+    int32_t charLimit;
+    const char16_t *charArray;
+
+    // the current run, as reported by the getters
+    int32_t scriptStart;
+    int32_t scriptEnd;
+    UScriptCode scriptCode;
+
+    // stack of open paired characters; parenSP is the index of the top
+    // entry, -1 when the stack is empty
+    ParenStackEntry parenStack[128];
+    int32_t parenSP;
+
+    static int8_t highBit(int32_t value);
+    static int32_t getPairIndex(UChar32 ch);
+
+    // paired punctuation table and the sizes derived from it
+    static UChar32 pairedChars[];
+    static const int32_t pairedCharCount;
+    static const int32_t pairedCharPower;
+    static const int32_t pairedCharExtra;
+
+    /**
+     * The address of this static class variable serves as this class's ID
+     * for ICU "poor man's RTTI".
+     */
+    static const char fgClassID;
+};
+
+inline ScriptRun::ScriptRun()
+{
+    reset(nullptr, 0, 0);
+}
+
+inline ScriptRun::ScriptRun(const char16_t chars[], int32_t length)
+{
+    reset(chars, 0, length);
+}
+
+inline ScriptRun::ScriptRun(const char16_t chars[], int32_t start, int32_t length)
+{
+    reset(chars, start, length);
+}
+
+inline int32_t ScriptRun::getScriptStart()
+{
+    return scriptStart;
+}
+
+inline int32_t ScriptRun::getScriptEnd()
+{
+    return scriptEnd;
+}
+
+inline UScriptCode ScriptRun::getScriptCode()
+{
+    return scriptCode;
+}
+
+// Puts the iterator back at charStart with an empty run, no script and an
+// empty paren stack.
+inline void ScriptRun::reset()
+{
+    scriptStart = charStart;
+    scriptEnd = charStart;
+    scriptCode = USCRIPT_INVALID_CODE;
+    parenSP = -1;
+}
+
+// Sets the range to [start, start + length) and rewinds.
+inline void ScriptRun::reset(int32_t start, int32_t length)
+{
+    charStart = start;
+    charLimit = start + length;
+
+    reset();
+}
+
+// Replaces the text pointer (the array is not copied), then sets the range
+// and rewinds.
+inline void ScriptRun::reset(const char16_t chars[], int32_t start, int32_t length)
+{
+    charArray = chars;
+
+    reset(start, length);
+}
+
+U_NAMESPACE_END
+
+#endif
diff --git a/intl/icu/source/extra/scrptrun/sources.txt b/intl/icu/source/extra/scrptrun/sources.txt
new file mode 100644
index 0000000000..a4b20b7130
--- /dev/null
+++ b/intl/icu/source/extra/scrptrun/sources.txt
@@ -0,0 +1,2 @@
+scrptrun.cpp
+srtest.cpp
diff --git a/intl/icu/source/extra/scrptrun/srtest.cpp b/intl/icu/source/extra/scrptrun/srtest.cpp
new file mode 100644
index 0000000000..63750960cd
--- /dev/null
+++ b/intl/icu/source/extra/scrptrun/srtest.cpp
@@ -0,0 +1,42 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ * %W% %E%
+ *
+ * (C) Copyright IBM Corp. 
2001-2016 - All Rights Reserved
+ *
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uscript.h"
+
+#include "cmemory.h"
+#include "scrptrun.h"
+
+#include <stdio.h>
+
+// Sample text: one row per script, in the order of the sample output shown
+// in readme.html (DEVANAGARI, ARABIC, CYRILLIC, LATIN, HAN + HIRAGANA,
+// KATAKANA, DESERET). The last row is written as surrogate pairs.
+char16_t testChars[] = {
+    0x0020, 0x0946, 0x0939, 0x093F, 0x0928, 0x094D, 0x0926, 0x0940, 0x0020,
+    0x0627, 0x0644, 0x0639, 0x0631, 0x0628, 0x064A, 0x0629, 0x0020,
+    0x0420, 0x0443, 0x0441, 0x0441, 0x043A, 0x0438, 0x0439, 0x0020,
+    'E', 'n', 'g', 'l', 'i', 's', 'h', 0x0020,
+    0x6F22, 0x5B75, 0x3068, 0x3072, 0x3089, 0x304C, 0x306A, 0x3068,
+    0x30AB, 0x30BF, 0x30AB, 0x30CA,
+    0xD801, 0xDC00, 0xD801, 0xDC01, 0xD801, 0xDC02, 0xD801, 0xDC03
+};
+
+// number of UTF-16 code units in testChars
+int32_t testLength = UPRV_LENGTHOF(testChars);
+
+// Runs ScriptRun over testChars and prints one line per script run with the
+// script's name and the run's [start, end) code unit offsets.
+int main()
+{
+    icu::ScriptRun scriptRun(testChars, 0, testLength);
+
+    while (scriptRun.next()) {
+        int32_t start = scriptRun.getScriptStart();
+        int32_t end = scriptRun.getScriptEnd();
+        UScriptCode code = scriptRun.getScriptCode();
+
+        printf("Script '%s' from %d to %d.\n", uscript_getName(code), start, end);
+    }
+    return 0;
+}
diff --git a/intl/icu/source/extra/scrptrun/srtest.dsp b/intl/icu/source/extra/scrptrun/srtest.dsp
new file mode 100644
index 0000000000..f5e23dfdb8
--- /dev/null
+++ b/intl/icu/source/extra/scrptrun/srtest.dsp
@@ -0,0 +1,110 @@
+# Microsoft Developer Studio Project File - Name="srtest" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=srtest - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE
+!MESSAGE NMAKE /f "srtest.mak".
+!MESSAGE
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. 
For example: +!MESSAGE +!MESSAGE NMAKE /f "srtest.mak" CFG="srtest - Win32 Debug" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "srtest - Win32 Release" (based on "Win32 (x86) Console Application") +!MESSAGE "srtest - Win32 Debug" (based on "Win32 (x86) Console Application") +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "srtest - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c +# ADD CPP /nologo /W3 /GX /O2 /I "..\..\..\include" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 ..\..\..\lib\icuuc.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 + +!ELSEIF "$(CFG)" == "srtest - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE 
Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c +# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "../../../include" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 ..\..\..\lib\icuucd.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept + +!ENDIF + +# Begin Target + +# Name "srtest - Win32 Release" +# Name "srtest - Win32 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\scrptrun.cpp +# End Source File +# Begin Source File + +SOURCE=.\srtest.cpp +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# Begin Source File + +SOURCE=.\scrptrun.h +# End Source File +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" +# End Group +# End Target +# End Project 
diff --git a/intl/icu/source/extra/scrptrun/srtest.dsw b/intl/icu/source/extra/scrptrun/srtest.dsw new file mode 100644 index 0000000000..6609c4f206 --- /dev/null +++ b/intl/icu/source/extra/scrptrun/srtest.dsw @@ -0,0 +1,29 @@ +Microsoft Developer Studio Workspace File, Format Version 6.00 +# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE! + +############################################################################### + +Project: "srtest"=.\srtest.dsp - Package Owner=<4> + +Package=<5> +{{{ +}}} + +Package=<4> +{{{ +}}} + +############################################################################### + +Global: + +Package=<5> +{{{ +}}} + +Package=<3> +{{{ +}}} + +############################################################################### + diff --git a/intl/icu/source/extra/uconv/Makefile.in b/intl/icu/source/extra/uconv/Makefile.in new file mode 100644 index 0000000000..30e27fdd56 --- /dev/null +++ b/intl/icu/source/extra/uconv/Makefile.in @@ -0,0 +1,183 @@ +## Copyright (C) 2016 and later: Unicode, Inc. and others. +## License & terms of use: http://www.unicode.org/copyright.html +## ****************************************************************************** +## * +## * Copyright (C) 1999-2014, International Business Machines +## * Corporation and others. All Rights Reserved. +## * +## ******************************************************************************* +## Makefile.in for ICU - uconv +## Steven R. Loomis + +## Set the following to dll or static or common.. +UCONVMSG_MODE=static +############################################################## + +srcdir=@srcdir@ +top_srcdir=@top_srcdir@ + +top_builddir = ../.. 
+subdir = extra/uconv + +include $(top_builddir)/icudefs.mk + +MSGNAME=uconvmsg + +# RESSRC comes from resfiles.mk +FILESEPCHAR=/ +include $(srcdir)/resfiles.mk + +RESDIR=$(MSGNAME) +RESFILES=$(RESSRC:$(RESOURCESDIR)$(FILESEPCHAR)%.txt=$(RESDIR)/%.res) + +## + +TARGET_STUB_NAME = uconv + +SECTION = 1 + +ALL_MAN_FILES = $(TARGET_STUB_NAME).$(SECTION) + +## Extra files to remove for 'make clean' +CLEANFILES = *~ $(DEPS) $(ALL_MAN_FILES) + +## Target information +TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT) + +CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(srcdir)/../toolutil +CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT=explicit -DUNISTR_FROM_STRING_EXPLICIT=explicit +LIBS = $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M) + +ifeq ($(PKGDATA_OPTS),) +PKGDATA_OPTS = -O pkgdata.inc +endif + +## generic settings for data - common. +PKGMODE=common +INSTALLTO=$(DESTDIR)$(ICUDATA_DIR) +UCONVMSG_LIB=package-resfiles + +## Static mode +ifeq ($(UCONVMSG_MODE),static) +DEFS += -DUCONVMSG_LINK=$(MSGNAME) +UCONVMSG_LIB = $(RESDIR)/$(LIBPREFIX)$(STATIC_PREFIX_WHEN_USED)$(MSGNAME).$(A) +LIBS += $(UCONVMSG_LIB) +PKGMODE=static +INSTALLTO=$(libdir) +endif + +## DLL mode +ifeq ($(UCONVMSG_MODE),dll) +DEFS += -DUCONVMSG_LINK=$(MSGNAME) +LIBS += -L$(RESDIR) -l$(MSGNAME) +PKGMODE=dll +INSTALLTO=$(libdir) +endif + +SOURCES = $(shell cat $(srcdir)/sources.txt) +OBJECTS = $(patsubst %.cpp,%.o,$(patsubst %.c,%.o, $(SOURCES))) + +DEPS = $(OBJECTS:.o=.d) + +## List of phony targets +.PHONY : all all-local install install-local clean clean-local \ +distclean resclean distclean-local dist dist-local \ +check check-local build-dir package-resfiles install-resfiles install-man + +## Clear suffix list +.SUFFIXES : + +## List of standard targets +all: all-local + +install: install-local +clean: clean-local +distclean : distclean-local +dist: dist-local +check: check-local + +all-local: build-dir $(TARGET) $(ALL_MAN_FILES) + +install-local: all-local install-target 
install-resfiles install-man + +install-target: all-local + $(MKINSTALLDIRS) $(DESTDIR)$(bindir) + $(INSTALL) $(TARGET) $(DESTDIR)$(bindir) + +dist-local: + +clean-local: resclean + test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES) $(RESFILES) + $(RMV) $(OBJECTS) $(TARGET) + +resclean: + @#-$(INVOKE) $(TOOLBINDIR)/pkgdata --clean -p $(RESDIR) -O pkgdata.inc -m $(PKGMODE) -d $(RESDIR) -T $(RESDIR) $(RESDIR)/$(RESDIR).lst + $(RMV) pkgdata.inc $(RESDIR) + +distclean-local: clean-local + $(RMV) Makefile $(DEPS) + +check-local: $(TARGET) +ifneq (,$(filter $(PKGDATA_MODE),files common)) + @echo "Currently, pkgdata is in \"$(PKGDATA_MODE)\" mode." + @echo "To test uconv, run this manually after installing ICU:" + @echo "\"./$(TARGET) -f ibm-37 $(srcdir)/samples/ibm-37-test.txt\"" +else + $(INVOKE) ./$(TARGET) -f ibm-37 $(srcdir)/samples/ibm-37-test.txt +endif + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + +pkgdata.inc: pkgdataMakefile + $(MAKE) -f pkgdataMakefile + +build-dir: + @$(MKINSTALLDIRS) $(RESDIR) + +pkgdataMakefile: + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + +$(TARGET_STUB_NAME).$(SECTION): $(srcdir)/$(TARGET_STUB_NAME).$(SECTION).in pkgdata.inc + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$(TARGET_STUB_NAME).$(SECTION) CONFIG_HEADERS= $(SHELL) ./config.status + +$(TARGET) : $(OBJECTS) $(UCONVMSG_LIB) + $(LINK.cc) $(OUTOPT)$@ $(OBJECTS) $(LIBS) + $(POST_BUILD_STEP) + +# The | is an order-only prerequisite. This helps when the -j option is used, +# and we don't want the files to be built before the directories are built. 
+ifneq ($(filter order-only,$(.FEATURES)),) +$(RESFILES) $(RESDIR)/$(RESDIR).lst: | build-dir +endif + +$(UCONVMSG_LIB): $(RESFILES) $(RESDIR)/$(RESDIR).lst pkgdata.inc + $(INVOKE) $(PKGDATA_INVOKE_OPTS) $(TOOLBINDIR)/pkgdata -p $(MSGNAME) $(PKGDATA_OPTS) -m $(PKGMODE) -s $(RESDIR) -d $(RESDIR) -T $(RESDIR) $(RESDIR)/$(RESDIR).lst + +$(RESDIR)/$(RESDIR).lst: Makefile $(srcdir)/resfiles.mk + @-$(RMV) $@ + @for file in $(RESFILES:$(RESDIR)/%.res=%.res); do \ + echo $$file >> $@; \ + done; + +# no install for static mode +ifneq ($(UCONVMSG_MODE),static) +install-resfiles: $(RESFILES) $(RESDIR)/$(RESDIR).lst pkgdata.inc + $(MKINSTALLDIRS) $(DESTDIR)$(ICUDATA_DIR) + $(INVOKE) $(TOOLBINDIR)/pkgdata -p $(RESDIR) -O pkgdata.inc -m $(PKGMODE) -d $(RESDIR) -I $(INSTALLTO) -T $(RESDIR) $(RESDIR)/$(RESDIR).lst +else +install-resfiles: +endif + +$(MSGNAME)/%.res: $(srcdir)/$(RESOURCESDIR)/%.txt + $(INVOKE) $(TOOLBINDIR)/genrb -e UTF-8 -s $(^D) -d $(@D) $(^F) + +install-man: $(ALL_MAN_FILES) + $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION) + $(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION) + + + diff --git a/intl/icu/source/extra/uconv/README b/intl/icu/source/extra/uconv/README new file mode 100644 index 0000000000..5f0a84261b --- /dev/null +++ b/intl/icu/source/extra/uconv/README @@ -0,0 +1,17 @@ +Copyright (C) 2016 and later: Unicode, Inc. and others. +License & terms of use: http://www.unicode.org/copyright.html +Copyright (c) 2002, International Business Machines Corporation and others. All Rights Reserved. + +The uconv command is an iconv(1)-like conversion / transcoding +program. Please check its manual page, or run uconv -h, for help. + +Help, as well as error messages, are displayed through the use of a +resource bundle. Please contact Steven Loomis <srl@jtcsv.com> if you +want to offer a translation of these messages for a particular locale. 
+ +uconv was originally written and contributed to icuapps by Jonas Utterström +<jonas.utterstrom@vittran.norrnod.se>, and offered simple conversion and +a way to know which encodings were available. It has since been +moved to the main ICU distribution and converted to the C conversion +API, and is maintained by Yves Arrouye <yves@realnames.com> who seems +to always be looking for one more feature or option to add to the tool. diff --git a/intl/icu/source/extra/uconv/makedata.mak b/intl/icu/source/extra/uconv/makedata.mak new file mode 100644 index 0000000000..81fcc908ee --- /dev/null +++ b/intl/icu/source/extra/uconv/makedata.mak @@ -0,0 +1,151 @@ +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +#********************************************************************** +#* Copyright (C) 1999-2008, International Business Machines Corporation +#* and others. All Rights Reserved. +#********************************************************************** +# nmake file for creating data files on win32 +# invoke with +# nmake /f makedata.mak icup=<path_to_icu_installation> [Debug|Release] +# +# 12/10/1999 weiv Created + +#If no config, we default to debug +!IF "$(CFG)" == "" +CFG=Debug +!MESSAGE No configuration specified. Defaulting to common - Win32 Debug. +!ENDIF + +#Here we test if a valid configuration is given +!IF "$(CFG)" != "Release" && "$(CFG)" != "release" && "$(CFG)" != "Debug" && "$(CFG)" != "debug" && "$(CFG)" != "x86\Release" && "$(CFG)" != "x86\Debug" && "$(CFG)" != "x64\Release" && "$(CFG)" != "x64\Debug" && "$(CFG)" != "ARM\Release" && "$(CFG)" != "ARM\Debug" && "$(CFG)" != "ARM64\Release" && "$(CFG)" != "ARM64\Debug" +!MESSAGE Invalid configuration "$(CFG)" specified. +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. 
For example: +!MESSAGE +!MESSAGE NMAKE /f "makedata.mak" CFG="Debug" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "Release" +!MESSAGE "Debug" +!MESSAGE +!ERROR An invalid configuration is specified. +!ENDIF + +#Let's see if user has given us a path to ICU +#This could be found according to the path to makefile, but for now it is this way +!IF "$(ICUP)"=="" +!ERROR Can't find path! +!ENDIF +!MESSAGE ICU path is $(ICUP) + +RESNAME=uconvmsg +RESDIR=resources +RESFILES=resfiles.mk +ICUDATA=$(ICUP)\data + +DLL_OUTPUT=.\$(CFG) +# set the following to 'static' or 'dll' depending +PKGMODE=static + +ICD=$(ICUDATA)^\ +DATA_PATH=$(ICUP)\data^\ + +NATIVE_ARM= +!IF "$(PROCESSOR_ARCHITECTURE)" == "ARM64" || "$(PROCESSOR_ARCHITEW6432)" == "ARM64" +NATIVE_ARM=ARM64 +!ELSE IF "$(PROCESSOR_ARCHITECTURE)" == "ARM" || "$(PROCESSOR_ARCHITEW6432)" == "ARM" +NATIVE_ARM=ARM +!ENDIF + +# Use the x64 tools for building ARM and ARM64. +# Note: This is similar to the TOOLS CFG PATH in source\data\makedata.mak +!IF "$(NATIVE_ARM)" == "" +!IF "$(CFG)" == "x64\Release" || "$(CFG)" == "x64\Debug" || "$(CFG)" == "ARM\Release" || "$(CFG)" == "ARM\Debug" || "$(CFG)" == "ARM64\Release" || "$(CFG)" == "ARM64\Debug" +ICUTOOLS=$(ICUP)\bin64 +PATH = $(ICUP)\bin64;$(PATH) +!ELSE +ICUTOOLS=$(ICUP)\bin +PATH = $(ICUP)\bin;$(PATH) +!ENDIF +!ELSE +!IF "$(CFG)" == "ARM\Release" || "$(CFG)" == "ARM\Debug" +ICUTOOLS=$(ICUP)\binARM +PATH = $(ICUP)\binARM;$(PATH) +!ELSE +ICUTOOLS=$(ICUP)\binARM64 +PATH = $(ICUP)\binARM64;$(PATH) +!ENDIF +!ENDIF + +# If building ARM/ARM64, then we need to pass the arch as an argument. +EXTRA_PKGDATA_ARGUMENTS= +!IF "$(CFG)" == "ARM\Release" || "$(CFG)" == "ARM\Debug" +EXTRA_PKGDATA_ARGUMENTS=-a ARM +!ENDIF +!IF "$(CFG)" == "ARM64\Release" || "$(CFG)" == "ARM64\Debug" +EXTRA_PKGDATA_ARGUMENTS=-a ARM64 +!ENDIF + +# Make sure the necessary tools exist before continuing. (This is to prevent cryptic errors from NMAKE).
+!IF !EXISTS($(ICUTOOLS)\pkgdata.exe) +!MESSAGE Unable to find "$(ICUTOOLS)\pkgdata.exe" +!ERROR The tool 'pkgdata.exe' does not exist! (Have you built all of ICU yet?). +!IF "$(CFG)" == "ARM\Release" || "$(CFG)" == "ARM\Debug" || "$(CFG)" == "ARM64\Release" || "$(CFG)" == "ARM64\Debug" +!ERROR Note that the ARM and ARM64 builds require building x64 first. +!ENDIF +!ENDIF +!IF !EXISTS($(ICUTOOLS)\genrb.exe) +!MESSAGE Unable to find "$(ICUTOOLS)\genrb.exe" +!ERROR The tool 'genrb.exe' does not exist! (Have you built all of ICU yet?). +!IF "$(CFG)" == "ARM\Release" || "$(CFG)" == "ARM\Debug" || "$(CFG)" == "ARM64\Release" || "$(CFG)" == "ARM64\Debug" +!ERROR Note that the ARM and ARM64 builds require building x64 first. +!ENDIF +!ENDIF + +# Suffixes for data files +.SUFFIXES : .ucm .cnv .dll .dat .res .txt .c + +# We're including a list of resource files. +FILESEPCHAR= + +!IF EXISTS("$(RESFILES)") +!INCLUDE "$(RESFILES)" +!ELSE +!ERROR ERROR: cannot find "$(RESFILES)" +!ENDIF +RES_FILES = $(RESSRC:.txt=.res) +RB_FILES = resources\$(RES_FILES:.res =.res resources\) +RESOURCESDIR= + +# This target should build all the data files +!IF "$(PKGMODE)" == "dll" +OUTPUT = "$(DLL_OUTPUT)\$(RESNAME).dll" +!ELSE +OUTPUT = "$(DLL_OUTPUT)\$(RESNAME).lib" +!ENDIF + +ALL : $(OUTPUT) + @echo All targets are up to date (mode $(PKGMODE)) + + +# invoke pkgdata - static +"$(DLL_OUTPUT)\$(RESNAME).lib" : $(RB_FILES) $(RESFILES) + @echo Building $(RESNAME).lib + @"$(ICUTOOLS)\pkgdata" -f -v -m static -c -p $(RESNAME) -d "$(DLL_OUTPUT)" $(EXTRA_PKGDATA_ARGUMENTS) -s "$(RESDIR)" <<pkgdatain.txt +$(RES_FILES:.res =.res +) +<<KEEP + +# This is to remove all the data files +CLEAN : + -@erase "$(RB_FILES)" + -@erase "$(CFG)\*uconvmsg*.*" + -@"$(ICUTOOLS)\pkgdata" -f --clean -v -m static -c -p $(RESNAME) -d "$(DLL_OUTPUT)" $(EXTRA_PKGDATA_ARGUMENTS) -s "$(RESDIR)" pkgdatain.txt + +# Inference rule for creating resource bundles +{$(RESDIR)}.txt{$(RESDIR)}.res: + @echo Making Resource Bundle files 
+ "$(ICUTOOLS)\genrb" -s $(@D) -d $(@D) $(?F) + +$(RESSRC) : {"$(ICUTOOLS)"}genrb.exe diff --git a/intl/icu/source/extra/uconv/pkgdata.inc.in b/intl/icu/source/extra/uconv/pkgdata.inc.in new file mode 100644 index 0000000000..b2aa4bd804 --- /dev/null +++ b/intl/icu/source/extra/uconv/pkgdata.inc.in @@ -0,0 +1,64 @@ +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +#****************************************************************************** +# +# Copyright (C) 2000-2007, International Business Machines +# Corporation and others. All Rights Reserved. +# +#****************************************************************************** +# This Makefile is used to build the ICU's data. +# It is included with the -O option to pkgdata. + +PLATFORM=@platform@ + +top_srcdir=@top_srcdir@ +srcdir=@srcdir@ +top_builddir=../.. + +include $(top_builddir)/icudefs.mk + +CPPFLAGS+= -I$(top_builddir)/common -I$(top_srcdir)/common +LIBS=@LIBS@ +exec_prefix=@exec_prefix@ +prefix=@prefix@ +program_transform_name=@program_transform_name@ +bindir=@bindir@ +sbindir=@sbindir@ +libexecdir=@libexecdir@ +datadir=@datadir@ +sysconfdir=@sysconfdir@ +sharedstatedir=@sharedstatedir@ +localstatedir=@localstatedir@ +libdir=@libdir@ +includedir=@includedir@ +oldincludedir=@oldincludedir@ +infodir=@infodir@ +mandir=@mandir@ +PACKAGE=@PACKAGE@ +VERSION=@VERSION@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ + +INSTALL_PROGRAM=@INSTALL_PROGRAM@ +INSTALL_SCRIPT=@INSTALL_SCRIPT@ +INSTALL_DATA=@INSTALL_DATA@ +host=@host@ +host_alias=@host_alias@ +host_cpu=@host_cpu@ +host_vendor=@host_vendor@ +host_os=@host_os@ +LIB_M=@LIB_M@ +CPP=@CPP@ +U_HAVE_INTTYPES_H=@U_HAVE_INTTYPES_H@ +U_IS_BIG_ENDIAN=@U_IS_BIG_ENDIAN@ +platform=@platform@ + +##### Add the following to source/config/Makefile.in + +GENCCODE=$(BINDIR)/genccode +GENCMN=$(BINDIR)/gencmn +ICUPKG=$(BINDIR)/icupkg + + diff --git 
a/intl/icu/source/extra/uconv/pkgdataMakefile.in b/intl/icu/source/extra/uconv/pkgdataMakefile.in new file mode 100644 index 0000000000..f8a684fc28 --- /dev/null +++ b/intl/icu/source/extra/uconv/pkgdataMakefile.in @@ -0,0 +1,42 @@ +## pkgdataMakefile.in for ICU data +## Copyright (C) 2016 and later: Unicode, Inc. and others. +## License & terms of use: http://www.unicode.org/copyright.html +## Copyright (c) 2008-2012, International Business Machines Corporation and +## others. All Rights Reserved. + +## Source directory information +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ + +# So that you have $(top_builddir)/config.status +top_builddir = ../.. + +## All the flags and other definitions are included here. +include $(top_builddir)/icudefs.mk + +OUTPUTFILE=pkgdata.inc +MIDDLE_SO_TARGET= +PKGDATA_TRAILING_SPACE=" " + +all : clean + @echo GENCCODE_ASSEMBLY_TYPE=$(GENCCODE_ASSEMBLY) >> $(OUTPUTFILE) + @echo SO=$(SO) >> $(OUTPUTFILE) + @echo SOBJ=$(SOBJ) >> $(OUTPUTFILE) + @echo A=$(A) >> $(OUTPUTFILE) + @echo LIBPREFIX=$(LIBPREFIX)$(STATIC_PREFIX_WHEN_USED) >> $(OUTPUTFILE) + @echo LIB_EXT_ORDER=$(FINAL_SO_TARGET) >> $(OUTPUTFILE) + @echo COMPILE="$(COMPILE.c)" >> $(OUTPUTFILE) + @echo LIBFLAGS="-I$(top_srcdir)/common -I$(top_builddir)/common $(SHAREDLIBCPPFLAGS) $(SHAREDLIBCFLAGS)" >> $(OUTPUTFILE) + @echo GENLIB="$(SHLIB.c)" >> $(OUTPUTFILE) + @echo LDICUDTFLAGS=$(LDFLAGSICUDT) >> $(OUTPUTFILE) + @echo LD_SONAME=$(LD_SONAME) >> $(OUTPUTFILE) + @echo RPATH_FLAGS=$(RPATH_FLAGS) >> $(OUTPUTFILE) + @echo BIR_LDFLAGS=$(BIR_LDFLAGS) >> $(OUTPUTFILE) + @echo AR=$(AR) >> $(OUTPUTFILE) + @echo ARFLAGS=$(ARFLAGS) >> $(OUTPUTFILE) + @echo RANLIB=$(RANLIB) >> $(OUTPUTFILE) + @echo INSTALL_CMD=$(INSTALL) >> $(OUTPUTFILE) + +clean : + $(RMV) $(OUTPUTFILE) + diff --git a/intl/icu/source/extra/uconv/resfiles.mk b/intl/icu/source/extra/uconv/resfiles.mk new file mode 100644 index 0000000000..2117127ea2 --- /dev/null +++ b/intl/icu/source/extra/uconv/resfiles.mk @@ -0,0 +1,11 @@ 
+# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# Copyright (c) 2000-2002 IBM, Inc. and Others. +# A small makefile containing the list of resource bundles +# to include in uconv. + +# The variable FILESEPCHAR is defined by the caller to be +# the character separating components of a filename. + +RESOURCESDIR = resources +RESSRC = $(RESOURCESDIR)$(FILESEPCHAR)root.txt $(RESOURCESDIR)$(FILESEPCHAR)fr.txt diff --git a/intl/icu/source/extra/uconv/resources/fr.txt b/intl/icu/source/extra/uconv/resources/fr.txt new file mode 100644 index 0000000000..180fb9b671 --- /dev/null +++ b/intl/icu/source/extra/uconv/resources/fr.txt @@ -0,0 +1,106 @@ +// -*- Coding: utf-8; -*- [all uconv resource files] +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// Copyright (c) 2000-2004 IBM, Inc. and Others. +// +// Root translation file for uconv messages. +// So you want to translate this file??? Great! +// 1. copy it to a new name [ex: se.txt] +// +// 2. You might wish to comment out ALL lines, and then uncomment them +// as you add translations. That way, you don't inadvertently mark +// an untranslated English (or whatever) string as already +// translated. The base translation might change! +// +// 3. These files are in UTF-8 format (even though root uses only +// ASCII) +// +// 4. Make note of the location of {0}, {1}, etc.. they are taken from +// arguments to u_wmsg() in order.. +// +// 5. Add se.txt to RESSRC= in resfiles.mk and to the project file on +// the Windows side. +// +// 6. Send it in to srl@jtcsv.com or ask on the ICU mailing list! thanks! 
+ +fr +{ + // uconv errors + + lcUsageWord { "usage" } + ucUsageWord { "Usage" } + usage { + "{0}: {1} " + "[ -h, -?, --help ] [ -V, --version ] [ -s, --silent ] [ -v, --verbose ] " + "[ -l, --list | --list-code code | --default-code | -L, --list-transliterators ] " + "[ --canon ] [ -x translitération ] " + "[ --to-callback callback | -c ] [ --from-callback callback | -i ] [ --callback callback ] " + "[ --fallback | --no-fallback ] " + "[ -b, --block-size taille ] " + "[ -f, --from-code code ] [ -t, --to-code code ] " + "[ --add-signature ] [ --remove-signature ] " + "[ -o, --output fichier ] " + "[ fichier ... ]\n" + } + + // TODO there is some English in here + help { "Options : -h, --help affiche ce message\n" + " -V, --version affiche la version du programme\n" +" -s, --silent supprime les messages\n" +" -v, --verbose affiche les progrès\n" +" -l, --list liste tous les encodages disponibles\n" +" --list-code code liste juste l''encodage donné\n" +" --default-code liste juste l''encodage par défaut\n" +" -L, --list-transliterators liste tous les translitérateurs\n" +" --canon affiche la liste dans le format de cnvrtrs.txt(5)\n" +" -x translitération passe le texte à travers translitération\n" +" --to-callback callback utilise callback sur l''encodage cible\n" +" -c omet les caractères invalides de la sortie\n" +" --from-callback callback utilise callback sur l''encodage source\n" +" -i omet les séquences invalides de l''entrée\n" +" --callback callback utilise callback sur les deux encodages\n" +" -b, --block-size taille lit des blocks de taille octets (défaut : 4096)\n" +" --fallback utilise les correspondances de secours\n" +" --no-fallback n''utilise pas les correspondances de secours\n" +" -f, --from-code code fixe l''encodage d''origine\n" +" -t, --to-code code fixe l''encodage de destination\n" +" --add-signature add a U+FEFF Unicode signature character (BOM)\n" +" --remove-signature remove a U+FEFF Unicode signature character (BOM)\n" +" -o, --output 
fichier écrit la sortie dans fichier\n" +"\n" +"Callbacks :" } + + cantGetNames { "Ne peux obtenir la liste des encodages.\n" } // 0: err + cantGetTag { "Ne peux obtenir le nom de l''étiquette standard : {0}.\n" } // 0: err + + noSuchCodeset { "Ne peux trouver l''encodage : {0}.\n" } // 0: name of the encoding + noFromCodeset { "L''encodage d''origine n''a pas été fixé (utilisez -f).\n" } + noToCodeset { "L''encodage de destination n''a pas été fixé (utilisez -t).\n" } + + badBlockSize { "Taille de bloc incorrecte : {0}.\n" } // 0: size of the block + + cantSetInBinMode { "Ne peux mettre l''entrée standard en mode binaire.\n" } + cantSetOutBinMode { "Ne peux mettre la sortie standard en mode binaire.\n" } + + cantOpenFromCodeset { "Ne peux ouvrir de convertisseur pour l''encodage d''origine {0} : {1}.\n" } // 0:set, 1: err + cantOpenToCodeset { "Ne peux ouvrir de convertisseur pour l''encodage de destination {0} : {1}.\n" } // 0:set, 1: err + + cantCreateTranslit { "Ne peux créer la translitération \"{0}\": {1}.\n" } // 0:set, 1: err + cantCreateTranslitParseErr { "Ne peux créer la translitération \"{0}\": {1}, ligne {2}, position {3}.\n" } // 0: set, 1: err, 2: line, 3: offset + + cantSetCallback { "Ne peux fixer le callback de transcodage : {0}.\n" } // 0: err + + unknownCallback { "Callback inconnu : {0}.\n" } // 0: callback name + + cantOpenInputF { "Ne peux ouvrir le fichier d''entrée {0} : {1}.\n" } // 0: file, 1: strerror [OS error string] + cantCreateOutputF { "Ne peux créer le fichier de sortie {0} : {1}.\n" } // 0: file, 1: strerror [OS error string] + + cantWrite { "Le texte converti ne peut pas être écrit : {0}.\n" } // 0: OS error string + cantRead { "Erreur de lecture du fichier d''entrée : {0}.\n" } // 0: OS error string + + // TODO retranslate the problemCvt...
messages because their format changed + //problemCvtToU { "La conversion d''Unicode vers l''encodage de destination a échoué à la position {0} : {1}.\n" } // 0: position, 1: err + //problemCvtFromU { "La conversion de l''encodage original vers Unicode a échoué à la position {0} : {1}.\n" } // 0: position, 1: err + //problemCvtFromUOut { "La conversion de l''encodage original vers Unicode a échoué à la position {0} de la sortie : {1}.\n" } // 0: position, 1: err +} diff --git a/intl/icu/source/extra/uconv/resources/root.txt b/intl/icu/source/extra/uconv/resources/root.txt new file mode 100644 index 0000000000..3ed0252a43 --- /dev/null +++ b/intl/icu/source/extra/uconv/resources/root.txt @@ -0,0 +1,130 @@ +// -*- Coding: utf-8; -*- [all uconv resource files] +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// Copyright (c) 2000-2004 IBM, Inc. and Others. +// +// Root translation file for uconv messages. +// So you want to translate this file??? Great! +// 1. copy it to a new name [ex: se.txt] +// +// 2. You might wish to comment out ALL lines, and then uncomment them +// as you add translations. That way, you don't inadvertently mark +// an untranslated English (or whatever) string as already +// translated. The base translation might change! +// +// 3. These files are in UTF-8 format (even though root uses only +// ASCII) +// +// 4. Make note of the location of {0}, {1}, etc.. they are taken from +// arguments to u_wmsg() in order.. +// +// 5. Add se.txt to RESSRC= in resfiles.mk and to the project file on +// the Windows side. +// +// 6. Send it in to srl@jtcsv.com or ask on the ICU mailing list! thanks! 
+ +root +{ + // uconv errors + + lcUsageWord { "usage" } + ucUsageWord { "Usage" } + usage { + "{0}: {1} " + "[ -h, -?, --help ] [ -V, --version ] [ -s, --silent ] [ -v, --verbose ] " + "[ -l, --list | --list-code code | --default-code | -L, --list-transliterators ] " + "[ --canon ] [ -x transliteration ] " + "[ --to-callback callback | -c ] [ --from-callback callback | -i ] [ --callback callback ] " + "[ --fallback | --no-fallback ] " + "[ -b, --block-size size ] " + "[ -f, --from-code code ] [ -t, --to-code code ] " + "[ --add-signature ] [ --remove-signature ] " + "[ -o, --output file ] " + "[ file ... ]\n" } + + help { "Options: -h, --help print this message\n" + " -V, --version print the program version\n" + " -s, --silent suppress messages\n" + " -v, --verbose display progress information\n" + " -l, --list list all available encodings\n" + " --list-code code list only the given encoding\n" + " --default-code list only the default encoding\n" + " -L, --list-transliterators list all available transliterators\n" + " --canon print list in cnvrtrs.txt(5) format\n" + " -x transliteration run everything through transliteration\n" + " --to-callback callback use callback on destination encoding\n" + " -c omit invalid characters from the output\n" + " --from-callback callback use callback on original encoding\n" + " -i ignore invalid sequences in the input\n" + " --callback callback use callback on both encodings\n" + " -b, --block-size size read size bytes blocks (default: 4096)\n" + " --fallback use fallback mapping\n" + " --no-fallback do not use fallback mapping\n" + " -f, --from-code code set the original encoding\n" + " -t, --to-code code set the destination encoding\n" + " --add-signature add a U+FEFF Unicode signature character (BOM)\n" + " --remove-signature remove a U+FEFF Unicode signature character (BOM)\n" + " -o, --output file write output to file\n" + "\n" + "Callbacks:" + } + + cantGetNames { "Couldn''t get available converter names.\n" } // 0: err + 
cantGetTag { "Couldn''t get standard tag name: {0}.\n" } // 0: err + + noSuchCodeset { "Couldn''t find encoding: {0}.\n" } // 0: name of the encoding + noFromCodeset { "No original encoding set (use -f).\n" } + noToCodeset { "No destination encoding set (use -t).\n" } + + badBlockSize { "Bad block size: {0}.\n" } // 0: size of the block + + cantSetInBinMode { "Couldn't set standard input to binary mode." } + cantSetOutBinMode { "Couldn't set standard output to binary mode." } + + cantOpenFromCodeset { "Couldn''t open converter for original encoding {0}: {1}.\n" } // 0:set, 1: err + cantOpenToCodeset { "Couldn''t open converter for destination encoding {0}: {1}.\n" } // 0: set, 1: err + + cantCreateTranslit { "Couldn''t create transliteration \"{0}\": {1}.\n" } // 0: set, 1: err + cantCreateTranslitParseErr { "Couldn''t create transliteration \"{0}\": {1}, line {2}, offset {3}.\n" } // 0: set, 1: err, 2: line, 3: offset + + cantSetCallback { "Couldn''t set transcoding callback: {0}.\n" } // 0: err + + unknownCallback { "Unknown callback: {0}.\n" } // 0: callback name + + cantOpenInputF { "Couldn''t open input file {0}: {1}.\n" } // 0: file, 1: strerror [OS error string] + cantCreateOutputF { "Couldn''t create output file {0}: {1}.\n" } // 0: file, 1: strerror [OS error string] + + cantWrite { "The converted text couldn't be written: {0}.\n" } // 0: OS error string + cantRead { "Error reading from input file: {0}.\n" } // 0: OS error string + + problemCvtToU { "Conversion to Unicode from codepage failed at input byte position {0}. Bytes: {1} Error: {2}\n" } // 0: position, 1: bytes, 2: err + problemCvtFromU { "Conversion from Unicode to codepage failed at input byte position {0}. Unicode: {1} Error: {2}\n"} // 0: position, 1: Unicode, 2: err + problemCvtFromUOut { "Conversion from Unicode to codepage failed at output byte position {0}. 
Unicode: {1} Error: {2}\n"} // 0: position, 1: Unicode, 2: err + +// ICU errors - used by u_wmsg_errorName() + + U_USING_FALLBACK_ERROR { "Using fallback data" } + U_USING_DEFAULT_ERROR { "Using default data" } + U_ZERO_ERROR { "No error has occurred" } + U_ILLEGAL_ARGUMENT_ERROR { "Illegal argument" } + U_MISSING_RESOURCE_ERROR { "A resource was missing" } + U_INVALID_FORMAT_ERROR { "Invalid format" } + U_FILE_ACCESS_ERROR { "Problem accessing that file/object" } + U_INTERNAL_PROGRAM_ERROR { "Internal program error" } + U_MESSAGE_PARSE_ERROR { "Parse error on message format" } + U_MEMORY_ALLOCATION_ERROR { "Out of memory" } + U_INDEX_OUTOFBOUNDS_ERROR { "An index was out-of-bounds" } + U_PARSE_ERROR { "Parse error" } + U_INVALID_CHAR_FOUND { "Invalid character found" } + U_TRUNCATED_CHAR_FOUND { "Truncated character found" } + U_ILLEGAL_CHAR_FOUND { "Illegal character found" } + U_INVALID_TABLE_FORMAT { "Invalid table data format" } + U_INVALID_TABLE_FILE { "Invalid table data file" } + U_BUFFER_OVERFLOW_ERROR { "Buffer overflow" } + U_UNSUPPORTED_ERROR { "A feature was unsupported" } + U_RESOURCE_TYPE_MISMATCH { "Resource type mismatch" } + U_ILLEGAL_ESCAPE_SEQUENCE { "Illegal escape sequence found" } + U_UNSUPPORTED_ESCAPE_SEQUENCE {"Unsupported escape sequence found" } +} + diff --git a/intl/icu/source/extra/uconv/samples/ISO-8859-2.txt b/intl/icu/source/extra/uconv/samples/ISO-8859-2.txt new file mode 100644 index 0000000000..61e8d712b2 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/ISO-8859-2.txt @@ -0,0 +1,2 @@ +Dobar dan! Kako ste? Ja sam Marina +Dodigovi. Kako se Vi zovete? diff --git a/intl/icu/source/extra/uconv/samples/ISO-8859-3.txt b/intl/icu/source/extra/uconv/samples/ISO-8859-3.txt new file mode 100644 index 0000000000..b6ba7937ff --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/ISO-8859-3.txt @@ -0,0 +1,6 @@ +Yukarda mavi gk, asada yaz yer yaratldkta; ikisinin arasnda insan +olu yaratlm. 
nsan oullar zerine ecdadm Bumn hakan, stemi hakan +tahta oturmu; oturarak Trk milletinin lkesini, tresini, idare edivermi, +tanzim edivermis. Drt taraf hep dman imi. Asker sevk edip drt taraftaki +kavmi hep (itaati altna) alm hep muti klm. Ballara ba edirmi, +dizlilere diz ktrm. diff --git a/intl/icu/source/extra/uconv/samples/danish-ISO-8859-1.txt b/intl/icu/source/extra/uconv/samples/danish-ISO-8859-1.txt new file mode 100644 index 0000000000..95e57d6c72 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/danish-ISO-8859-1.txt @@ -0,0 +1,18 @@ + Foto ______ det formodede fdehus, hjrnet ______ Hans Jensensstrde og Bangs Boder. + (H. C. Andersens Hus, Odense). + +BARNDOMSOMGIVELSERNE</b> + Sknt H. C. Andersens barndomsomgivelser var meget fattige, blev de i hans rige fantasi + solbeskinnede. + Der findes en mandtalsliste fra nogle f r ______ H. C. Andersens fdsel. Den er ______ 1801 + og den giver klare oplysninger om, hvor mange der boede ______ Odense og hvad de var + beskftigede ______. Den omfatter 1199 husstande. Hvis man fordeler disse ______ erhverv, + fr man 102 embeds- og bestillingsmnd, 26 officerer, 12 der beskftiger sig med + immaterielle erhverv, 81 som lever ______ handel, 36 vrtshusholdere, 460 hndvrkere, 39 + avlsmnd og urtemnd, 121 soldater, 97 daglejere, 139 enlige kvinder og + almissemedlemmer, 29 pensionister og rentenydere. + H. C. Andersens forldre tilhrte samfundets laveste lag. Faderen var friskomager, og + nr han meldte sig ______ militrtjeneste ______ Napoleons side, har det nok ikke s meget vret + idealisme som praktisk konomi. For at sikre sig en soldats vrgeln. Han kom ikke + lngere end ______ Holsten. Han fik hj feber og mtte sendes hjem. Da han kom hjem, + forvrredes sygdommen og han dde. 
diff --git a/intl/icu/source/extra/uconv/samples/eucJP.txt b/intl/icu/source/extra/uconv/samples/eucJP.txt new file mode 100644 index 0000000000..680efb1d1a --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/eucJP.txt @@ -0,0 +1,37 @@ +åܸ + ֥桼եɥסäȡøƤФ +椦ϡ ̤Ρÿͤ +ñ û ˡåޥäǤ +Ρå ûŻˤ +ΩƤ뤳ȤǤ뤳Ȥ餭Ƥ롣 +Ф롡? ƤФ +夦ϡ? ͳϡ +դĤΡ?̤ +ҤȤ?ͤ +ˡ?ñ +褦? +ۤۤ?ˡ +ޤˤΡ? +Ȥˤ?Żˤ +䤯Ƥ뤳ȤǤ뤳Ȥ餭Ƥ롣? +ΩƤ뤳ȤǤ뤳Ȥ餭Ƥ롣 + + + 塡 +㡡 +ġơȡ 㡡塡 +ʡˡ̡͡Ρ ˤ㡡 ˤ塡ˤ +ϡҡաءۡ Ҥ Ҥ Ҥ + դդդդ +ޡߡࡡᡡ⡡ ߤ㡡ߤ塡ߤ +䡡() () 衡 㡡塡 +顡ꡡ롡졡 塡 + () 𡡡 + ? + 㡡塡 + +㡡 塡 +¡šǡɡ¤㡡¤ ¤ ¤¤ +Сӡ֡١ܡӤ㡡Ӥ塡Ӥ +ѡԡסڡݡԤ㡡Ԥ塡Ԥ + diff --git a/intl/icu/source/extra/uconv/samples/hangul-eucKR.txt b/intl/icu/source/extra/uconv/samples/hangul-eucKR.txt new file mode 100644 index 0000000000..90a0393b77 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/hangul-eucKR.txt @@ -0,0 +1,2 @@ + +ѱ diff --git a/intl/icu/source/extra/uconv/samples/hania-eucKR.txt b/intl/icu/source/extra/uconv/samples/hania-eucKR.txt new file mode 100644 index 0000000000..3197f751bc --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/hania-eucKR.txt @@ -0,0 +1,2 @@ +?? +??
\ No newline at end of file diff --git a/intl/icu/source/extra/uconv/samples/ibm-37-test.txt b/intl/icu/source/extra/uconv/samples/ibm-37-test.txt new file mode 100644 index 0000000000..d5fd11637f --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/ibm-37-test.txt @@ -0,0 +1 @@ +z@Ö@@`@@K%
\ No newline at end of file diff --git a/intl/icu/source/extra/uconv/samples/iso8859-1.txt b/intl/icu/source/extra/uconv/samples/iso8859-1.txt new file mode 100644 index 0000000000..95e57d6c72 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/iso8859-1.txt @@ -0,0 +1,18 @@ + Foto ______ det formodede fdehus, hjrnet ______ Hans Jensensstrde og Bangs Boder. + (H. C. Andersens Hus, Odense). + +BARNDOMSOMGIVELSERNE</b> + Sknt H. C. Andersens barndomsomgivelser var meget fattige, blev de i hans rige fantasi + solbeskinnede. + Der findes en mandtalsliste fra nogle f r ______ H. C. Andersens fdsel. Den er ______ 1801 + og den giver klare oplysninger om, hvor mange der boede ______ Odense og hvad de var + beskftigede ______. Den omfatter 1199 husstande. Hvis man fordeler disse ______ erhverv, + fr man 102 embeds- og bestillingsmnd, 26 officerer, 12 der beskftiger sig med + immaterielle erhverv, 81 som lever ______ handel, 36 vrtshusholdere, 460 hndvrkere, 39 + avlsmnd og urtemnd, 121 soldater, 97 daglejere, 139 enlige kvinder og + almissemedlemmer, 29 pensionister og rentenydere. + H. C. Andersens forldre tilhrte samfundets laveste lag. Faderen var friskomager, og + nr han meldte sig ______ militrtjeneste ______ Napoleons side, har det nok ikke s meget vret + idealisme som praktisk konomi. For at sikre sig en soldats vrgeln. Han kom ikke + lngere end ______ Holsten. Han fik hj feber og mtte sendes hjem. Da han kom hjem, + forvrredes sygdommen og han dde. diff --git a/intl/icu/source/extra/uconv/samples/koi8r.txt b/intl/icu/source/extra/uconv/samples/koi8r.txt new file mode 100644 index 0000000000..578a30f123 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/koi8r.txt @@ -0,0 +1,6 @@ + . + . + . - + . , + .. 
+ diff --git a/intl/icu/source/extra/uconv/samples/utf8/armenian.txt b/intl/icu/source/extra/uconv/samples/utf8/armenian.txt new file mode 100644 index 0000000000..21e2a73526 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/armenian.txt @@ -0,0 +1,4 @@ +Աեցեհի իմ լավ ?ւղիե լավարար, +Կյաեբս չտայի կասկածի մհգիե... +Այեպհս կ?ւզհի մհկե իեծ ?ավատր, +Այեպհս կ?ւզհի ?ավատալ մհկիե։ diff --git a/intl/icu/source/extra/uconv/samples/utf8/banviet.txt b/intl/icu/source/extra/uconv/samples/utf8/banviet.txt new file mode 100644 index 0000000000..7d9cafe73d --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/banviet.txt @@ -0,0 +1,11 @@ +Bần chỉ là một anh nghèo xác, ngày ngày lang-thang +khắp xóm này qua xóm khác xin ăn. Quần áo +rách-mướp Bần cũng chả coi sao, chân không có giầy và +đầu cũng chẳng có mũ. Giời nắng hay giời mưa Bần +không bao giờ quan-tâm. Một ngưòi như Bần thì ai cũng +tưởng là không còn có gì là quí-vât nữa. Thế nhưng ta +nhầm vì Bần có cái quạt mo, Bần quí lắm. Bần quí và giữ +luôn không rời bỏ bao giờ. Giời nắng thì Bần che đầu, +giời mưa Bần cũng nhờ nó mà đỡ ướt. Muốn ngồi Bần +dùng làm chiếu và đêm đến các chú muỗi vo-ve thì Bần +dùng để dánh đuổi các chú ấy. diff --git a/intl/icu/source/extra/uconv/samples/utf8/chinese-ulysses.txt b/intl/icu/source/extra/uconv/samples/utf8/chinese-ulysses.txt new file mode 100644 index 0000000000..d2d2c6cfe2 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/chinese-ulysses.txt @@ -0,0 +1,222 @@ + 俺正和首都警署的老特洛伊在凉亭山街角那儿寒喧呢,该死的,冷不丁儿的来了一名扫烟囱的背时家伙,他那长玩意儿差点儿戳进了俺那眼睛里头去。俺转回脑袋,正打算狠々地教训他一顿,没曾想一眼看见石头斜墻街那儿来了个人,道是谁呢,原来是约・哈因斯。 + ___囉,约,俺说。你怎么样?那个扫烟囱的背时家伙,用他的长把儿刷子差点儿把我的眼睛捅掉。你看见了吗? + ___煤烟到,运气好,约说。你刚才说话的那个老小子是谁? 
+ ___老特洛伊呗,俺说,原来是部队的。那家伙又是扫帚又是梯子,把交通都堵塞起来了,俺恨不得把他逮起来。 + ___你到这片儿来干吗?约问。 + ___没有什么屁事,俺说。兵营教堂那边,小鸡胡同口上有一个背时的大个子,不要脸的恶棍__老特洛伊就是给我透了那家伙的一点儿底__要了天主知道多少茶叶和糖,他答应每星期付三先令,说是在唐郡还有个农庄。货主是那边海梯斯堡街附近的一个小矮子,名叫摩西・赫佐格的。 + ___割包皮的吗?#1 约说。 +==注1:尤太教男人自幼即割去包皮。== + ___可不吗,俺说。头上去了一点儿。一个姓吉拉蒂的老管子工。我已经钉了他两个星期,可是一个便士也挤不出来。 + ___你现在就干这勾当?约说。 + ___可不吗,俺说。大人物落魄到这种地步:收倒帐、荒帐。可这家伙呀,像他这样臭名远扬的背时土匪,你走上一天的路也难得见到一个,一脸的麻子够装一场阵头雨的。{你就告诉他吧,}他说「我等着他呢,}他说,{我专门儿地等着他再派你来,只要他敢,}他说,{我就让法庭给他发传票,没有错儿,告他个无照营业。}他说完这话, 那模样就像要爆炸赛的。耶稣哪,那犹太小子火冒三丈的模样儿可真逗笑!{他喝我的茶。他吃我的糖。他倒不付我的帐?}[241] + 茲有都柏林市沃德码头区凯文道十三号商人摩西・赫佐格,下称售方,出售耐久食品并送交都柏林市阿伦码头区凉亭山二十九号绅士迈克尔・ E・吉拉蒂先生,下称购方,计开一级茶叶五磅,常衡制,每常衡制磅价三先令零便士,碎晶体白糖常衡制三斯通#1,每常衡制磅价三便士,该购方由该售方供应物品后应付该售方英币一镑五先令又六便士,此款应由该购方以每周分偿办法付与售方,即每七历日付英币三先令零便士;该购方对该耐久食品不得典当、抵押、出售或作其他方式转让,该售方拥有并继续拥有全面而不可侵犯之所有权,该售方有权自由任意处理,直至此款由该购方按照此约所定方式向该售方付清为止,此约于由该售方与其财产继承人、业务继承人、委托代理人、指定受让人为一方,该购方与其财产继承人业务继承人、委托代理人、指定受让人为另一方于本日于此议定。 +==注1:「斯通」为英国重量单位,一般合十四磅。== + ___你是严格的滴酒不入的吗?约说。 + ___除了 喝酒的时候,啥也不喝,俺说。 + ___去拜访一下咱们那位朋友怎么样?约说。 + ___谁?俺说。他呀,精神错乱上了天主的约翰那儿去了#2,可怜的家伙。 +==注2:「天主的约翰」为都柏林郡 一瘋人院。== + ___是喝他自己的貨色喝的吧? + ___可不嗎,俺說。威士忌加水,上了腦子。 + ___走吧,上巴尼・基尔南酒店吧,约说。我要找公民。 + ___就是巴尼宝贝儿吧,俺说。有什么怪事儿或是好事儿吗? 
+ ___屁事儿也没有,约说。我采访城标饭店那个会议了。 + ___啥会,约?俺说。 + ___牧牛贸易,约说,讨论口啼疫的。我要给公民透个信儿。 + 俺们绕过亚麻厂兵营,绕着法院后头,边走边聊。约这位老兄,有 的时候是挺够朋友的,可他就是老没有。耶稣 呀,俺可咽不下背时的滑头吉拉蒂这口气,白日打劫的土匪。告他个无照营业,他说。 + 在那美丽的伊尼斯菲尔#3,有那么一片土地,圣迈肯的土地。#4一座高塔在此拔地而起,四周远处都能望见。有许多大人物在此安眠,许多大名鼎々的英雄王公在此安眠如生。这片土地委实赏心悦目,上有潺々流水,水中群 鱼嬉戏,有鲂 ,有鲽鱼。有拟鲤,有大比目,有尖嘴黑绒鳕,有鲑鱼,有黄盖鲽,有菱鲆,有鲆鲽,有青鳕,还有各种杂鱼,以其他各类不计其数的水族。在西方和东方,高大的树木在和风吹拂之中,向四面八方搖晃着极其优美的枝叶,有飘々然的悬铃木,有黎巴嫩雪松,有挺拔的梧桐,有改良桉树,以及其他树木世界优良品种,这一地区应有尽有。美妙女郎散坐在美妙树木之间,唱着最美妙的歌曲,并以形々色々美妙物品为游戏,诸如金块、银鱼、大筐的鲱、整网的鳗鱼、小鳕鱼、整篓的仔鱼、紫色的海宝、活泼々的昆[242]虫。四方英雄远道来向她们求爱。从爱勃兰纳到斯里符玛奇山#1,无可匹敌的王子们来自自由世界的芒斯特省,来自公道的康诺特省,来自光滑、整洁的莱因斯特省,来自克罗阿蝉的地域,来自光辉的阿尔马郡,来自高贵的博伊尔区,是王子们,都是国王的后代。 +==注3:「伊尼斯菲尔」为爱尔兰语意为「命运之岛」系对爱尔兰的称呼之一。== +==注4:圣迈肯教堂离此不远,其地下墓穴以尸体保存良好著称。== +==注1:爱博兰纳为古地名,即今都柏林所在地。== + 一座亮晶々的宫殿耸立在那里,驾驶着为此目的而建造的船舶在大海航行的人从远处就能望见它的水晶屋顶闪々发光。当地所有的畜群 、肥犊、首批鲜果,纷々运来这里,由奥康内尔・茨赛门收费,他是世传的酋长#2 巨大的货车载来了丰富的农田的产物,有长筐装的菜花,有大盘装的菠菜、菠萝段、仰光瓜,有大筐装的蕃茄,有桶装的无花果,有成堆的瑞典萝卜、球状马铃薯,有成捆的各色甘兰、约克菜、皱叶菜,有成盘的土中珍珠洋葱头,还有浅盘装的蘑菇、蛋奶豌豆、肥巢菜、油菜、以及红的、绿的、黄的、棕的、赤褐色的甜、大、苦、熟带斑的苹果,还有小篓小篓的草莓、一篮一篮的醋栗,肉鼓々毛茸々的;可供王侯享用的草莓、新摘的紫莓。 +==注2:费茨赛门为1904 年都柏林食品商店总管,商场在基尔南酒店附近。== + {我等着他呢,}他说,{我专门儿地等着他呢。}你给我滚到这儿来吧,吉拉蒂,你这个臭名远扬的拦路抢劫的背时土匪! + 一条路上来的,还有不计其数的牲畜群 ,有系铃带头的去势公羊、催情补饲的母羊、初剪羊毛的壮羊、羔羊、灰雁、中号菜牛、吼喘母马、截角牛犊、长毛羊、待肥育羊、卡夫头等待产牛、等外品、阉母猪、咸肉用猪、各种不同品种高级生猪、安格斯小母牛、最佳纯种去角阉牛、以及获奖的头等的奶牛与菜牛;这里不断听到蹄子声、咯々声、吼叫声、哞々声、咩々声、咆哮声、隆々声、呼噜声、吃料声、咀嚼声,有羊群、有猪群、有蹄子沉重的牛群 ,来自勒斯克、鲁希、卡里克孟的牧场,来自索孟德那水流丰富的山谷,来自麦吉利口客地那些难于攀登的石堆,来自气势宏大深不可测的香农河,来自基亚族地区那些平缓的山坡,乳房因奶过多而肿胀不堪,还有大桶的黄油、乳酪酶、农家木桶装的羔羊前胸肉、大筐的玉米,还有十打十打的禽蛋,各种大小都有,玛瑙色的和暗褐色的。 + 这么的,俺们拐近了巴尼・基尔南酒店,可不吗,公民正在那角落里头,一边跟他自个儿和那条背时的癞皮杂种狗加里欧文大会谈,一边等着天上掉下什么喝的来呢。 + ___瞧他,俺说,克露斯金朗不离身,#3 带着他的大事业公事卷宗守着窝。 +==注3:「克露金斯朗」为爱尔兰语歌曲名,即「满々一小坛酒」。== + 背时的杂种狗发出一种悻々的声音,叫人听了毛骨悚然的。要是有人把那条恶狗的命结束了,那才是地道的善行呢。俺听说过一件真事,桑特里一名武警来送传票,是执照的事,叫这条狗啃去大半条裤子。 + ___站住,交出来,他说。 + ___没有事儿,公民,约说。自己人。 + ___自己人放行,他说。 + 然后他用手揉々一只眼睛说: + 你们对时局有什么看法? 
+ 他搞矛兵#1 和山上罗利#2 那一套呢。可是,老天在上,约倒是能对付这局面的。 +==注1:「矛兵」为十七世纪起义抗英的爱尔兰游击队。== +==注2:「山上的罗利」为十九世纪民歌中歌颂的反英农民志士。 + ___我看是物价要涨,他说着把手顺着裤裆伸了下去。 + 老天在上,公民把爪子往膝盖上一拍说: ___都是外国的战争造成的。 + 约在口袋里翘着大姆指说: + ___是俄国佬想统治。 + ___去你的吧,约,俺说。你那套背时胡扯算了吧。俺可渴坏了,半个克朗也不够解的。 + ___你说是什么吧,公民,约说。 + ___咱本国的酒,他说。 + ___你呢?约说。 + ___照着葫芦画瓢,俺说。 + ___来三品脱,特里,约说。老伙计怎么样,公民?他说。 + ----再好也没有,a chara#1 。他说。怎么样,加里?咱们会胜利的,是吧? + ----他说着话,一把抓住了那背时老狗的后颈皮,耶稣 啊,差不点儿把它勒死。 +==注1:爱尔兰语:「我的朋友」。 + 坐在圆塔前大石墩上的是一条好汉,肩膀宽阔、胸膛厚实、四肢强壮、眼光坦率、头发发红、雀斑斑斓、胡子蓬松、嘴巴宽大、鼻子高耸、脑袋长々、嗓音深沉、膝盖外露、两腿多毛、脸色发红、双臂多腱。他两肩之间宽达数伊尔#3,双膝嶙峋 如山岩,膝上和身体其余外露部分相同,都长着厚々的一层黄褐色刺毛,颜色和硬度都像山荊豆(Ulex Europeus#4)。两个鼻孔中伸出同样黄褐色的硬毛,鼻孔之大,可容草地 鹨在其洞穴深处筑巢。两只大眼睛的尺寸,和大头的菜花相仿,常有一滴眼泪和一丝微笑在争夺眼内地盘。#5 从他的口中深处,不时有一股发热的强气流冒出,而他的巨大心脏的博动,那响亮有力的节奏造成强大的共鸣,雷声隆々,将地面、高耸的塔顶和比塔更高的洞壁都震得颤动搖晃不已。 +==注3:「伊尔」为旧时英制长度,合45 英寸。 +==注4: +==注5:典出穆尔诗<爱琳,你眼中的眼泪和微笑>。== + 他穿一件无袖长衣,用新剥牛皮制成,下垂及膝如苏格兰短裙,腰间用一根芦苇毛草编成的绳子束住。裙子下面是鹿皮裤子,用肠线粗缝而成的。他的下肢套着用地衣紫染过的巴尔布里根裹腿,脚上套着盐渍粗牛皮靴子,靴带是同一牲口的气管。他的腰带上 挂着一大串海石子,都随着他那奇特的身体的每一个动作发出哐啷哐啷的声音,上面镌刻着粗犷 而生动的部落艺术人像,都是爱尔兰古代的男女英雄,有:库丘陵、身经百战的康恩、九个人质的尼尔、金克拉的布莱恩王、玛拉基大帝、阿特・麦克墨罗、沙恩・奥尼尔、约翰・墨菲神父、欧文・罗、派特里克・萨斯菲尔德、红色的休・奥唐奈、红色的吉姆・麦克德莫特、Soggarth Eogham O'Growney ,迈克尔・德怀尔、弗朗西・希金斯、亨利・乔伊・迈克拉肯、歌利亚、霍勒斯・惠特利、托马斯・Conneff、佩格・沃芬顿、村铁匠、Captain Moonlight、Captain 杯葛、丹蒂・lighieri、克里斯托费・哥伦布、S fursa(费萨)、S 布伦丹、马歇尔・麦克马洪、查理曼、西奥博尔德・沃尔夫・托恩、马加比家族之母、thelast of Mohicans 、卡斯蒂利亚的罗斯、the Man for Galway(戈尔韦、在蒙特卡洛砸银行的曼、在加普的曼、The Woman who didn't`本杰明・富兰克林、拿破仑・波拿巴、约翰・L・沙利文、克娄巴特拉、Savourneen Deelish、米利叶斯・凯撒、帕拉切尔苏斯、托马斯・利普顿先生、威廉・退尔、米开朗琪罗、海斯、穆罕默德、the bride of Lammermoor、隐士彼得、彼得the Packer、黑姑娘罗莎琳、巴特里克・W・莎士比亚、布赖恩・孔子、Murtagh 谷登堡、巴特里科(Patricio) 贝拉斯克斯、船长内穆(Nemo),特里斯丹和yi 瑟(Isolde)(Isult)、第一任威尔士亲王、 + + + + + + +他身旁放着一支磨尖的花岗岩长矛备用,脚边卧着一头犬族猛兽,它发出的喘hōu声表明它虽已入睡却睡不安稳。足以证明情况确实如此的,是它不时有一些低沉而粗厉的喉音,还有一些抽搐似的动作,都被它的主人用一根旧石器时代制成的粗糙大石棍敲着镇了下去。 + 不管怎么的,特里送来了那三品脱,是约请客,老天在上,俺看见他真掏出一镑钱来,差点儿把眼睛都瞪瞎了。嘿,俺说的可是千真万确的。一枚漂亮的元首。 + ___还有的是呢,他说。 + ___你抢了教堂里的施舍箱吗,约?俺说。 + ___我的血汗钱,约说。是那位谨慎会员给我的消息。#6 +==注6:共济会章程禁止在外人前作有关共济会的「不谨慎的谈话」。 + ___俺遇见你以前也见到他了,俺说。他在辟尔胡同、希腊街那一带转悠,瞪着他的鳕鱼眼珠子数鱼肠子的数目呢。 + 
是谁穿过迈肯的土地来了,披着黑貂 的甲冑?奥布卢姆,罗利的儿子:就是他。罗利的儿子,他不知畏惧为何物:他是生性谨慎的人。 + ----是为王子街老太婆,公民说,那份受津贴的机关报#1 在议会会场上受誓言约束的那个政党#2。你们看一看这份倒霉破报纸吧。他说。看一看吧,他说。<爱尔兰独立报>,请你们注意吧,帕内尔创办的为劳动者说话的报纸哩#3。听一听这份<爱尔兰一切为了爱尔兰的独立报>上的出生栏和死亡栏消息吧,我得谢々你们,还有结婚栏。 +==注1:<自由人报>(布卢姆为它拉广告)在王子街,其立场温和接近以地方自治为目标的爱尔兰议会党团,因而被要求彻底独立的民族主义者认为受其津贴。== +==注2:自十九世纪中叶起,英国议会中的爱尔兰议员曾采用起誓联合支持英国两大政党之一的办法,支持条件为该政党采取改善爱尔兰地位的政策,帕内尔在八十年代即运用此战略与英国自由党建立联合阵线,1890 年帕垮台后这一阵线逐渐解体。== +==注3:<爱尔兰独立报>为帕内尔垮台后创建,但至1891 年帕去世后方开始出版,并即为反帕的保守立场。== + 于是他高声念起来: + ___埃克塞特市#4 邦非尔德路戈登;圣安妮海滨伊弗利的雷德曼,威廉・T ・ 雷德曼夫人生一儿子。怎么样,嗯?赖特与弗林特;文森特与吉勒特,司多克威尔市克拉彭路179 号吉勒特府罗莎与故乔治・艾尔弗雷德之女罗瑟・玛莉恩;普莱伍德与黑茲代尔在肯辛顿区圣祖德教堂,由伍斯特教长十分可敬的福里斯特博士证婚。嗯?死亡拦。伦敦白厅胡同布里斯托;纽英顿,斯托克的卡尔,死于胃炎及心脏病;切普斯托区城壕府科克伯恩..... +==注4:「埃克塞特市」为英国地名。以下公民所念均为 + ___我认识那家伙,约说,我亲身受过罪#5。英国地名。== +==注5:「科克伯恩」可理解为「鸡巴疼」,即性病。== + ___科克伯恩。 丁赛,前海军部戴维・丁赛之妻;托顿翰市米勒,终年八十五;利物浦市堪宁街35 号韦尔什,伊莎贝拉・海伦,六月十二日。这就是咱们的民族报纸了,嗯?球!这就是那位班特里奸商马丁・墨菲的贡献了#6,嗯? +==注6:<爱尔兰独立报>业主墨菲,营造业起家为爱尔兰班特里人。== + ___啊,算了吧,约一边传酒一边说。感谢天主,他们抢了先。喝吧,公民。 + ----我喝,他说。 + ----祝你健康,约,俺说。还有在座的各位。 + ----啊!噢!別说话了!俺等那一品脱都等得长青霉了。俺敢对天主起誓,那酒到俺胃里头,俺都听到它落在胃底上的滴嗒声了。 + 瞧呀,正当他们在痛饮欢乐之杯时,一位仪表如神的使者,一位光耀如太阳的俊美青年快步走了进来,而他的身后正走过一位面目高贵、步履庄严的长者,手捧神圣的律卷,跟他一起的是他的贵妇妻子,其出身盖世无双,其容貌娇好无比。 + 小阿尔夫・伯根钻进门来,马上躲进了巴尔尼的小间里头,笑得直不起腰来。角落里还有人坐在那儿呢,俺没有看见,喝醉了人事不知,在那里头打鼾 ,原来是鲍勃・窦 冉。俺不明白是啥事儿,阿尔夫一个劲儿朝门外做手势。老天在上,啥事儿呢原来是背时的老傻瓜丹尼斯・布林,脚上穿一双拖鞋,胳肢窝儿里夹着两本背时的大书,他老婆紧跟在他后头,可怜的倒霉女人,颠得像只小狗似的。阿尔夫简直要爆炸了。 ___你们瞅着他,他说。布林。他把都柏林全市都溜遍了,就因为有人寄给他一张明信片,上边写着卜一上,他要起..... + 他笑得弯下了腰。 + ___起啥?俺问他。 + ___起诉,他说。要一万镑。 + ___见鬼!俺说。 + 背时的杂种狗开始发出低沉的吼声,那声音叫你听着毛骨耸然的感到要出事,可是公民对他肚子上踢了一脚。 + ___安静,他说。 + ___谁?约说。 + ___布林,阿尔夫说。他先到约翰・亨利・门顿那儿,然后绕到考立斯_沃德事务所,然后汤姆・罗奇福德碰见他,把他支到付长官办公处去找乐子去了。天主哪,我可是笑得肚皮痛了。卜一:上。长家伙狠々地瞪了他一眼, 现在背时的老白痴到格林街找侦探去了。 + ___长约翰什么时候绞死蒙乔伊监狱里那家伙?乔说。 + ___伯根,鲍勃・窦 冉说,他醒了。是阿尔夫・伯根吗? + ----是,阿尔夫说。绞死吗?等我给你们瞧。喂,特里,给咱们一小杯。那个背时的老笨蛋。一万镑 +呢。长约翰那个瞪着大眼睛的劲儿,才好看呢。卜一..... + 他又笑起来了。 + ___你笑谁?鲍勃・窦冉说。你是伯根吗? 
+ ___快点儿,特里 ,阿尔夫说。 + 特伦斯・奥赖恩听到他的话,立即送来一只水晶杯,满々地装着乌黑起沫的爱尔啤酒,那是酒老板艾弗和酒老板阿迪朗两位孪生兄弟#1不停地在他们的仙酒缸里酿造的,其干炼可比长生不老的勒达的儿子们#2。因为他们善于采集啤酒花鲜美多汁的浆果,将之集堆、筛选、捣碎、酿造,再掺入酸汁,然后将此酒汁用圣火加热,这两位干练的弟兄日夜不停,两位酿酒的大王。 +==注1:艾弗和阿迪朗即第五章提到的两贵族兄弟(并非孪生),为吉尼斯啤酒厂老板。== +==注2:勒达为希腊神话中仙女,与化作天鹅的大神宙斯相亲而生二儿二女,二儿一善驯马,一善拳击。== + 于是你,生来就侠义的特伦斯,捧出那神仙饮料,用水晶杯子献给那口渴的人,那俊美如神的侠义人物。 然而他,那奥伯根族的年轻族长,决不容忍別人的慷慨行为超过自己,因而仪态大方地放下一枚以最贵重的青铜铸成的宝币。币面有精致浮雕凸像,是一位尊贵无比的女王,她是不伦瑞克贵族后裔#3,名维多利亚,凭天主之恩宠而为不列颠、爱尔兰、以及不列颠海外领地联合王国最优秀的女王殿下,宗教信仰的保护者,印度的女皇帝,她是许多民族的统治者,众人热烈爱戴的胜利者,从太阳升起的地方到太阳落下的地方,浅色的、深色的、红色的、黑色的人,统々都熟悉她、爱戴她。 +==注3:「宝币」即便士,上有维多利亚女王像,其祖父英王乔治三世为德国不伦瑞克公爵之后。== + ___那个背时的共济会员在外面溜来溜去干什么?公民说。 + ___这是什么?约说。 + ___给,阿儿夫一面扔过钱去一面说。刚才谈到绞刑,我有一些你们从来没有见过的东西给你们看。刽子手书信。看这些。 [247] + 他从口袋里掏出一扎连封代瓤儿的信件来。 + ___你胡扯吧?俺说。 + ___骗你不是人,阿尔夫说。你们自己看信。 + 约就拿起了信件来。 + ___你笑的是谁?鲍勃・窦 冉说。 + 俺砸摸要出点子麻烦,鲍勃肚子里酒泛上来可是个怪角色,所以俺没话找话地说: + ___威利・默里近来怎么样,阿尔夫? + ___我不知道,阿尔夫说。刚才我还在卡佩尔大街上看见他呢,他和派迪・狄格南在一起。不过我正跟着那个..... + ___你什么?约扔下信件说。和谁在一起? + ___和狄格南呀,阿尔夫说。 + ___是派迪吗?约说。 + ___对呀,阿尔夫说。怎么啦? + ___你不知道他死了吗?约说。 + ___派迪・狄格南死了!阿尔夫说。 + ___对了,约说。 + ___肯定我刚见到他的,五分钟还不到呢,阿尔夫说。明々白々的。 + ___谁死了?鲍勃・窦 冉说。 + ___那么你看见了他的鬼魂,约说。求天主保佑我们莫遭灾祸。 + ___什么?阿尔夫说。好基督呀,刚々五.....什么?.....而且威利・默里还和他在一起呢,两个人在靠近那家叫什么的.....什么?狄格南死了? + ___狄格南怎么了?鲍勃・窦 冉说。谁说的.....? 
+ ___死了!阿尔夫说。他和你们一模一样地活着呢。 + ___也许这样,约说。可是,人们今天上午可不客气,把他埋了。 + ___派迪?阿尔夫说。 + ___对了,约说。他还清了他的人生债,天主慈悲他吧。 + ___好基督呀!阿尔夫说。 + 老天在上,他可真是你所谓的目瞪口呆了。 + 在那幽暗之中,可以感觉到幽灵的手在微々颤动,而按照密宗经典所作的祷告送达应达处之后#1,逐渐可以见到一股宝石红光隐约出现并越来越亮。由于头顶和脸部都放射吉瓦光,虚灵体呈现格外逼真形象#2。信息交流是通过脑下垂体实现的。也利用骶区与腹腔神经丛所发出的桔黄色与紫红色光线。喊他的地上名字问他现在天上何处,他表示现在正走上Pra/aya 或回归之途#3,但仍受超感觉层中较低层次上某些嗜血成分的困扰。问他越过人世界线之后最初有何感受,他表示原来看去如隔暗玻璃,然而已经超越界线的人,眼前就展开了最广阔的发展阿特曼的机会#1。问他那边的生活是否和我们的肉体生活相仿,他表示,他听灵体经验已较丰富的说,他们的住所拥有各种各样现代家庭舒适生活设备,诸如Talafana ,alavatar,atakalda,wataklasat#2应有尽有,而最高级的里手则浸沉于最纯洁的欣心浪潮之中。这时一夸脱的酪 乳应其要求送到,显然正解其渴。问他对生者有什么嘱咐,他劝告一切尚未摆脱玛耶的人#3,应认清真正道路,因为天道中人都已获得消息,现在火星和木星已出来在白羊星势力所在的东角捣乱。又问逝世者有无特殊愿望,回答是:{我们向你们仍在肉体中生活的地上朋友们致意。请注意康・凯勿推跺。}据了解,康・凯即康尼利厄斯・凯莱赫先生,他是颇受欢迎的奥尼尔夕宾仪馆的经理,死者的朋友,这次的安葬就是他安排的。临走他要求嘱咐他的亲爱的儿子派齐,他找不到的另一只靴子,现在小屋内的马桶箱下,这双靴子应送卡伦皮鞋店换底,后跟尚好不比换。他表示,这事使他在彼域心情异常不安,务请转达他的愿望。他在得到这事一定办到的保证后,表示十分满意。 +==注1:「密宗经典」为印度教经典,为欧美通神学等玄理派別所信奉。== +==注2:「吉瓦」为印度教用语,指灵魂之活力;「虚灵体」为通灵学用语,与「「实密体」相结合而成人,人出生时虚灵体比实密体出现早,人死亡时虚灵体并不立即消灭,因而灵魂有再生之可能。== +==注3:Pralaya 为通灵学梵 文术语,指人死后灵魂休养生息期。== +==注1:「阿特曼」为通灵学用语,指人的最内在的本质。== +==注2:仿梵文(因通灵学派崇尚梵文)的英语讹体:「电话、电梯、热冷(水)、卫生间」。== +==注3:「耶玛」为印度教术语,意为虚幻。== + 奥狄格南呀,我们的朝阳,他离开尘俗世界而去了。额角放光的派特里克呀,当初他在蕨丛间奔跑的脚步是何等轻疾!号哭吧,班芭#4,刮起你的风来;号哭吧,海洋呀,刮起你的旋风来。 +==注4:「班芭」为传闻中最早开辟爱尔兰的三姐妹之一,常被奉为司死亡女神。== + ___他又来了,公民瞪着门外说。 + ___谁?俺说。 + ___布卢姆,他说。他在那儿来回站岗放哨足有十分钟了。 + 可不吗,老天在上。俺瞅见他探头探脑的张望一下又溜开去了。 + 小阿尔夫可傻了眼。说真格的,傻了眼。 + ___好基督呀!他说。我能起誓,就是他。 + 鲍勃・窦 冉把帽子推在后脑壳上,这家伙灌足了酒,可算得上是都柏林最凶恶的恶棍了,他说: + ___谁说基督是好的? + ___你说的是什么话,阿尔夫说。 + 谁把可怜的小个儿威利・狄格南弄走了,鲍勃・窦 冉说,还算是个好基督吗? + ----哎呀,阿尔夫说着,想把事情对付过去算了。他总算把烦恼都结束了。 + 可是鲍勃・窦 冉大喊大叫的不答应。 + ___我说,谁把可怜的小个儿威利・狄格南弄走,谁就是个大混蛋! 
+ 特里走过来。给他使了个眼色叫他安静,说他们这里是个有执照的体面酒店,店内不能容许这样的话语。于是鲍勃・窦 冉哭起派迪・迪格南来,一点儿也不假。 + ___天下最好的人哪,他抽々噎々地说,最纯洁的人品呀。 + 眼泪说来就来。信口开河。顶好快回家去,去找他娶的那位喜欢梦游的小母狗吧,追屁股法警穆尼那个女儿,她娘在哈德威克街管一所公寓房子,班塔姆・莱昂斯在那儿住过,他说她半夜两点钟一丝不 挂 地在楼梯平台上溜达,赤身露体让人看,来者不拒,不偏不倚 +一律欢迎。 + ___最高贵,最真诚的,他说。他就这么的走了,可怜的小个子派迪・狄格南呀。 + 他用沉重心情和悲伤的眼泪,哀悼这那上天之光的陨灭。 + 老狗加里文又开始发出低沉的吼声,这回是对门边窥探的布卢姆。 + ___进来吧,怎么啦,公民说。它不会吃掉你的。 + 于是布卢姆把鳕鱼眼睛盯住了那条狗,侧着身子踅了进来。他问特里,马丁・坎宁安在不在。 + 唷,基督麦基翁!约看那些信件之一说。你们要不要听一听 +这个? + 他读起信来。 + ___{呈都柏林 + 都柏林行政长官 +敬启者小人愿为上述痛心案件效力小人曾于1900 年2 月12 日布特尔监狱绞死约・盖恩小人又.....} + ___让俺们看吧,约,俺说。 + ___{在彭顿维尔监狱绞死残杀洁细・贴尔悉特的列兵阿瑟・蔡斯小人又.....} + ___耶稣 呀,俺说。 + ___{.....在比林顿处决极恶的杀人犯托德・史密斯时任助手.....} + 公民伸手抢信。 + ___等着,约说。{小人套绞索有妙法套住出不来希望录用小人向长官小人费用五畿尼。 + 利物浦亨特街七号剃头师傅 + 哈・郎博尔德} ___一个杀人不眨眼的蛮子,公民说。 + ___那小子写的不成东西,又糟又乱的,约说。拿走吧,阿尔夫,拿得远々的。哈喽,布卢姆,你要什么? 于是他们两讨论起这一点来了,布卢姆说不想要什么不能要什么请原谅没有別的意思等々,然后他说好吧,他要一支雪加。老天,他真是个谨慎会员,没错儿。 + ___特里,把你那些头等臭货给我们来一支,约说。 + 阿尔夫这时在给俺们讲,有一个家伙寄来一张带黑框的报丧卡片。 [250] + ___都是黑国剃头的,他说。只要付他们五镑现金加旅费,他们连自己的老子也愿意绞死的。 + 他还告诉俺们,底下还有两个家伙等着,只等他从活板口坠下,马上抓住他的脚后跟往下拽,周到地道地叫他断气,事情完了还把绳索剁断分段卖掉,一个脑袋能卖几个先令。 + 在那黑暗的国土上,居住着复仇心切的剃刀骑士们。他们手抓致人死命的绳索;是的,不管是谁有血案,他们都用这圈将他套往埃里伯斯#1,因为那是我绝不容许的,主是这样说的。 +==注1:「埃里伯斯」为希腊神话中人世与冥府之间的幽暗世界。 + 于是他们开始谈论死刑问题,布卢姆当然就拿出了他那些原因喽、理由喽等々一大套有关的鳕鱼理论,那条狗可是不断地嗅他。有人跟俺念叨过,这些犹太佬让狗闻着有一种特殊的气味, + ___有一样东西是它起不了作用的,阿尔夫说。 + ___什么东西?约说。 + ___被绞死的倒霉蛋的家伙,阿尔夫说。 + ___真的吗? + ___一点儿也不假,阿尔夫说。我听基尔曼汉监牢绞死无敌会的约布雷迪那时的狱长说的。他告诉我,他们绞过之后,把他放下的时候那玩意儿对着他们的脸直立着,像一根拨火棍儿似的。 + ___有人说过,热情如炽 ,至死不休,约说。 + ___这里可以用科学解释的,布卢姆说。它不过是一种自然现象,你们不明白吗,因为由于..... 
+ 于是他说起了他那些绕脖子话头儿来了,又是现象又是科学,这个现象啦那个现象的。 + 杰出科学家卢依波尔德・布卢门德夫特教授先生已提出医学根据阐明,依照最获医学界赞许的科学传统,急性颈椎骨 折及其导致的脊髓横断可被认为必将对人体内生殖器官神经中枢产生强烈的神经节刺激,致使cor +pora caver-nosa#2 中弹性细孔迅速扩张,血流瞬即畅通,流入人体结构内所谓阴茎即男性器官部分,从而形成医学界所谓in articulo mortis per diminutionemcapitis #3 病态上升涨大的繁殖性勃起现象。 +==注2:拉丁文:「海绵体」。== +==注3:拉丁文:「死亡时断颈所致」。== + + + + + 杰出科学家卢依波尔德@布卢门德夫特教授先生已提出医学根据, +阐明依照最获赞许的医学传统,颈椎cㄤ 折与后随脊髓断裂可被认为 +必将对人体内生殖器官神经中枢造成神经节强烈刺激,致使corpora caver- +nosa#2 中弹性细孔迅速扩张,血流瞬即畅通,流入人体结构内所谓阴颈 +即男性器官部分,从而形成医学界所谓in articulo mortis per diminutionem +capitis #3 病态上升涨大的繁殖性勃起现象。 +==注2:拉丁文:「海绵体」。== +==注3:拉丁文:「死亡时断颈所致」。== + diff --git a/intl/icu/source/extra/uconv/samples/utf8/croat.txt b/intl/icu/source/extra/uconv/samples/utf8/croat.txt new file mode 100644 index 0000000000..406e264c0a --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/croat.txt @@ -0,0 +1,2 @@ +Dobar dan! Kako ste? Ja sam Marina +Dodigović. Kako se Vi zovete? diff --git a/intl/icu/source/extra/uconv/samples/utf8/danish.txt b/intl/icu/source/extra/uconv/samples/utf8/danish.txt new file mode 100644 index 0000000000..70111d290f --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/danish.txt @@ -0,0 +1,18 @@ + Foto ______ det formodede fødehus, hjørnet ______ Hans Jensensstræde og Bangs Boder. + (H. C. Andersens Hus, Odense). + +BARNDOMSOMGIVELSERNE</b> + Skønt H. C. Andersens barndomsomgivelser var meget fattige, blev de i hans rige fantasi + solbeskinnede. + Der findes en mandtalsliste fra nogle få år ______ H. C. Andersens fødsel. Den er ______ 1801 + og den giver klare oplysninger om, hvor mange der boede ______ Odense og hvad de var + beskæftigede ______. Den omfatter 1199 husstande. Hvis man fordeler disse ______ erhverv, + får man 102 embeds- og bestillingsmænd, 26 officerer, 12 der beskæftiger sig med + immaterielle erhverv, 81 som lever ______ handel, 36 værtshusholdere, 460 håndværkere, 39 + avlsmænd og urtemænd, 121 soldater, 97 daglejere, 139 enlige kvinder og + almissemedlemmer, 29 pensionister og rentenydere. 
+ H. C. Andersens forældre tilhørte samfundets laveste lag. Faderen var friskomager, og + når han meldte sig ______ militærtjeneste ______ Napoleons side, har det nok ikke så meget været + idealisme som praktisk økonomi. For at sikre sig en soldats værgeløn. Han kom ikke + længere end ______ Holsten. Han fik høj feber og måtte sendes hjem. Da han kom hjem, + forværredes sygdommen og han døde. diff --git a/intl/icu/source/extra/uconv/samples/utf8/greek.txt b/intl/icu/source/extra/uconv/samples/utf8/greek.txt new file mode 100644 index 0000000000..383222ea10 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/greek.txt @@ -0,0 +1,44 @@ +<html lang="gr" dir="ltr"> +<head> +<meta http-equiv="content-type" content="text/html; charset=UNICODE-1-1-UTF-8"> +</head> +<body> + + + +[σταρτ ηερε] ΓΕΝΕΣΙΣ 1-3 +Ψηαπτερ 1 +<br> +1. +ͲΕν αρχη εποιησεν ο θεος τον ουρανον και την γην. +<br> +2. +η δε γη ην +<br> +αορατος και ακατασκευαστος, και σκοτος επανω της αβυσσου, και +<br> +πνευμα θεου επεφερετο επανω του υδατος. +<br> +3. +και ειπεν ο θεος +<br> +Γενηθητω φως. και εγενετο φως. +<br> +4. +και ειδεν ο θεος το φως οτι +<br> +καλον. και διεχωρισεν ο θεος ανα μεσον του φωτος και ανα μεσον +<br> +του σκοτους. +<br> +5. +και εκαλεσεν ο θεος το φως ημεραν και το σκοτος +<br> +εκαλεσεν νυκτα. και εγενετο εσπερα και εγενετο πρωι, ημερα μια. +<br> + + +</body> +</html> + + diff --git a/intl/icu/source/extra/uconv/samples/utf8/hangul.txt b/intl/icu/source/extra/uconv/samples/utf8/hangul.txt new file mode 100644 index 0000000000..3e8071c4a9 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/hangul.txt @@ -0,0 +1,2 @@ +도서관 +한국 diff --git a/intl/icu/source/extra/uconv/samples/utf8/hania.txt b/intl/icu/source/extra/uconv/samples/utf8/hania.txt new file mode 100644 index 0000000000..c7e36c6f75 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/hania.txt @@ -0,0 +1,2 @@ +圖書館 +韓國
\ No newline at end of file diff --git a/intl/icu/source/extra/uconv/samples/utf8/jap.txt b/intl/icu/source/extra/uconv/samples/utf8/jap.txt new file mode 100644 index 0000000000..47a6b3eca5 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/jap.txt @@ -0,0 +1,37 @@ +ウインカリッスの日本語 + 「ユーザーフレンドリ」|と|呼ばれる| + りゆうは、ごく |普通の|人がごく| + 関単に |使用 |方法を|マスター|でき、| + 毎日の|タイプ | の |仕事にすぐ | + 役立てることができることからきている。| + よばれる - 呼ばれる + りゅうは、ごく - 理由は、ごく + ふつうの - 普通の + ひとがごく - 人がごく + かんたんに - 簡単に + しよう - 使用 + ほうほうを - 方法を + まいにちの - 毎日の + しごとにすぐ - 仕事にすぐ + やくだてることができることからきている。- + 役立てることができることからきている。 +あ い う え お +ぁ ぃ ぅ ぇ ぉ +か き く け こ きゃ きゅ きょ +さ し す せ そ しゃ しゅ しぇ しょ +た ち つ て と ちゃ ちゅ ちぇ ちょ +な に ぬ ね の にゃ にゅ にょ +は ひ ふ へ ほ ひゃ ひゅ ひょ + ふぁ ふぃ ふぇ ふぉ +ま み む め も みゃ みゅ みょ +や (い) ゆ (え) よ ゃ ゅ ょ +ら り る れ ろ りゃ りゅ りょ +わ ゐ (う) ゑ を ゎ ヰ ヱ +ん ゔ っ カ ケ +が ぎ ぐ げ ご ぎゃ ぎゅ ぎょ + ぐゎ ぐぇ +ざ じ ず ぜ ぞ じゃ じゅ じぇ じょ +だ ぢ づ で ど ぢゃ ぢぃ ぢゅ ぢぇ ぢぉ +ば び ぶ べ ぼ びゃ びゅ びょ +ぱ ぴ ぷ ぺ ぽ ぴゃ ぴゅ ぴょ +ワ ウィ ウ ウェ ウォ ヴァ ヴィ ヴ ヴェ ヴォ diff --git a/intl/icu/source/extra/uconv/samples/utf8/korean.txt b/intl/icu/source/extra/uconv/samples/utf8/korean.txt new file mode 100644 index 0000000000..bc2c3dccb6 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/korean.txt @@ -0,0 +1,20 @@ +중앙일보 - 사건/사회 - 극지탐험 협회결 +극지탐험 협회결성 체계적 연구 + 지구상의 3대 극지라 불리는 남극·북극·에베레스트를 한번이라 +도 다녀와야 정회원으로 들어갈 수 있는 한국극지협회가 발족다 +.윤석순(한·러시아극동협회 상임고문)씨과 홍석하(사람과 산 발 +행인)씨가 극지탐험과 이곳에서의 학술연구를 체계적 으로 해보자 +는데 뜻을 같이하고 협회결성에 나섰다. + 이 협회는 지난 16일 호텔신라에서 20여명의 준비위원이 참 +석한 가운데 준비위원회를 가졌으며 내년 3월 정식 출범한다. + 이 협회에는 3극오지를 모두 밟은 세계적인 산악인 허영호씨를 +비롯해 에베레스트를 올랐던 엄홍길·박영석·정승권씨 등 국내의 +저명한 산악인들이 회원으로 참여할 것으로 보인다. + 극지협회는 극지탐험가는 물론 학계·경제계인사들도 참가시킬 계 +획이다. 단순한 탐험차원을 넘어 지구상에 마지막 남은 자원의 +보고인 극지에서의 연구활동도 하겠다는 의미다. + 한국극지협회가 발족하면 우리 극지탐험이 체계화될 것으로 기대 +되고있다.협회는 극지관련자료를 데이터베이스로 축적해 오지탐험가 +들에게 제공할 계획이다. 
+입력시간 +ⓒ 중앙일보사 diff --git a/intl/icu/source/extra/uconv/samples/utf8/linji.txt b/intl/icu/source/extra/uconv/samples/utf8/linji.txt new file mode 100644 index 0000000000..8eb0ec003f --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/linji.txt @@ -0,0 +1,918 @@ +*臨濟語錄 +鎮州臨濟慧照禪師語錄序。 +延康殿學士金紫光祿大夫真。 +定府路安撫使兼馬步軍都總。 +管兼知成德軍府事馬防、撰。 +黃檗山頭、曾遭痛棒。大愚肋下、方解築拳。饒 +舌老婆、尿床鬼子。這風顛漢、再捋虎鬚。巖谷 +栽松、後人標榜。钁頭斸地、幾被活埋。肯箇後 +生、驀口自摑。辭焚机案、坐斷舌頭。不是河南、 +便歸河北。院臨古渡、運濟往來。把定要津、壁 +立萬仞。奪人奪境、陶鑄仙陀。三要三玄、鈐鎚 +衲子。常在家舍、不離途中。無位真人、面門出 +入。兩堂齊喝、賓主歷然。照用同時、本無前 +後。菱花對像、虛谷傳聲。妙應無方、不留朕 +跡。拂衣南邁、戾止大名。興化師承、東堂迎 +侍。銅瓶鐵C、掩室杜詞。松老雲閑、曠然自 +適。面壁未幾、密付將終。正法誰傳、瞎驢邊 +滅。圓覺老演、今為流通。點撿將來、故無差 +舛。唯餘一喝、尚要商量。具眼禪流、冀無賺 +舉。宣和庚子中秋日謹序。 +鎮州臨濟慧照禪師語錄。 +住三聖嗣法小師慧然集。 +府主王常侍、與諸官請師升座。師上堂云、山 +僧今日事不獲已、曲順人情、方登此座。若約 +祖宗門下、稱揚大事、直是開口不得、無爾措 +足處。山僧此日以常侍堅請、那隱綱宗。還有 +作家戰將、直下展陣開旗麼。對眾證據看。僧 +問、如何是佛法大意。師便喝。僧禮拜。師云、 +這箇師僧、卻堪持論。問、師唱誰家曲、宗風嗣 +阿誰。師云、我在黃蘗處、三度發問、三度被打。 +僧擬議。師便喝、隨後打云、不可向虛空A釘 +橛去也。有座主問、三乘十二分教、豈不是明 +佛性。師云、荒草不曾鋤。主云、佛豈賺人也。 +師云、佛在什麼處。主無語。師云、對常侍前、擬 +瞞老僧。速退速退。妨他別人諸問。復云、此日 +法筵、為一大事故。更有問話者麼。速致問來。 +爾纔開口、早勿交涉也。何以如此。不見釋尊 +云、法離文字、不屬因不在緣故。為爾信不及、 +所以今日葛藤。恐滯常侍與諸官員、眛他佛 +性。不如且退。喝一喝云、少信根人、終無了日。 +久立珍重。 +師、因一日到河府。府主王常侍、請師升座。 +時麻谷出問、大悲千手眼、那箇是正眼。師云、 +大悲千手眼、那箇是正眼、速道速道。麻谷拽 +師下座、麻谷卻坐。師近前云、不審。麻谷擬 +議。師亦拽麻谷下座、師卻坐。麻谷便出去。師 +便下座。 +上堂。云、赤肉團上有一無位真人、常從汝等 +諸人面門出入。未證據者看看。時有僧出問、 +如何是無位真人。師下禪床、把住云、道道。其 +僧擬議。師托開云、無位真人是什麼乾屎橛。 +便歸方丈。 +上堂。有僧出禮拜。師便喝。僧云、老和尚莫 +探頭好。師云、爾道落在什麼處。僧便喝。又有 +僧問、如何是佛法大意。師便喝。僧禮拜。師 +云、爾道好喝也無。僧云、草賊大敗。師云、過 +在什麼處。僧云、再犯不容。師便喝。是日兩堂 +首座相見、同時下喝。僧問師、還有賓主也無。 +師云、賓主歷然。師云、大眾、要會臨濟賓主句、 +問取堂中二首座。便下座。 +上堂。僧問、如何是佛法大意。師豎起拂子。 +僧便喝。師便打。又僧問、如何是佛法大意。師 +亦豎起拂子。僧便喝。師亦喝。僧擬議。師便 +打。師乃云、大眾、夫為法者、不避喪身失命。我 +二十年、在黃蘗先師處、三度問佛法的的大 +意、三度蒙他賜杖。如蒿枝拂著相似。如今更 +思得一頓棒喫。誰人為我行得。時有僧出眾 +云、某甲行得。師拈棒與他。其僧擬接。師便打。 +上堂。僧問、如何是劍刃上事。師云、禍事、禍 +事。僧擬議。師便打。問、祇如石室行者、踏碓忘 +卻移腳、向什麼處去。師云、沒溺深泉。師乃 +云、但有來者、不虧欠伊。總識伊來處。若與麼 +來、恰似失卻。不與麼來、無繩自縛。一切時中、 +莫亂斟酌。會與不會、都來是錯。分明與麼道。 +一任天下人貶剝。久立珍重。 +上堂。云、一人在孤峰頂上、無出身之路。一 +人在十字街頭、亦無向背。那箇在前、那箇在 +後。不作維摩詰、不作傅大士。珍重。 +上堂。云、有一人、論劫在途中、不離家舍。有 +一人、離家舍、不在途中。那箇合受人天供養。 +便下座。 +上堂。僧問、如何是第一句。師云、三要印開 +朱點側、未容擬議主賓分。問、如何是第二 
+句。師云、妙解豈容無著問、漚和爭負截流機。 +問、如何是第三句。師云、看取棚頭弄傀儡、抽 +牽都來A有人。師又云、一句語須具三玄 +門、一玄門須具三要、有權有用。汝等諸人、作 +麼生會。下座。 +師晚參示眾云、有時奪人不奪境、有時奪境 +不奪人、有時人境俱奪、有時人境俱不奪。時、 +有僧問、如何是奪人不奪境。師云、煦日發生 +鋪地錦、瓔孩垂髮白如絲。僧云、如何是奪境 +不奪人。師云、王令已行天下遍、將軍塞外絕 +煙塵。僧云、如何是人境兩俱奪。師云、并汾絕 +信、獨處一方。僧云、如何是人境俱不奪。師云、 +王登寶殿、野老謳歌。師乃云、今時學佛法者、 +且要求真正見解。若得真正見解、生死不染、 +去住自由。不要求殊勝、殊勝自至。道流、祇如 +自古先德、皆有出人底路。如山僧指示人處、 +祇要爾不受人惑。要用便用、更莫遲疑。如今 +學者不得、病在甚處。病在不自信處。爾若自 +信不及、即便忙忙地徇一切境轉、被他萬 +境回換、不得自由。爾若能歇得念念馳求心、 +便與祖佛不別。爾欲得識祖佛麼。祇爾面前 +聽法底是。學人信不及、便向外馳求。設求得 +者、皆是文字勝相、終不得他活祖意。莫錯、諸 +禪德。此時不遇、萬劫千生、輪回三界、徇好 +境掇去、驢牛肚A生。道流、約山僧見處、與釋 +迦不別。今日多般用處、欠少什麼。六道神光、 +未曾間歇。若能如是見得、祇是一生無事人。 +大德、三界無安、猶如火宅。此不是爾久停住 +處。無常殺鬼、一剎那間、不揀貴賤老少。爾要 +與祖佛不別、但莫外求。爾一念心上清淨光、 +是爾屋A法身佛。爾一念心上無分別光、是 +爾屋A報身佛。爾一念心上無差別光、是爾 +屋A化身佛。此三種身、是爾即今目前聽法 +底人。祇為不向外馳求、有此功用。據經論家、 +取三種身為極則。約山僧見處、不然。此三種 +身是名言、亦是三種依。古人云、身依義立、土 +據體論。法性身、法性土、明知是光影。大德、爾 +且識取弄光影底人、是諸佛之本源、一切處 +是道流歸舍處。是爾四大色身、不解說法聽 +法。脾胃肝膽、不解說法聽法。虛空不解說法 +聽法。是什麼解說法聽法。是爾目前歷歷底、 +勿一箇形段孤明、是這箇解說法聽法。若如 +是見得、便與祖佛不別。但一切時中、更莫間 +斷、觸目皆是。祇為情生智隔、想變體殊、所以 +輪回三界、受種種苦。若約山僧見處、無不甚 +深、無不解脫。道流、心法無形、通貫十方。在眼 +曰見、在耳曰聞、在鼻嗅香、在口談論、在手執 +捉、在足運奔。本是一精明、分為六和合。一 +心既無、隨處解脫。山僧與麼說、意在什麼處。 +祇為道流一切馳求心不能歇、上他古人閑 +機境。道流、取山僧見處、坐斷報化佛頭、十地 +滿心、猶如客作兒、等妙二覺、擔枷鎖漢、羅漢 +辟支、猶如廁穢、菩提涅槃、如繫驢橛。何以如 +此、祇為道流不達三祇劫空、所以有此障礙。 +若是真正道人、終不如是。但能隨緣消舊業、 +任運著衣裳、要行即行、要坐即坐、無一念心 +希求佛果。緣何如此。古人云、若欲作業求佛、 +佛是生死大兆。大德、時光可惜。祇擬傍家波 +波地、學禪學道、認名認句、求佛求祖、求善知 +識意度。莫錯、道流。爾祇有一箇父母、更求何 +物。爾自返照看。古人云、演若達多失卻頭、求 +心歇處即無事。大德、且要平常、莫作模樣。有 +一般不識好惡禿奴、便即見神見鬼、指東劃 +西、好晴好雨。如是之流、盡須抵債、向閻老前、 +吞熱鐵丸有日。好人家男女、被這一般野狐 +精魅所著、便即捏怪。瞎屢生、索飯錢有日 +在。 +師示眾云、道流、切要求取真正見解、向天下 +橫行、免被這一般精魅惑亂。無事是貴人。但 +莫造作、祇是平常。爾擬向外傍家求過、覓腳 +手。錯了也。祇擬求佛、佛是名句。爾還識馳求 +底麼。三世十方佛祖出來、也祇為求法。如今 +參學道流、也祇為求法。得法始了。未得、依前 +輪回五道。云何是法。法者是心法。心法無形、 +通貫十方、目前現用。人信不及、便乃認名認 +句、向文字中、求意度佛法。天地懸殊。道流、山 +僧說法、說什麼法。說心地法。便能入凡入聖、 +入淨入穢、入真入俗。要且不是爾真俗凡聖、 +能與一切真俗凡聖、安著名字。真俗凡聖、與 +此人安著名字不得。道流、把得便用、更不著 +名字、號之為玄旨。山僧說法、與天下人別。祇 +如有箇文殊普賢、出來目前、各現一身問法、 +纔道咨和尚、我早辨了也。老僧穩坐、更有道 +流、來相見時、我盡辨了也。何以如此。祇為我 +見處別、外不取凡聖、內不住根本、見徹更不 +疑謬。 +師示眾云、道流、佛法無用功處、祇是平常無 +事。屙屎送尿、著衣喫飯、困來即臥。愚人笑 +我、智乃知焉。古人云、向外作工夫、總是癡 +頑漢。爾且隨處作主、立處皆真。境來回換不 +得。縱有從來習氣、五無間業、自為解脫大海。 +今時學者、總不識法、猶如觸鼻羊、逢著物安 
+在口A。奴郎不辨、賓主不分。如是之流、邪心 +入道、鬧處即入。不得名為真出家人、正是真 +俗家人。夫出家者、須辨得平常真正見解、辨 +佛辨魔、辨真辨偽、辨凡辨聖。若如是辨得、名 +真出家。若魔佛不辨、正是出一家入一家。喚 +作造業眾生、未得名為真出家。祇如今有一 +箇佛魔、同體不分、如水乳合、鵝王喫乳。如明 +眼道流、魔佛俱打。爾若愛聖憎凡、生死海A +浮沈。 +問、如何是佛魔。師云、爾一念心疑處是魔。 +爾若達得萬法無生、心如幻化、更無一塵一 +法、處處清淨是佛。然佛與魔、是染淨二境。約 +山僧見處、無佛無眾生、無古無今、得者便得、 +不歷時節。無修無證、無得無失。一切時中、更 +無別法。設有一法過此者、我說如夢如化。山 +僧所說皆是。道流、即今目前孤明歷歷地聽 +者、此人處處不滯、通貫十方、三界自在。入一 +切境差別、不能回換。一剎那間、透入法界、逢 +佛說佛、逢祖說祖、逢羅漢說羅漢、逢餓鬼說 +餓鬼。向一切處、游履國土、教化眾生、未曾離 +一念。隨處清淨、光透十方、萬法一如。道流、大 +丈夫兒、今日方知本來無事。祇為爾信不及、 +念念馳求、捨頭覓頭、自不能歇。如圓頓菩 +薩、入法界現身、向淨土中、厭凡忻聖。如此之 +流、取捨未忘、染淨心在。如禪宗見解、又且不 +然。直是現今、更無時節。山僧說處、皆是一期 +藥病相治、總無實法。若如是見得、是真出家、 +日消萬兩黃金。道流、莫取次被諸方老師印 +破面門、道我解禪解道。辯似懸河、皆是造地 +獄業。若是真正學道人、不求世間過、切急 +要求真正見解。若達真正見解圓明、方始了 +畢。 +問、如何是真正見解。師云、爾但一切入凡入 +聖、入染入淨、入諸佛國土、入彌勒樓閣、入毘 +盧遮那法界、處處皆現國土、成住壞空。佛出 +于世、轉大法輪、卻入涅槃、不見有去來相貌。 +求其生死、了不可得。便入無生法界、處處游 +履國土、入華藏世界、盡見諸法空相、皆無 +實法。唯有聽法無依道人、是諸佛之母。所 +以佛從無依生。若悟無依、佛亦無得。若如是 +見得者、是真正見解。學人不了、為執名句、被 +他凡聖名礙、所以障其道眼、不得分明。祇如 +十二分教、皆是表顯之說。學者不會、便向表 +顯名句上生解。皆是依倚、落在因果、未免三 +界生死。爾若欲得生死去住、脫著自由、即今 +識取聽法底人。無形無相、無根無本、無住處、 +活撥撥地。應是萬種施設、用處祇是無處。 +所以覓著轉遠、求之轉乖。號之為祕密。道流、 +爾莫認著箇夢幻伴子。遲晚中間、便歸無常。 +爾向此世界中、覓箇什麼物作解脫。覓取一 +口飯喫、補毳過時、且要訪尋知識。莫因循逐 +樂。光陰可惜、念念無常。H則被地水火風、細 +則被生住異滅四相所逼。道流、今時且要識 +取四種無相境、免被境擺撲。 +問、如何是四種無相境。師云、爾一念心疑、被 +地來礙。爾一念心愛、被水來溺。爾一念心嗔、 +被火來燒。爾一念心喜、被風來飄。若能如是 +辨得、不被境轉、處處用境。東涌西沒、南涌北 +沒、中涌邊沒、邊涌中沒、履水如地、履地如水。 +緣何如此。為達四大如夢如幻故。道流、爾祇 +今聽法者、不是爾四大、能用爾四大。若能如 +是見得、便乃去住自由。約山僧見處、勿嫌底 +法。爾若愛聖、聖者聖之名。有一般學人、向五 +臺山A求文殊。早錯了也。五臺山無文殊。爾 +欲識文殊麼。祇爾目前用處、始終不異、處處 +不疑、此箇是活文殊。爾一念心無差別光、處 +處總是真普賢。爾一念心自能解縛、隨處解 +脫、此是觀音三昧法。互為主伴、出則一時 +出。一即三、三即一。如是解得、始好看教。 +師示眾云、如今學道人、且要自信。莫向外覓。 +總上他閑塵境、都不辨邪正。祇如有祖有佛、 +皆是教跡中事。有人拈起一句子語、或隱顯 +中出、便即疑生、照天照地、傍家尋問、也大 +忙然。大丈夫兒、莫祇麼論主論賊、論是論 +非、論色論財、論說閑話過日。山僧此間、不論 +僧俗、但有來者、盡識得伊。任伊向甚處出來、 +但有聲名文句、皆是夢幻。卻見乘境底人、是 +諸佛之玄旨。佛境不能自稱我是佛境。還是 +這箇無依道人、乘境出來。若有人出來、問我 +求佛、我即應清淨境出。有人問我菩薩、我即 +應玆悲境出。有人問我菩提、我即應淨妙境 +出。有人問我涅槃、我即應寂靜境出。境即 +萬般差別、人即不別。所以應物現形、如水中 +月。道流、爾若欲得如法、直須是大丈夫兒始 +得。若萎萎隨隨地、則不得也。夫如I嗄。 +之器、不堪貯醍醐。如大器者、直要不受 +人惑。隨處作主、立處皆真。但有來者、皆不得 +受。爾一念疑、即魔入心。如菩薩疑時、生死魔 +得便。但能息念、更莫外求。物來則照。爾但信 +現今用底、一箇事也無。爾一念心生三界、隨 +緣被境、分為六塵。爾如今應用處、欠少什麼。 +一剎那間、便入淨入穢、入彌勒樓閣、入三眼 
+國土、處處遊履、唯見空名。 +問、如何是三眼國土。師云、我共爾入淨妙國 +土中、著清淨衣、說法身佛。又入無差別國土 +中、著無差別衣、說報身佛。又入解脫國土中、 +著光明衣、說化身佛。此三眼國土、皆是依變。 +約經論家、取法身為根本、報化二身為用。山 +僧見處、法身即不解說法。所以古人云、身依 +義立、土據體論。法性身、法性土、明知是建立 +之法、依通國土。空拳黃葉、用誑小兒。蒺藜夌 +刺、枯骨上覓什麼汁。心外無法、內亦不可得、 +求什麼物。爾諸方言道、有修有證。莫錯。設有 +修得者、皆是生死業。爾言六度萬行齊修。我 +見皆是造業。求佛求法、即是造地獄業。求菩 +薩、亦是造業。看經看教、亦是造業。佛與祖師、 +是無事人。所以有漏有為、無漏無為、為清淨 +業。有一般瞎禿子、飽喫飯了、便坐禪觀行、 +把捉念漏、不令放起、厭喧求靜、是外道法。祖 +師云、爾若住心看靜、舉心外照、攝心內澄、凝 +心入定、如是之流、皆是造作。是爾如今與麼 +聽法底人、作麼生擬修他證他莊嚴他。渠且 +不是修底物、不是莊嚴得底物。若教他莊嚴、 +一切物即莊嚴得。爾且莫錯。道流、爾取這一 +般老師口A語、為是真道、是善知識不思議、 +我是凡夫心、不敢測度他老宿。瞎屢生、爾一 +生祇作這箇見解、辜負這一雙眼。冷噤噤地、 +如凍凌上驢駒相似。我不敢毀善知識、怕生 +口業。道流、夫大善知識、始敢毀佛毀祖、是非 +天下、排斥三藏教、罵辱諸小兒、向逆順中覓 +人。所以我於十二年中、求一箇業性、知芥子 +許不可得。若似新婦子禪師、便即怕趁出院、 +不與飯喫、不安不樂。自古先輩、到處人不 +信、被遞出、始知是貴。若到處人盡肯、堪作什 +麼。所以師子一吼、野干腦裂。道流、諸方說、有 +道可修、有法可證。爾說證何法、修何道。爾今 +用處、欠少什麼物、修補何處。後生小阿師不 +會、便即信這般野狐精魅、許他說事、繫縛 +人、言道理行相應、護惜三業、始得成佛。如此 +說者、如春細雨。古人云、路逢達道人、第一莫 +向道。所以言、若人修道道不行、萬般邪境競 +頭生。智劍出來無一物、明頭未顯暗頭明。所 +以古人云、平常心是道。大德、覓什麼物。現今 +目前聽法無依道人、歷歷地分明、未曾欠少。 +爾若欲得與祖佛不別、但如是見、不用疑誤。 +爾心心不異、名之活祖。心若有異、則性相別。 +心不異故、即性相不別。 +問、如何是心心不異處。師云、爾擬問、早異了 +也、性相各分。道流、莫錯。世出世諸法、皆無自 +性、亦無生性。但有空名、名字亦空。爾祇麼 +認他閑名為實。大錯了也。設有、皆是依變之 +境。有箇菩提依、涅繫依、解脫依、三身依、境智 +依、菩薩依、佛依。爾向依變國土中、覓什麼物。 +乃至三乘十二分教、皆是拭不淨故紙。佛是 +幻化身、祖是老比丘。爾還是娘生已否。爾若 +求佛、即被佛魔攝。爾若求祖、即被祖魔縛。 +爾若有求皆苦。不如無事。有一般禿比丘、向 +學人道、佛是究竟、於三大阿僧祇劫、修行 +果滿、方始成道。道流、爾若道佛是究竟、緣什 +麼八十年後、向拘尸羅城、雙林樹間、側臥而死 +去。佛今何在。明知與我生死不別。爾言、三十 +二相八十種好是佛。轉輪聖王應是如來。明 +知是幻化。古人云、如來舉身相、為順世間情。 +恐人生斷見、權且立虛名。假言三十二、八十 +也空聲。有身非覺體、無相乃真形。爾道、佛有 +六通、是不可思議。一切諸天、神仙、阿修羅、大 +力鬼、亦有神通。應是佛否。道流、莫錯。祇如阿 +修羅、與天帝釋戰、戰敗領八萬四千眷屬、入 +藕絲孔中藏。莫是聖否。如山僧所舉、皆是業 +通依通。夫如佛六通者、不然。入色界不被色 +惑、入聲界不被聲惑、入香界不被香惑、入味 +界不被味惑、入觸界不被觸惑、入法界不被 +法惑。所以達六種色聲香味觸法皆是空相、 +不能繫縛此無依道人。雖是五蘊漏質、便是 +地行神通。道流、真佛無形、真法無相。爾祇麼 +幻化上頭、作模作樣。設求得者、皆是野狐精 +魅、並不是真佛、是外道見解。夫如真學道人、 +並不取佛、不取菩薩羅漢、不取三界殊勝。迥 +無獨脫、不與物拘。乾坤倒覆、我更不疑。十方 +諸佛現前、為一念心喜、三塗地獄頓現、無一 +念心怖。緣何如此。我見諸法空相、變即有、不 +變即無。三界唯心、萬法唯識。所以夢幻空花、 +何勞把捉。唯有道流、目前現今聽法底人、入 +火不燒、入水不溺、入三塗地獄、如遊園觀、入 +餓鬼畜生、而不受報。緣何如此。無嫌底法。爾 +若愛聖憎凡、生死海A沈浮。煩惱由心故有、 +無心煩惱何拘。不勞分別取相、自然得道須 +臾。爾擬傍家波波地學得、於三祇劫中、終歸 +生死。不如無事、向叢林中、床角頭交腳坐。道 +流、如諸方有學人來、主客相見了、便有一句 +子語、辨前頭善知識。被學人拈出箇機權語 +路、向善知識口角頭攛過、看爾識不識。爾若 
+識得是境、把得便拋向坑子A。學人便即尋 +常、然後便索善知識語。依前奪之。學人云、上 +智哉、是大善知識。即云、爾大不識好惡。如善 +知識、把出箇境塊子、向學人面前弄。前人辨 +得、下下作主、不受境惑。善知識便即現半 +身、學人便喝。善知識又入一切差別語路中 +擺撲。學人云、不識好惡老禿奴。善知識歎曰、 +真正道流。如諸方善知識、不辨邪正。學人來 +問、菩提涅槃、三身境智、瞎老師便與他解說。 +被他學人罵著、便把棒打他、言無禮度。自是 +爾善知識無眼、不得嗔他。有一般不識好惡 +禿奴、即指東劃西、好晴好雨、好燈籠露柱。爾 +看、眉毛有幾莖。這箇具機緣。學人不會、便即 +心狂。如是之流、總是野狐精魅魍魎。被他好 +學人嗌嗌微笑、言瞎老禿奴惑亂他天下人。 +道流、出家兒且要學道。祇如山僧、往日曾向 +毘尼中留心、亦曾於經論尋討。後方知是濟 +世藥、表顯之說、遂乃一時拋卻、即訪道參禪。 +後遇大善知識、方乃道眼分明、始識得天下 +老和尚、知其邪正。不是娘生下便會、還是體 +究練磨、一朝自省。道流、爾欲得如法見解、但 +莫受人惑。向A向外、逢著便殺。逢佛殺佛、逢 +祖殺祖、逢羅漢殺羅漢、逢父母殺父母、逢 +親眷殺親眷、始得解脫、不與物拘、透脫自 +在。如諸方學道流、未有不依物出來底。山僧 +向此間、從頭打。手上出來手上打。口A出來 +口A打。眼A出來眼A打。未有一箇獨脫出 +來底。皆是上他古人閑機境。山僧無一法與 +人、祇是治病解縛。爾諸方道流、試不依物出 +來、我要共爾商量。十年五歲、並無一人。皆是 +依草附葉、竹木精靈、野狐精魅、向一切糞塊 +上亂咬。瞎漢、枉消他十方信施、道我是出家 +兒、作如是見解。向爾道、無佛無法、無修無證。 +祇與麼傍家擬求什麼物。瞎漢、頭上安頭。是 +爾欠少什麼。道流、是爾目前用底、與祖佛不 +別。祇麼不信、便向外求。莫錯。向外無法、內亦 +不可得。爾取山僧口A語、不如休歇無事去。 +已起者莫續、未起者不要放起、便勝爾十年 +行腳。約山僧見處、無如許多般、祇是平常。著 +衣喫飯、無事過時。爾諸方來者、皆是有心求 +佛求法、求解脫、求出離三界。癡人、爾要出三 +界、什麼處去。佛祖是賞繫底名句。爾欲識三 +界麼。不離爾今聽法底心地。爾一念心貪是 +欲界。爾一念心瞋是色界。爾一念心癡是無 +色界、是爾屋A家具子。三界不自道、我是三 +界。還是道流、目前靈靈地照燭萬般、酌度世 +界底人、與三界安名。大德、四大色身是無常。 +乃至脾胃肝膽、髮毛爪齒、唯見諸法空相。爾 +一念心歇得處、喚作菩提樹。爾一念心不能 +歇得處、喚作無明樹。無明無住處、無明無 +始終。爾若念念心歇不得、便上他無明樹、便 +入六道四生、披毛戴角。爾若歇得、便是清淨 +身界。爾一念不生、便是上菩提樹、三界神通 +變化、意生化身、法喜禪悅、身光自照。思衣羅 +綺千重、思食百味具足、更無橫病。菩提無住 +處、是故無得者。道流、大丈夫漢、更疑箇什麼。 +目前用處、更是阿誰。把得便用、莫著名字、號 +為玄旨。與麼見得、勿嫌底法。古人云、心隨萬 +境轉、轉處實能幽。隨流認得性、無喜亦無憂。 +道流、如禪宗見解、死活循然。參學之人、大須 +子細。如主客相見、便有言論往來。或應物現 +形、或全體作用、或把機權喜怒、或現半身、或 +乘師子、或乘象王。如有真正學人、便喝先拈 +出一箇膠盆子。善知識不辨是境、便上他境 +上、作模作樣。學人便喝。前人不肯放。此是膏 +肓之病、不堪醫。喚作客看主。或是善知識不 +拈出物、隨學人問處即奪。學人被奪、抵死不 +放。此是主看客。或有學人、應一箇清淨境、出 +善知識前。善知識辨得是境、把得拋向坑A。 +學人言、大好善知識。即云、咄哉、不識好惡。學 +人便禮拜。此喚作主看主。或有學人、披枷帶 +鎖、出善知識前。善知識更與安一重枷鎖。學 +人歡喜、彼此不辨。呼為客看客。大德、山僧如 +是所舉、皆是辨魔揀異、知其邪正。道流、寔 +情大難、佛法幽玄、解得可可地。山僧竟日與 +他說破、學者總不在意。千遍萬遍、腳底踏過、 +黑沒焌地、無一箇形段、歷歷孤明。學人信不 +及、便向名句上生解。年登半百、祇管傍家負 +死屍行、檐卻檐子天下走。索草鞋錢有日在。 +大德、山僧說向外無法、學人不會、便即向A +作解、便即倚壁坐、舌拄上齶、湛然不動、取此 +為是祖門佛法也。大錯。是爾若取不動清淨 +境為是、爾即認他無明為郎主。古人云、湛湛 +黑暗深坑、寔可怖畏。此之是也。爾若認他動 +者是、一切草木皆解動、應可是道也。所以動 +者是風大、不動者是地大。動與不動、俱無自 +性。爾若向動處捉他、他向不動處立。爾若向 +不動處捉他、他向動處立。譬如潛泉魚、鼓波 +而自躍。大德、動與不動、是二種境。還是無依 +道人、用動用不動。如諸方學人來、山僧此間、 
+作三種根器斷。如中下根器來、我便奪其境、 +而不除其法。或中上根器來、我便境法俱奪。 +如上上根器來、我便境法人俱不奪。如有出 +格見解人來、山僧此間、便全體作用、不歷根 +器。大德、到這A、學人著力處不通風、石火電 +光即過了也。學人若眼定動、即沒交涉。擬心 +即差、動念即乖。有人解者、不離目前。大德、爾 +檐CB屎檐子、傍家走求佛求法。即今與麼 +馳求底、爾還識渠麼。活撥撥地、祇是勿根 +株。擁不聚、撥不散。求著即轉遠、不求還在目 +前、靈音屬耳。若人不信、徒勞百年。道流、一 +剎那間、便入華藏世界、入毘盧遮那國土、入 +解脫國土、入神通國土、入清淨國土、入法界、 +入穢入淨、入凡入聖、入餓鬼畜生、處處討覓 +尋、皆不見有生有死、唯有空名。幻化空花、不 +勞把捉、得失是非、一時放卻。道流、山僧佛法、 +的的相承、從麻谷和尚、丹霞和尚、道一和尚、 +盧山拽石頭和尚、一路行遍天下。無人信得、 +盡皆起謗。如道一和尚用處、純一無雜、學人 +三百五百、盡皆不見他意。如盧山和尚、自在 +真正、順逆用處、學人不測涯際、悉皆忙然。 +如丹霞和尚、翫珠隱顯、學人來者、皆悉被罵。 +如麻谷用處、苦如黃蘗、近皆不得。如石鞏用 +處、向箭頭上覓人、來者皆懼。如山僧今日用 +處、真正成壞、翫弄神變、入一切境、隨處無事、 +境不能換。但有來求者、我即便出看渠。渠不 +識我、我便著數般衣、學人生解、一向入我言 +句。苦哉、瞎禿子無眼人、把我著底衣、認青黃 +赤白。我脫卻入清淨境中、學人一見、便生忻 +欲。我又脫卻、學人失心、忙然狂走、言我無 +衣。我即向渠道、爾識我著衣底人否。忽爾回 +頭、認我了也。大德、爾莫認衣。衣不能動、人 +能著衣。有箇清淨衣、有箇無生衣、菩提衣、涅 +槃衣、有祖衣、有佛衣。大德、但有聲名文句、 +皆悉是衣變。從臍輪氣海中鼓激、牙齒敲磕、 +成其句義。明知是幻化。大德、外發聲語業、內 +表心所法。以思有念、皆悉是衣。爾祇麼認他 +著底衣為寔解。縱經塵劫、祇是衣通。三界循 +還、輪回生死。不如無事。相逢不相識、共語不 +知名。今時學人不得、蓋為認名字為解。大策 +子上、抄死老漢語、三重五重複子裹、不教人 +見、道是玄旨、以為保重。大錯。瞎屢生、爾向 +枯骨上、覓什麼汁。有一般不識好惡、向教中 +取意度商量、成於句義。如把屎塊子、向口A +含了、吐過與別人。猶如俗人打傳口令相似、 +一生虛過。也道我出家、被他問著佛法、便即 +杜口無詞、眼似漆突、口如楄檐。如此之類、逢 +彌勒出世、移置他方世界、寄地獄受苦。大德、 +爾波波地往諸方、覓什麼物、踏爾腳板闊。無 +佛可求、無道可成、無法可得。外求有相佛、與 +汝不相似。欲識汝本心、非合亦非離。道流、真 +佛無形、真道無體、真法無相。三法混融、和合 +一處。辨既不得、喚作忙忙業識眾生。 +問、如何是真佛真法真道、乞垂開示。師云、佛 +者心清淨是。法者心光明是。道者處處無礙 +淨光是。三即一、皆是空名、而無寔有。如真正 +學道人、念念心不間斷。自達磨大師從西土 +來、祇是覓箇不受人惑底人。後遇二祖、一言 +便了、始知從前虛用功夫。山僧今日見處、與 +祖佛不別。若第一句中得、與祖佛為師。若第 +二句中得、與人天為師。若第三句中得、自救 +不了。 +問、如何是西來意。師云、若有意、自救不了。云、 +既無意、云何二祖得法。師云、得者是不得。云、 +既若不得、云何是不得底意。師云、為爾向一 +切處馳求心不能歇。所以祖師言、咄哉丈夫、 +將頭覓頭。爾言下便自回光返照、更不別求、 +知身心與祖佛不別、當下無事、方名得法。大 +德、山僧今時、事不獲已、話度說出許多不才 +淨。爾且莫錯。據我見處、寔無許多般道理。要 +用便用、不用便休。祇如諸方說六度萬行、以 +為佛法、我道、是莊嚴門佛事門、非是佛法。乃 +至持齋持戒、擎油不A、道眼不明、盡須抵債、 +索飯錢有日在。何故如此。入道不通理、復身 +還信施。長者八十一、其樹不生耳。乃至孤峰 +獨宿、一食卯齋、長坐不臥、六時行道、皆是造 +業底人。乃至頭目髓腦、國城妻子、象馬七珍、盡 +皆捨施、如是等見、皆是苦身心故、還招苦果。 +不如無事、純一無雜。乃至十地滿心菩薩、皆 +求此道流蹤跡、了不可得。所以諸天歡喜、地 +神捧足、十方諸佛、無不稱歎。緣何如此。為今 +聽法道人、用處無蹤跡。 +問、大通智勝佛、十劫坐道場、佛法不現前、不 +得成佛道。未審此意如何。乞師指示。師云、 +大通者、是自己於處處、達其萬法無性無相、 +名為大通。智勝者、於一切處不疑、不得一法、 +名為智勝。佛者心清淨、光明透徹法界、得名 +為佛。十劫坐道場者、十波羅蜜是。佛法不現 +前者、佛本不生、法本不滅、云何更有現前。不 +得成佛道者、佛不應更作佛。古人云、佛常在 
+世間、而不染世間法。道流、爾欲得作佛、莫隨 +萬物。心生種種法生、心滅種種法滅。一心不 +生、萬法無咎。世與出世、無佛無法、亦不現前、 +亦不曾失。設有者、皆是名言章句、接引小兒、 +施設藥病、表顯名句。且名句不自名句、還是 +爾目前昭昭靈靈、鑒覺聞知照燭底、安一切 +名句。大德、造五無間業、方得解脫。 +問、如何是五無間業。師云、殺父害母、出佛身 +血、破和合僧、焚燒經像等、此是五無間業。云、 +如何是父。師云、無明是父。爾一念心、求起滅 +處不得、如響應空、隨處無事、名為殺父。云、如 +何是母。師云、貪愛為母。爾一念心、入欲界中、 +求其貪愛、唯見諸法空相、處處無著、名為害 +母。云、如何是出佛身血。師云、爾向清淨法界 +中、無一念心生解、便處處黑暗、是出佛身血。 +云、如何是破和合僧。師云、爾一念心、正達煩 +惱結使、如空無所依、是破和合僧。云、如何是 +焚燒經像。師云、見因緣空、心空、法空、一念決 +定斷、迥然無事、便是焚燒經像。大德、若如是 +達得、免被他凡聖名礙。爾一念心、祇向空拳 +指上生寔解、根境法中虛捏怪。自輕而退屈 +言、我是凡夫、他是聖人。禿屢生、有甚死急、披 +他師子皮、卻作野干鳴。大丈夫漢、不作丈夫 +氣息、自家屋A物不肯信、祇麼向外覓、上他 +古人閑名句、倚陰博陽、不能特達。逢境便緣、 +逢塵便執、觸處惑起、自無准定。道流、莫取山 +僧說處。何故。說無憑據、一期間圖畫虛空、如 +彩畫像等喻。道流、莫將佛為究竟。我見猶如 +廁孔、菩薩羅漢、盡是枷鎖、縛人底物。所以文 +殊仗劍、殺於瞿曇、鴦掘持刀、害於釋氏。道流、 +無佛可得。乃至三乘五性、圓頓教跡、皆是一 +期藥病相治、並無實法。設有、皆是相似、表顯 +路布、文字差排、且如是說。道流、有一般禿子、 +便向A許著功、擬求出世之法。錯了也。若人 +求佛、是人失佛。若人求道、是人失道。若人求 +祖、是人失祖。大德、莫錯。我且不取爾解經論、 +我亦不取爾國王大臣、我亦不取爾辯似懸 +河、我亦不取爾聰明智慧、唯要爾真正見解。 +道流、設解得百本經論、不如一箇無事底阿 +師。爾解得、即輕蔑他人。勝負修羅、人我無 +明、長地獄業。如善星比丘、解十二分教、生身 +陷地獄、大地不容。不如無事休歇去。飢來喫 +飯、睡來合眼。愚人笑我、智乃知焉。道流、莫 +向文字中求。心動疲勞、吸冷氣無益。不如一 +念緣起無生、超出三乘權學菩薩。大德、莫因 +循過日。山僧往日、未有見處時、黑漫漫地。光 +陰不可空過、腹熱心忙、奔波訪道。後還得力、 +始到今日、共道流如是話度。勸諸道流、莫為 +衣食。看世界易過、善知識難遇。如優曇花時 +一現耳。爾諸方聞道有箇臨濟老漢、出來便 +擬問難、教語不得。被山僧全體作用、學人空 +開得眼、口總動不得。懵然不知以何答我。我 +向伊道、龍象蹴踏、非驢所堪。爾諸處祇指胸 +點肋、道我解禪解道、三箇兩箇、到這A不奈 +何。咄哉、爾將這箇身心、到處簸兩片皮、誑謼 +閭閻。喫鐵棒有日在。非出家兒、盡向阿修羅 +界攝。夫如至理之道、非諍論而求激揚、鏗鏘 +以摧外道。至於佛祖相承、更無別意。設有言 +教、落在化儀三乘五性、人天因果。如圓頓之 +教、又且不然。童子善財、皆不求過。大德、莫錯 +用心。如大海不停死屍。祇麼擔卻、擬天下走。 +自起見障、以礙於心。日上無雲、麗天普照。眼 +中無翳、空A無花。道流、爾欲得如法、但莫 +生疑。展則彌綸法界、收則絲髮不立。歷歷孤 +明、未曾欠少。眼不見、耳不聞、喚作什麼物。古 +人云、說似一物則不中。爾但自家看。更有什 +麼。說亦無盡、各自著力。珍重。 +勘辨。 +黃蘗、因入廚次、問飯頭、作什麼。飯頭云、揀眾 +僧米。黃蘗云、一日喫多少。飯頭云、二石五。 +黃蘗云、莫太多麼。飯頭云、猶恐少在。黃檗便 +打。飯頭卻舉似師。師云、我為汝勘這老漢。纔 +到侍立次、黃蘗舉前話。師云、飯頭不會、請和 +尚代一轉語。師便問、莫太多麼。黃檗云、何不 +道、來日更喫一頓。師云、說什麼來日、即今便 +喫。道了便掌。黃蘗云、這風顛漢、又來這A捋 +虎鬚。師便喝出去。後溈山問仰山、此二尊宿、 +意作麼生。仰山云、和尚作麼生。溈山云、養子 +方知父慈。仰山云、不然。溈山云、子又作麼生。 +仰山云、大似勾賊破家。 +師問僧、什麼處來。僧便喝。師便揖坐。僧擬議。 +師便打。師見僧來、便豎起拂子。僧禮拜。師便 +打。又見僧來、亦豎起拂子。僧不顧。師亦打。 +師、一日同普化、赴施主家齋次、師問、毛吞巨 +海、芥納須彌。為是神通妙用、本體如然。普化 +踏倒飯床。師云、太H生。普化云、這A是什麼 +所在、說H說細。師來日、又同普化赴齋。問、今 +日供養、何似昨日。普化依前踏倒飯床。師云、 +得即得、太H生。普化云、瞎漢、佛法說什麼 +H細。師乃吐舌。 
+師一日、與河陽木塔長老、同在僧堂地爐內 +坐。因說、普化每日在街市、掣風掣顛。知他是 +凡是聖。言猶未了、普化入來。師便問、汝是凡 +是聖。普化云、汝且道、我是凡是聖。師便喝。 +普化以手指云、河陽新婦子、木塔老婆禪。臨 +濟小廝兒、卻具一隻眼。師云、這賊。普化云賊 +賊、便出去。 +一日、普化在僧堂前、喫生菜。師見云、大似一 +頭驢。普化便作驢鳴。師云、這賊。普化云賊賊、 +便出去。 +因普化、常於街市搖鈴云、明頭來、明頭打、暗 +頭來、暗頭打、四方八面來、旋風打、虛空來、連架 +打。師令侍者去、纔見如是道、便把住云、總不 +與麼來時如何。普化托開云、來日大悲院A +有齋。侍者回、舉似師。師云、我從來疑著這漢。 +有一老宿參師、未曾人事、便問、禮拜即是、不 +禮拜即是。師便喝。老宿便禮拜。師云、好箇草 +賊。老宿云賊賊、便出去。師云、莫道無事好。 +首座侍立次、師云、還有過也無。首座云、有。師 +云、賓家有過、主家有過。首座云、二俱有過。 +師云、過在什麼處。首座便出去。師云、莫道無 +事好。後有僧舉似南泉。南泉云、官馬相踏。 +師因入軍營赴齋、門首見員僚。師指露柱問、 +是凡是聖。員僚無語。師打露柱云、直饒道得、 +也祇是箇木橛。便入去。 +師問院主、什麼處來。主云、州中糶黃米去來。 +師云、糶得盡麼。主云、糶得盡。師以杖面前畫 +一畫云、還糶得這箇麼。主便喝。師便打。典 +座至。師舉前語。典座云、院主不會和尚意。師 +云、爾作麼生。典座便禮拜。師亦打。有座主來 +相看次、師問、座主講何經說。主云、某甲荒 +虛、粗習百法論。師云、有一人、於三乘十二分 +教明得。有一人、於三乘十二分教明不得。是 +同是別。主云、明得即同、明不得即別。樂普為 +侍者、在師後立云、座主、這A是什麼所在、說 +同說別。師回首問侍者、汝又作麼生。侍者便 +喝。師送座主回來、遂問侍者、適來是汝喝老 +僧。侍者云、是。師便打。 +師聞第二代德山垂示云、道得也三十棒、道 +不得也三十棒、師令樂普去問、道得為什麼 +也三十棒、待伊打汝、接住棒送一送、看他作 +麼生。普到彼、如教而問。德山便打。普接住送 +一送。德山便歸方丈。普回舉似師。師云、我從 +來疑著這漢。雖然如是、汝還見德山麼。普擬 +議。師便打。 +王常侍、一日訪師。同師於僧堂前看、乃問、這 +一堂僧、還看經麼。師云、不看經。侍云、還學禪 +麼。師云、不學禪。侍云、經又不看、禪又不學、 +畢竟作箇什麼。師云、總教伊成佛作祖去。侍 +云、金屑雖貴、落眼成翳。又作麼生。師云、將 +為爾是箇俗漢。 +師問杏山、如何是露地白牛。山云、吽吽。師 +云、啞那。山云、長老作麼生。師云、這畜生。 +師問樂普云、從上來、一人行棒、一人行喝。阿 +那箇親。普云、總不親。師云、親處作麼生。普 +便喝。師乃打。 +師見僧來、展開兩手。僧無語。師云、會麼。云、不 +會。師云、渾崙擘不開、與爾兩文錢。 +大覺到參。師舉起拂子。大覺敷坐具。師擲下 +拂子。大覺收坐具、入僧堂。眾僧云、這僧莫是 +和尚親故、不禮拜、又不喫棒。師聞、令喚覺。覺 +出。師云、大眾道、汝未參長老。覺云不審、便 +自歸眾。 +趙州行腳時參師。遇師洗腳次、州便問、如何 +是祖師西來意。師云、恰值老僧洗腳。州近前、 +作聽勢。師云、更要第二杓惡水潑在。州便下 +去。 +有定上座、到參問、如何是佛法大意。師下繩 +床、擒住與一掌、便托開。定佇立。傍僧云、定 +上座、何不禮拜。定方禮拜、忽然大悟。 +麻谷到參。敷坐具問、十二面觀音、阿那面 +正。師下繩床、一手收坐具、一手搊麻谷云、十 +二面觀音、向什麼處去也。麻谷轉身、擬坐繩 +床。師拈拄杖打。麻谷接卻、相捉入方丈。 +師問僧、有時一喝、如金剛王寶劍。有時一喝、 +如踞地金毛師子。有時一喝、如探竿影草。有 +時一喝、不作一喝用。汝作麼生會。僧擬議。 +師便喝。 +504b +師問一尼、善來惡來。尼便喝。師拈棒云、更道 +更道。尼又喝。師便打。 +龍牙問、如何是祖師西來意。師云、與我過禪 +板來。牙便過禪板與師。師接得便打。牙云、打 +即任打、要且無祖師意。牙後到翠微問、如何 +是祖師西來意。微云、與我過蒲團來。牙便過 +蒲團與翠微。翠微接得便打。牙云、打即任打、 +要且無祖師意。牙住院後、有僧入室請益云、 +和尚行腳時、參二尊宿因緣、還肯他也無。牙 +云、肯即深肯、要且無祖師意。 +徑山有五百眾、少人參請。黃檗令師到徑山。 +乃謂師曰、汝到彼作麼生。師云、某甲到彼、自 +有方便。師到徑山、裝腰上法堂、見徑山。徑山 +方舉頭、師便喝。徑山擬開口、師拂袖便行。尋 +有僧問徑山、這僧適來有什麼言句、便喝和 +尚。徑山云、這僧從黃檗會A來。爾要知麼、 +且問取他。徑山五百眾、太半分散。 
+普化一日、於街市中、就人乞直裰。人皆與之。 +普化俱不要。師令院主買棺一具。普化歸來。 +師云、我與汝做得箇直裰了也。普化便自擔 +去、繞街市叫云、臨濟與我做直裰了也。我往 +東門遷化去。市人競隨看之。普化云、我今 +日未、來日往南門遷化去。如是三日、人皆 +不信。至第四日、無人隨看。獨出城外、自入棺 +內、倩路行人釘之。即時傳布。市人競往開棺、 +乃見全身脫去。祇聞空中鈴響、隱隱而去。 +行錄。 +師初在黃蘗會下、行業純一。首座乃歎曰、雖 +是後生、與眾有異。遂問、上座在此、多少時。師 +云、三年。首座云、曾參問也無。師云、不曾參 +問。不知問箇什麼。首座云、汝何不去問堂頭 +和尚、如何是佛法的的大意。師便去問。聲未 +絕、黃蘗便打。師下來。首座云、問話作麼生。 +師云、某甲問聲未絕、和尚便打。某甲不會。首 +座云、但更去問。師又去問。黃蘗又打。如是三 +度發問、三度被打。師來白首座云、幸蒙慈悲、 +令某甲問訊和尚。三度發問、三度被打。自恨 +障緣不領深旨。今且辭去。首座云、汝若去時、 +須辭和尚去。師禮拜退。首座先到和尚處云、 +問話底後生、甚是如法。若來辭時、方便接他。 +向後穿鑿成一株大樹、與天下人作廕涼去 +在。師去辭黃蘗。蘗云、不得往別處去。汝向高 +安灘頭大愚處去、必為汝說。師到大愚。大愚 +問、什麼處來。師云、黃蘗處來。大愚云、黃蘗 +有何言句。師云、某甲三度問佛法的的大意、 +三度被打。不知某甲有過無過。大愚云、黃蘗 +與麼老婆、為汝得徹困。更來這A、問有過無 +過。師於言下大悟云、元來黃蘗佛法無多子。 +大愚搊住云、這尿床鬼子、適來道有過無過、 +如今卻道、黃蘗佛法無多子。爾見箇什麼道 +理、速道速道。師於大愚脅下、築三拳。大愚托 +開云、汝師黃蘗、非于我事。師辭大愚、卻回黃 +蘗。黃蘗見來便問、這漢來來去去、有什麼了 +期。師云、祇為老婆心切。便人事了侍立。黃蘗 +問、什麼處去來。師云、昨奉慈旨、令參大愚 +去來。黃蘗云、大愚有何言句。師遂舉前話。黃 +蘗云、作麼生得這漢來、待痛與一頓。師云、說 +什麼待來、即今便喫。隨後便掌。黃蘗云、這風 +顛漢、卻來這A捋虎鬚。師便喝。黃蘗云、侍者、 +引這風顛漢、參堂去。後、溈山舉此話、問仰山、 +臨濟當時、得大愚力、得黃蘗力。仰山云、非但 +騎虎頭、亦解把虎尾。 +師栽松次、黃蘗問、深山A栽許多作什麼。師 +云、一與山門作境致、二與後人作標榜。道了、 +將钁頭打地三下。黃蘗云、雖然如是、子已喫 +吾三十棒了也。師又以钁頭打地三下、作噓 +噓聲。黃蘗云、吾宗到汝、大興於世。後溈山舉 +此語、問仰山、黃蘗當時、祇囑臨濟一人、更有 +人在。仰山云、有。祇是年代深遠、不欲舉似和 +尚。溈山云、雖然如是、吾亦要知。汝但舉看。仰 +山云、一人指南、吳越令行、遇大風即止。〔讖風穴和尚也。〕 +師侍立德山次、山云、今日困。師云、這老漢E +語作什麼。山便打。師掀倒繩床。山便休。 +師普請鋤地次、見黃蘗來、拄钁而立。黃蘗云、 +這漢困那。師云、钁也未舉、困箇什麼。黃蘗便 +打。師接住棒、一送送倒。黃蘗喚維那、維那扶 +起我。維那近前扶云、和尚爭容得這風顛漢 +無禮。黃蘗纔起、便打維那。師钁地云、諸方火 +葬、我這A一時活埋。後溈山問仰山、黃蘗打 +維那、意作麼生。仰山云、正賊走卻、邏蹤人 +喫棒。師一日、在僧堂前坐。見黃蘗來、便閉卻 +目。黃蘗乃作怖勢、便歸方丈。師隨至方丈禮 +謝。首座在黃蘗處侍立。黃蘗云、此僧雖是後 +生、卻知有此事。首座云、老和尚腳跟不點地、 +卻證據箇後生。黃蘗自於口上打一摑。首座 +云、知即得。 +師在堂中睡。黃蘗下來見、以拄杖打板頭一 +下。師舉頭、見是黃蘗、卻睡。黃蘗又打板頭一 +下、卻往上間、見首座坐禪、乃云、下間後生 +卻坐禪、汝這A妄想作什麼。首座云、這老漢 +作什麼。黃蘗打板頭一下、便出去。後、溈山問 +仰山、黃蘗入僧堂、意作麼生。仰山云、兩彩 +一賽。 +一日普請次、師在後行。黃蘗回頭、見師空手、 +乃問、钁頭在什麼處。師云、有一人將去了也。 +黃蘗云、近前來、共汝商量箇事。師便近前。黃 +蘗豎起钁頭云、祇這箇、天下人拈掇不起。師 +就手掣得、豎起云、為什麼卻在某甲手A。黃 +蘗云、今日大有人普請。便歸院。後溈山問仰 +山、钁頭在黃蘗手A、為什麼卻被臨濟奪卻。 +仰山云、賊是小人、智過君子。 +師為黃蘗馳書去溈山。時仰山作知客。接得 +書、便問、這箇是黃蘗底、那箇是專使底。師便 +掌。仰山約住云、老兄知是般事、便休。同去見 +溈山。溈山便問、黃蘗師兄多少眾。師云、七百 +眾。溈山云、什麼人為導首。師云、適來已達書 +了也。師卻問溈山、和尚此間多少眾。溈山云、 +一千五百眾。師云、太多生。溈山云、黃蘗師兄 
+亦不少。師辭溈山。仰山送出云、汝向後北去、 +有箇住處。師云、豈有與麼事。仰山云、但去、已 +後有一人佐輔老兄在。此人祇是有頭無尾、 +有始無終。師後到鎮州、普化已在彼中。師出 +世、普化佐贊於師。師住未久、普化全身脫去。 +師因半夏上黃蘗、見和尚看經。師云、我將謂 +是箇人、元來是唵黑豆老和尚。住數日、乃辭 +去。黃蘗云、汝破夏來、不終夏去。師云、某甲 +暫來禮拜和尚。黃蘗遂打、趁令去。師行數里、 +疑此事、卻回終夏。師一日、辭黃蘗。蘗問、什麼 +處去。師云、不是河南、便歸河北。黃蘗便打。師 +約住與一掌。黃蘗大笑、乃喚侍者、將百丈先 +師禪板机案來。師云、侍者、將火來。黃蘗云、雖 +然如是、汝但將去。已後坐卻天下人舌頭去 +在。後溈山問仰山、臨濟莫辜負他黃蘗也無。 +仰山云、不然。溈山云、子又作麼生。仰山云、 +知恩方解報恩。溈山云、從上古人、還有相似 +506a +底也無。仰山云、有。祇是年代深遠、不欲舉似 +和尚。溈山云、雖然如是、吾亦要知。子但舉 +看。仰山云、祇如楞嚴會上、阿難讚佛云、將此 +深心奉塵剎、是則名為報佛恩。豈不是報恩 +之事。溈山云、如是如是。見與師齊、減師半德。 +見過於師、方堪傳授。 +師到達磨塔頭。塔主云、長老、先禮佛、先禮祖。 +師云、佛祖俱不禮。塔主云、佛祖與長老是什 +麼冤家。師便拂袖而出。 +師行腳時、到龍光。光上堂。師出問云、不展鋒 +鋩、如何得勝。光據坐。師云、大善知識、豈無方 +便。光瞪目云、嗄。師以手指云、這老漢、今日敗 +闕也。 +到三峰。平和尚問曰、什麼處來。師云、黃蘗來。 +平云、黃蘗有何言句。師云、金牛昨夜遭塗炭、 +直至如今不見蹤。平云、金風吹玉管、那箇是 +知音。師云、直透萬重關、不住清霄內。平云、 +子這一問太高生。師云、龍生金鳳子、衝破碧 +琉璃。平云、且坐喫茶。又問、近離甚處。師云、 +龍光。平云、龍光近日如何。師便出去。 +到大慈。慈在方丈內坐。師問、端居丈室時如 +何。慈云、寒松一色千年別、野老拈花萬國春。 +師云、今古永超圓智體、三山鎖斷萬重關。慈 +便喝。師亦喝。慈云、作麼。師拂袖便出。 +到襄州華嚴。嚴倚拄杖、作睡勢。師云、老和尚 +瞌睡作麼。嚴云、作家禪客、宛爾不同。師云、侍 +者、點茶來、與和尚喫。嚴乃喚維那、第三位安 +排這上座。 +到翠峰。峰問、甚處來。師云、黃蘗來。峰云、黃 +蘗有何言句、指示於人。師云、黃蘗無言句。峰 +云、為什麼無。師云、設有、亦無舉處。峰云、但 +舉看。師云、一箭過西天。 +到象田。師問、不凡不聖、請師速道。田云、老 +僧祇與麼。師便喝云、許多禿子、在這A覓什 +麼D。 +到明化。化問、來來去去作什麼。師云、祇徒 +踏破草鞋。化云、畢竟作麼生。師云、老漢話頭 +也不識。 +往鳳林。路逢一婆。婆問、甚處去。師云、鳳林 +去。婆云、恰值鳳林不在。師云、甚處去。婆便 +行。師乃喚婆。婆回頭。師便打。 +到鳳林。林問、有事相借問、得麼。師云、何得 +剜肉作瘡。林云、海月澄無影、遊魚獨自迷。師 +云、海月既無影、遊魚何得迷。鳳林云、觀風 +知浪起、翫水野帆飄。師云、孤輪獨照江山靜、 +自笑一聲天地驚。林云、任將三寸輝天地、一 +句臨機試道看。師云、路逢劍客須呈劍、不是 +詩人莫獻詩。鳳林便休。師乃有頌、大道絕同、 +任向西東、石火莫及、電光罔通。溈山問仰山、 +石火莫及、電光罔通。從上諸聖、將什麼為人。 +仰山云、和尚意作麼生。溈山云、但有言說、都 +無寔義。仰山云、不然。溈山云、子又作麼生。 +仰山云、官不容針、私通車馬。 +到金牛。牛見師來、橫按拄杖、當門踞坐。師以 +手敲拄杖三下、卻歸堂中第一位坐。牛下來 +見、乃問、夫賓主相見、各具威儀。上座從何而 +來、太無禮生。師云、老和尚道什麼。牛擬開 +口。師便打。牛作倒勢。師又打。牛云、今日不 +著便。溈山問仰山、此二尊宿、還有勝負也無。 +仰山云、勝即總勝、負即總負。 +師臨遷化時、據坐云、吾滅後、不得滅卻吾正 +法眼藏。三聖出云、爭敢滅卻和尚正法眼藏。 +師云、已後有人問爾、向他道什麼。三聖便喝。 +師云、誰知吾正法眼藏、向這瞎驢邊滅卻。言 +F、端然示寂。 +師諱義玄、曹州南華人也。俗姓邢氏。幼而 +穎異、長以孝聞。及落髮受具、居於講肆、精究 +毘尼、博賾經論。俄而歎曰、此濟世之醫方也、 +非教外別傳之旨。即更衣游方、首參黃蘗、次 +謁大愚。其機緣語句、載于行錄。既受黃蘗印 +可、尋抵河北。鎮州城東南隅、臨滹沱河側、小 +院住持。其臨濟因地得名。時普化先在彼、佯 +狂混眾、聖凡莫測。師至即佐之。師正旺化、普 +化全身脫去。乃符仰山小釋迦之懸記也。適 +丁兵革、師即棄去。太尉默君和、於城中捨宅 +為寺、亦以臨濟為額、迎師居焉。後拂衣南邁、 
+至河府。府主王常侍、延以師禮。住未幾、即來 +大名府興化寺、居于東堂。師無疾、忽一日攝 +衣據坐、與三聖問答畢、寂然而逝。時唐咸通 +八年丁亥、孟陬月十日也。門人以師全身、建 +塔于大名府西北隅。G謚慧照禪師、塔號澄 +靈。合掌稽首、記師大略。住鎮州保壽嗣法小 +師廷沼謹書。 +鎮州臨濟慧照禪師語錄終。 +住大名府興化嗣法小師存獎校勘。 +永享九年八月十五日板在法性寺東經所。
\ No newline at end of file diff --git a/intl/icu/source/extra/uconv/samples/utf8/many.txt b/intl/icu/source/extra/uconv/samples/utf8/many.txt new file mode 100644 index 0000000000..47af19648b --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/many.txt @@ -0,0 +1,18 @@ +外国語の勉強と教え +Изучение и обучение иностранных языков +語文教學・语文教学 +Enseñanza y estudio de idiomas +Изучаване и Преподаване на Чужди Езипи +ქართული ენის შესწავლა და სწავლება +'læŋɡwidʒ 'lɘr:niŋ ænd 'ti:ʃiŋ +Lus kawm thaib qhia +Ngôn Ngữ, Sự học, +ללמוד וללמד את השֵפה +L'enseignement et l'étude des langues +㜊㞕㧍㒟㦮 㐀㛲㭘㒟 +Nauka języków obcych +Γλωσσική Εκμὰθηση και Διδασκαλία +ﺗﺪﺭﯾﺲ ﻭ ﯾﺎﺩﮔﯿﺮﯼ ﺯﺑﺎﻥ +Sprachlernen und -lehren +ﺗﻌﻠُّﻢ ﻭﺗﺪﺭﻳﺲ ﺍﻟﻌﺮﺑﻴﺔ +เรียนและสอนภาษา diff --git a/intl/icu/source/extra/uconv/samples/utf8/maopoem.txt b/intl/icu/source/extra/uconv/samples/utf8/maopoem.txt new file mode 100644 index 0000000000..6c27be8ba3 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/maopoem.txt @@ -0,0 +1,23 @@ +和毛泽东 <<重上井冈山>>. 严永欣, 一九八八年 + 和毛泽东 <<重上井冈山>>. 严永欣, 一九八八年. + + 久有归天愿 + 终过鬼门关 + 千里来寻归宿 + 春华变苍颜 + 到处群魔乱舞 + 更有妖雾盘绕 + 暗道入阴间 + 过了阎王殿 + 险处不须看 + + 风雷动 + 旌旗奋 + 忆人寰 + 八十三年过去 + 弹指一挥间 + 中原千军逐蒋 + 城楼万众检阅 + 褒贬满载还 + 世上无难事 + 只怕我癫痫 diff --git a/intl/icu/source/extra/uconv/samples/utf8/russian.txt b/intl/icu/source/extra/uconv/samples/utf8/russian.txt new file mode 100644 index 0000000000..ce3a85cc0d --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/russian.txt @@ -0,0 +1,6 @@ +Американские суда находятся в международных водах. Япония +выразила серьезное беспокойство советскими действиями. +Пентагон беспокойства не проявил. США проводят подобное слеже- +ние за советскими судами в Карибском море. Правда, количество +советских самолетов вызвало некоторое удивление.. 
+ diff --git a/intl/icu/source/extra/uconv/samples/utf8/simplechinese.txt b/intl/icu/source/extra/uconv/samples/utf8/simplechinese.txt new file mode 100644 index 0000000000..79e89247c5 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/simplechinese.txt @@ -0,0 +1,14 @@ +简介 + + CALIS 是一个专为语文教学而设计的电脑软件。在当今这个电脑时代,"电脑辅助教学"是一个极为教师与学生所喜欢的教学媒体。 + + 因为教学须要注重个别化,每一个学生有其不同的程度,如何有效地针对每一个学生给予不同的反应及立刻的指正,以及最后不同的记分,只有仰赖"电脑"去完成。 CALIS 正是扮演了这个角色。不仅减轻了老师的负担与时间,更能兼顾到每一个学生之需要。除此之外,CALIS 提供了生动的学习环境,所以更能激发学生的学习兴趣与动机。 + + WinCALIS 是 CALIS 的延伸,涵盖了更多的功能。比如:运用视窗 (window) 及滑鼠 (mouse),让教师及学生更为有效地操作;"拼音更正 "(spelling check) 提供适当的暗示,给予学生思考及改正;在编排课程方面,教师只须在显现的各类视窗中,输入所设计的教材內容〔课程內容、问题、解答、或是参考资料〕。电脑立即自动产生 SCRIPTS,教师可以不须具备太多的电脑常识或编写语言 (authoring language) 即可编著课程,让学生使用。 + + + WinCALIS更可以让教师利用激光影碟 (laserdisc)及录像机所产生的生动画面融入教学,让课程更为生动活泼。同时WinCALIS提供了不同语言的键盘输入,用者可以很容易地同时使用多种语言,只需透过滑鼠(mouse)来选择即可从一种语言到另一种语言。 + + 由于 CALIS具备了容易及灵活使用的本质,使得它已经成为 90 年代电脑辅助教学(CAI)的标准。 + + CALIS的第二具备了中文文书处理的功能。它不仅可以编写,也可以印出。您手边的这份文件即是用CALIS 编印出来的。 diff --git a/intl/icu/source/extra/uconv/samples/utf8/turkish.txt b/intl/icu/source/extra/uconv/samples/utf8/turkish.txt new file mode 100644 index 0000000000..88a28db92d --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/turkish.txt @@ -0,0 +1,6 @@ +Yukarda mavi gök, asağıda yağız yer yaratıldıkta; ikisinin arasında insan +oğlu yaratılmış. İnsan oğulları üzerine ecdadım Bumın hakan, İstemi hakan +tahta oturmuş; oturarak Türk milletinin ülkesini, türesini, idare edivermiş, +tanzim edivermis. Dört taraf hep düşman imiş. Asker sevk edip dört taraftaki +kavmi hep (itaati altına) almış hep muti kılmış. Başlılara baş eğdirmiş, +dizlilere diz çöktürmüş. 
diff --git a/intl/icu/source/extra/uconv/samples/utf8/utf-8-demo.txt b/intl/icu/source/extra/uconv/samples/utf8/utf-8-demo.txt new file mode 100644 index 0000000000..fdc1d1c9d8 --- /dev/null +++ b/intl/icu/source/extra/uconv/samples/utf8/utf-8-demo.txt @@ -0,0 +1,7 @@ +UTF-8 encoded sample plain-text file +‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾ +This file is a place-holder. The original file is available from Markus Kuhn's website at http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-demo.txt. + +Markus says this about copying files that are on his site: + +Please do not copy any of my publications onto your own Internet server for public access without explicit permission. If you want to refer to any of my texts, please use a hyperlink to my original and not a copy. I update some of the texts frequently and I want to prevent the confusion that arises if people read somewhere else obsolete versions that are not under my control. diff --git a/intl/icu/source/extra/uconv/sources.txt b/intl/icu/source/extra/uconv/sources.txt new file mode 100644 index 0000000000..ed227fe3b1 --- /dev/null +++ b/intl/icu/source/extra/uconv/sources.txt @@ -0,0 +1,2 @@ +uconv.cpp +uwmsg.c diff --git a/intl/icu/source/extra/uconv/uconv.1.in b/intl/icu/source/extra/uconv/uconv.1.in new file mode 100644 index 0000000000..cd5c827935 --- /dev/null +++ b/intl/icu/source/extra/uconv/uconv.1.in @@ -0,0 +1,445 @@ +.\" Hey, Emacs! This is -*-nroff-*- you know... +.\" +.\" uconv.1: manual page for the uconv utility. +.\" +.\" Copyright (C) 2016 and later: Unicode, Inc. and others. +.\" License & terms of use: http://www.unicode.org/copyright.html +.\" Copyright (C) 2000-2013 IBM, Inc. and others. +.\" +.\" Manual page by Yves Arrouye <yves@realnames.com>. 
+.\" +.TH UCONV 1 "2005-jul-1" "ICU MANPAGE" "ICU @VERSION@ Manual" +.SH NAME +.B uconv +\- convert data from one encoding to another +.SH SYNOPSIS +.B uconv +[ +.BR "\-h\fP, \fB\-?\fP, \fB\-\-help" +] +[ +.BI "\-V\fP, \fB\-\-version" +] +[ +.BI "\-s\fP, \fB\-\-silent" +] +[ +.BI "\-v\fP, \fB\-\-verbose" +] +[ +.BI "\-l\fP, \fB\-\-list" +| +.BI "\-l\fP, \fB\-\-list\-code" " code" +| +.BI "\-\-default-code" +| +.BI "\-L\fP, \fB\-\-list\-transliterators" +] +[ +.BI "\-\-canon" +] +[ +.BI "\-x" " transliteration" +] +[ +.BI "\-\-to\-callback" " callback" +| +.B "\-c" +] +[ +.BI "\-\-from\-callback" " callback" +| +.B "\-i" +] +[ +.BI "\-\-callback" " callback" +] +[ +.BI "\-\-fallback" +| +.BI "\-\-no\-fallback" +] +[ +.BI "\-b\fP, \fB\-\-block\-size" " size" +] +[ +.BI "\-f\fP, \fB\-\-from\-code" " encoding" +] +[ +.BI "\-t\fP, \fB\-\-to\-code" " encoding" +] +[ +.BI "\-\-add\-signature" +] +[ +.BI "\-\-remove\-signature" +] +[ +.BI "\-o\fP, \fB\-\-output" " file" +] +[ +.IR file .\|.\|. +] +.SH DESCRIPTION +.B uconv +converts, or transcodes, each given +.I file +(or its standard input if no +.I file +is specified) from one +.I encoding +to another. +The transcoding is done using Unicode as a pivot encoding +(i.e. the data are first transcoded from their original encoding to +Unicode, and then from Unicode to the destination encoding). +.PP +If an +.I encoding +is not specified or is +.BR - , +the default encoding is used. Thus, calling +.B uconv +with no +.I encoding +provides an easy way to validate and sanitize data files for +further consumption by tools requiring data in the default encoding. +.PP +When calling +.BR uconv , +it is possible to specify callbacks that are used to handle invalid +characters in the input, or characters that cannot be transcoded to +the destination encoding. Some encodings, for example, offer a default +substitution character that can be used to represent the occurrence of +such characters in the input.
Other callbacks offer a useful visual +representation of the invalid data. +.PP +.B uconv +can also run the specified +.IR transliteration +on the transcoded data, +in which case transliteration will happen as an intermediate step, +after the data have been transcoded to Unicode. +The +.I transliteration +can be either a list of semicolon-separated transliterator names, +or an arbitrarily complex set of rules in the ICU transliteration +rules format. +.PP +For transcoding purposes, +.B uconv +options are compatible with those of +.BR iconv (1), +making it easy to replace it in scripts. It is not necessarily the case, +however, that the encoding names used by +.B uconv +and ICU are the same as the ones used by +.BR iconv (1). +Also, options that provide informational data, such as the +.B \-l\fP, \fB\-\-list +one offered by some +.BR iconv (1) +variants such as GNU's, produce data in a slightly different and +easier to parse format. +.SH OPTIONS +.TP +.BR "\-h\fP, \fB\-?\fP, \fB\-\-help" +Print help about usage and exit. +.TP +.BR "\-V\fP, \fB\-\-version" +Print the version of +.B uconv +and exit. +.TP +.BI "\-s\fP, \fB\-\-silent" +Suppress messages during execution. +.TP +.BI "\-v\fP, \fB\-\-verbose" +Display extra informative messages during execution. +.TP +.BI "\-l\fP, \fB\-\-list" +List all the available encodings and exit. +.TP +.BI "\-l\fP, \fB\-\-list\-code" " code" +List only the +.I code +encoding and exit. If +.I code +is not a proper encoding, exit with an error. +.TP +.BI "\-\-default-code" +List only the name of the default encoding and exit. +.TP +.BI "\-L\fP, \fB\-\-list\-transliterators" +List all the available transliterators and exit. +.TP +.BI "\--canon" +If used with +.BI "\-l\fP, \fB\-\-list" +or +.BR "\-\-default-code" , +the list of encodings is produced in a format compatible with +.BR convrtrs.txt (5). +If used with +.BR "\-L\fP, \fB\-\-list\-transliterators" , +print only one transliterator name per line. 
+.TP +.BI "\-x" " transliteration" +Run the given +.IR transliteration +on the transcoded Unicode data, +and use the transliterated data as input for the transcoding to +the destination encoding. +.TP +.BI "\-\-to\-callback" " callback" +Use +.I callback +to handle characters that cannot be transcoded to the destination +encoding. See section +.B CALLBACKS +for details on valid callbacks. +.TP +.B "\-c" +Omit invalid characters from the output. +Same as +.BR "\-\-to\-callback skip" . +.TP +.BI "\-\-from\-callback" " callback" +Use +.I callback +to handle characters that cannot be transcoded from the original +encoding. See section +.B CALLBACKS +for details on valid callbacks. +.TP +.B "\-i" +Ignore invalid sequences in the input. +Same as +.BR "\-\-from\-callback skip" . +.TP +.BI "\-\-callback" " callback" +Use +.I callback +to handle both characters that cannot be transcoded from the original +encoding and characters that cannot be transcoded to the destination +encoding. See section +.B CALLBACKS +for details on valid callbacks. +.TP +.BI "\-\-fallback" +Use the fallback mapping when transcoding from +Unicode to the destination encoding. +.TP +.BI "\-\-no\-fallback" +Do not use the fallback mapping when transcoding from Unicode to the +destination encoding. +This is the default. +.TP +.BI "\-b\fP, \fB\-\-block\-size" " size" +Read input in blocks of +.I size +bytes at a time. The default block size is +4096. +.TP +.BI "\-f\fP, \fB\-\-from\-code" " encoding" +Set the original encoding of the data to +.IR encoding . +.TP +.BI "\-t\fP, \fB\-\-to\-code" " encoding" +Transcode the data to +.IR encoding . +.TP +.BI "\-\-add\-signature" +Add a U+FEFF Unicode signature character (BOM) if the output charset +supports it and does not add one anyway. +.TP +.BI "\-\-remove\-signature" +Remove a U+FEFF Unicode signature character (BOM). +.TP +.BI "\-o\fP, \fB\-\-output" " file" +Write the transcoded data to +.IR file . 
+.SH CALLBACKS +.B uconv +supports specifying callbacks to handle invalid data. Callbacks can be +set for both directions of transcoding: from the original encoding to +Unicode, with the +.BR "\-\-from\-callback" +option, and from Unicode to the destination encoding, with the +.BR "\-\-to\-callback" +option. +.PP +The following is a list of valid +.I callback +names, along with a description of their behavior. The list of +callbacks actually supported by +.B uconv +is displayed when it is called with +.BR "\-h\fP, \fB\-\-help" . +.PP +.TP \w'\fBescape-unicode'u+3n +.B substitute +Write the encoding's substitute sequence, or the Unicode +replacement character +.B U+FFFD +when transcoding to Unicode. +.TP +.B skip +Ignore the invalid data. +.TP +.B stop +Stop with an error when encountering invalid data. +This is the default callback. +.TP +.B escape +Same as +.BR escape-icu . +.TP +.B escape-icu +Replace the missing characters with a string of the format +.BR %U\fIhhhh\fP +for plane 0 characters, and +.BR %U\fIhhhh\fP%U\fIhhhh\fP +for planes 1 and above characters, +where +.I hhhh +is the hexadecimal value of one of the UTF-16 code units representing the +character. Characters from planes 1 and above are written as a pair of +UTF-16 surrogate code units. +.TP +.B escape-java +Replace the missing characters with a string of the format +.BR \eu\fIhhhh\fP +for plane 0 characters, and +.BR \eu\fIhhhh\fP\eu\fIhhhh\fP +for planes 1 and above characters, +where +.I hhhh +is the hexadecimal value of one of the UTF-16 code units representing the +character. Characters from planes 1 and above are written as a pair of +UTF-16 surrogate code units. +.TP +.B escape-c +Replace the missing characters with a string of the format +.BR \eu\fIhhhh\fP +for plane 0 characters, and +.BR \eU\fIhhhhhhhh\fP +for planes 1 and above characters, +where +.I hhhh +and +.I hhhhhhhh +are the hexadecimal values of the Unicode codepoint. +.TP +.B escape-xml +Same as +.BR escape-xml-hex . 
+.TP +.B escape-xml-hex +Replace the missing characters with a string of the format +.BR &#x\fIhhhh\fP; , +where +.I hhhh +is the hexadecimal value of the Unicode codepoint. +.TP +.B escape-xml-dec +Replace the missing characters with a string of the format +.BR &#\fInnnn\fP; , +where +.I nnnn +is the decimal value of the Unicode codepoint. +.TP +.B escape-unicode +Replace the missing characters with a string of the format +.BR {U+\fIhhhh\fP} , +where +.I hhhh +is the hexadecimal value of the Unicode codepoint. +That hexadecimal string is of variable length and can use from 4 to +6 digits. +This is the format universally used to denote a Unicode codepoint in +the literature, delimited by curly braces for easy recognition of those +substitutions in the output. +.SH EXAMPLES +Convert data from a given +.I encoding +to the platform encoding: + +.RS 4 +.B \fR$ \fPuconv \-f \fIencoding\fP +.RE +.PP +Check if a +.I file +contains valid data for a given +.IR encoding : + +.RS 4 +.B \fR$ \fPuconv \-f \fIencoding\fP \-c \fIfile\fP >/dev/null +.RE +.PP +Convert a UTF-8 +.I file +to a given +.I encoding +and ensure that the resulting text is good for any version of HTML: + +.RS 4 +.B \fR$ \fPuconv \-f utf-8 \-t \fIencoding\fP \e +.br +.B " \-\-callback escape-xml-dec \fIfile\fP" +.RE +.PP +Display the names of the Unicode code points in a UTF-8 file: + +.RS 4 +.B \fR$ \fPuconv \-f utf-8 \-x any-name \fIfile\fP +.RE +.PP +Print the name of a Unicode code point whose value is known (\fBU+30AB\fP +in this example): + +.RS 4 +.B \fR$ \fPecho '\eu30ab' | uconv \-x 'hex-any; any-name'; echo +.br +{KATAKANA LETTER KA}{LINE FEED} +.br +$ +.RE + +(The names are delimited by curly braces. +The name of the line terminator is also displayed.)
+.PP +Normalize UTF-8 data using Unicode NFKC, remove all control characters, +and map Katakana to Hiragana: + +.RS 4 +.B \fR$ \fPuconv \-f utf-8 \-t utf-8 \e +.br +.B " \-x '::nfkc; [:Cc:] >; ::katakana-hiragana;'" +.SH CAVEATS AND BUGS +.B uconv +does report errors as occurring at the first invalid byte +encountered. This may be confusing to users of GNU +.BR iconv (1), +which reports errors as occurring at the first byte of an invalid +sequence. For multi-byte character sets or encodings, this means that +.BR uconv +error positions may be at a later offset in the input stream than +would be the case with GNU +.BR iconv (1). +.PP +The reporting of error positions when a transliterator is used may be +inaccurate or unavailable, in which case +.BR uconv +will report the offset in the output stream at which the error +occurred. +.SH AUTHORS +Jonas Utterstroem +.br +Yves Arrouye +.SH VERSION +@VERSION@ +.SH COPYRIGHT +Copyright (C) 2000-2005 IBM, Inc. and others. +.SH SEE ALSO +.BR iconv (1) diff --git a/intl/icu/source/extra/uconv/uconv.cpp b/intl/icu/source/extra/uconv/uconv.cpp new file mode 100644 index 0000000000..0f4af65663 --- /dev/null +++ b/intl/icu/source/extra/uconv/uconv.cpp @@ -0,0 +1,1397 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/***************************************************************************** +* +* Copyright (C) 1999-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************/ + +/* + * uconv(1): an iconv(1)-like converter using ICU. + * + * Original code by Jonas Utterström <jonas.utterstrom@vittran.norrnod.se> + * contributed in 1999. + * + * Conversion to the C conversion API and many improvements by + * Yves Arrouye <yves@realnames.com>, current maintainer. + * + * Markus Scherer maintainer from 2003. + * See source code repository history for changes. 
+ */ + +#include <unicode/utypes.h> +#include <unicode/putil.h> +#include <unicode/ucnv.h> +#include <unicode/uenum.h> +#include <unicode/unistr.h> +#include <unicode/translit.h> +#include <unicode/uset.h> +#include <unicode/uclean.h> +#include <unicode/utf16.h> + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include "cmemory.h" +#include "cstring.h" +#include "ustrfmt.h" + +#include "unicode/uwmsg.h" + +U_NAMESPACE_USE + +#if U_PLATFORM_USES_ONLY_WIN32_API && !defined(__STRICT_ANSI__) +#include <io.h> +#include <fcntl.h> +#if U_PLATFORM_USES_ONLY_WIN32_API +#define USE_FILENO_BINARY_MODE 1 +/* Windows likes to rename Unix-like functions */ +#ifndef fileno +#define fileno _fileno +#endif +#ifndef setmode +#define setmode _setmode +#endif +#ifndef O_BINARY +#define O_BINARY _O_BINARY +#endif +#endif +#endif + +#ifdef UCONVMSG_LINK +/* below from the README */ +#include "unicode/utypes.h" +#include "unicode/udata.h" +U_CFUNC char uconvmsg_dat[]; +#endif + +#define DEFAULT_BUFSZ 4096 +#define UCONVMSG "uconvmsg" + +static UResourceBundle *gBundle = 0; /* Bundle containing messages. */ + +/* + * Initialize the message bundle so that message strings can be fetched + * by u_wmsg(). + * + */ + +static void initMsg(const char *pname) { + static int ps = 0; + + if (!ps) { + char dataPath[2048]; /* XXX Sloppy: should be PATH_MAX. */ + UErrorCode err = U_ZERO_ERROR; + + ps = 1; + + /* Set up our static data - if any */ +#if defined(UCONVMSG_LINK) && U_PLATFORM != U_PF_OS390 /* On z/OS, this is failing. */ + udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err); + if (U_FAILURE(err)) { + fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n", + pname, u_errorName(err)); + err = U_ZERO_ERROR; /* It may still fail */ + } +#endif + + /* Get messages. 
*/ + gBundle = u_wmsg_setPath(UCONVMSG, &err); + if (U_FAILURE(err)) { + fprintf(stderr, + "%s: warning: couldn't open bundle %s: %s\n", + pname, UCONVMSG, u_errorName(err)); +#ifdef UCONVMSG_LINK + fprintf(stderr, + "%s: setAppData was called, internal data %s failed to load\n", + pname, UCONVMSG); +#endif + + err = U_ZERO_ERROR; + /* that was try #1, try again with a path; + build the path with a bounded write because the length of + u_getDataDirectory() is not checked here and dataPath has a + fixed size */ + int pathLen = snprintf(dataPath, sizeof(dataPath), "%s%s%s", + u_getDataDirectory(), U_FILE_SEP_STRING, UCONVMSG); + if (pathLen < 0 || (size_t)pathLen >= sizeof(dataPath)) { + /* the path did not fit: give up instead of opening a truncated path */ + fprintf(stderr, + "%s: warning: path to bundle %s is too long\n", + pname, UCONVMSG); + fprintf(stderr, "%s: warning: messages will not be displayed\n", pname); + return; + } + + gBundle = u_wmsg_setPath(dataPath, &err); + if (U_FAILURE(err)) { + fprintf(stderr, + "%s: warning: still couldn't open bundle %s: %s\n", + pname, dataPath, u_errorName(err)); + fprintf(stderr, "%s: warning: messages will not be displayed\n", pname); + } + } + } +} + +/* Mapping of callback names to the callbacks passed to the converter + API. */ + +static struct callback_ent { + const char *name; + UConverterFromUCallback fromu; + const void *fromuctxt; + UConverterToUCallback tou; + const void *touctxt; +} transcode_callbacks[] = { + { "substitute", + UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, + UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 }, + { "skip", + UCNV_FROM_U_CALLBACK_SKIP, 0, + UCNV_TO_U_CALLBACK_SKIP, 0 }, + { "stop", + UCNV_FROM_U_CALLBACK_STOP, 0, + UCNV_TO_U_CALLBACK_STOP, 0 }, + { "escape", + UCNV_FROM_U_CALLBACK_ESCAPE, 0, + UCNV_TO_U_CALLBACK_ESCAPE, 0}, + { "escape-icu", + UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU, + UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU }, + { "escape-java", + UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA, + UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA }, + { "escape-c", + UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, + UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C }, + { "escape-xml", + UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, + UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX }, + { "escape-xml-hex", + UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, + UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX }, + { "escape-xml-dec", +
UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, + UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC }, + { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, + UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE } +}; + +/* Return a pointer to the transcode_callbacks record whose name matches the given name (compared with uprv_stricmp()), or 0 if no record matches. */ + +static const struct callback_ent *findCallback(const char *name) { + int i, count = + UPRV_LENGTHOF(transcode_callbacks); + + /* A linear search is enough: the table is small, and bsearch() + may not be that portable. */ + + for (i = 0; i < count; ++i) { + if (!uprv_stricmp(name, transcode_callbacks[i].name)) { + return &transcode_callbacks[i]; + } + } + + return 0; +} + +/* Print converter information. If lookfor is set, only that converter will + be printed, otherwise all converters will be printed. If canon is non + zero, tags and aliases for each converter are printed too, in the format + expected for convrtrs.txt(5). */ + +static int printConverters(const char *pname, const char *lookfor, + UBool canon) +{ + UErrorCode err = U_ZERO_ERROR; + int32_t num; + uint16_t num_stds; + const char **stds; + + /* If there is a specified name, just handle that now. */ + + if (lookfor) { + if (!canon) { + printf("%s\n", lookfor); + return 0; + } else { + /* Because we are printing a canonical name, we need the + true converter name. We've done that already except for + the default name (because we want to print the exact + name one would get when calling ucnv_getDefaultName() + in non-canon mode). But since we do not know at this + point if we have the default name or something else, we + need to normalize again to the canonical converter + name. */ + + const char *truename = ucnv_getAlias(lookfor, 0, &err); + if (U_SUCCESS(err)) { + lookfor = truename; + } else { + err = U_ZERO_ERROR; + } + } + } + + /* Print converter names.
We come here for one of two reasons: we + are printing all the names (lookfor was null), or we have a + single converter to print but in canon mode, hence we need to + get to it in order to print everything. */ + + num = ucnv_countAvailable(); + if (num <= 0) { + initMsg(pname); + u_wmsg(stderr, "cantGetNames"); + return -1; + } + if (lookfor) { + num = 1; /* We know where we want to be. */ + } + + num_stds = ucnv_countStandards(); + stds = (const char **) uprv_malloc(num_stds * sizeof(*stds)); + if (!stds) { + u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR)); + return -1; + } else { + uint16_t s; + + if (canon) { + printf("{ "); + } + for (s = 0; s < num_stds; ++s) { + stds[s] = ucnv_getStandard(s, &err); + if (canon) { + printf("%s ", stds[s]); + } + if (U_FAILURE(err)) { + u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err)); + goto error_cleanup; + } + } + if (canon) { + puts("}"); + } + } + + for (int32_t i = 0; i < num; i++) { + const char *name; + uint16_t num_aliases; + + /* Set the name either to what we are looking for, or + to the current converter name. */ + + if (lookfor) { + name = lookfor; + } else { + name = ucnv_getAvailableName(i); + } + + /* Get all the aliases associated to the name. */ + + err = U_ZERO_ERROR; + num_aliases = ucnv_countAliases(name, &err); + if (U_FAILURE(err)) { + printf("%s", name); + + UnicodeString str(name, ""); + putchar('\t'); + u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(), + u_wmsg_errorName(err)); + goto error_cleanup; + } else { + uint16_t a, s, t; + + /* Write all the aliases and their tags. */ + + for (a = 0; a < num_aliases; ++a) { + const char *alias = ucnv_getAlias(name, a, &err); + + if (U_FAILURE(err)) { + UnicodeString str(name, ""); + putchar('\t'); + u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(), + u_wmsg_errorName(err)); + goto error_cleanup; + } + + /* Print the current alias so that it looks right. */ + printf("%s%s%s", (canon ? (a == 0? 
"" : "\t" ) : "") , + alias, + (canon ? "" : " ")); + + /* Look (slowly, linear searching) for a tag. */ + + if (canon) { + /* -1 to skip the last standard */ + for (s = t = 0; s < num_stds-1; ++s) { + UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err); + if (U_SUCCESS(err)) { + /* List the standard tags */ + const char *standardName; + UBool isFirst = true; + UErrorCode enumError = U_ZERO_ERROR; + while ((standardName = uenum_next(nameEnum, nullptr, &enumError))) { + /* See if this alias is supported by this standard. */ + if (!strcmp(standardName, alias)) { + if (!t) { + printf(" {"); + t = 1; + } + /* Print a * after the default standard name */ + printf(" %s%s", stds[s], (isFirst ? "*" : "")); + } + isFirst = false; + } + } + } + if (t) { + printf(" }"); + } + } + /* Terminate this entry. */ + if (canon) { + puts(""); + } + + /* Move on. */ + } + /* Terminate this entry. */ + if (!canon) { + puts(""); + } + } + } + + /* Free temporary data. */ + + uprv_free(stds); + + /* Success. */ + + return 0; +error_cleanup: + uprv_free(stds); + return -1; +} + +/* Print all available transliterators. If canon is non zero, print + one transliterator per line. */ + +static int printTransliterators(UBool canon) +{ +#if UCONFIG_NO_TRANSLITERATION + printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n"); + return 1; +#else + UErrorCode status = U_ZERO_ERROR; + UEnumeration *ids = utrans_openIDs(&status); + int32_t i, numtrans = uenum_count(ids, &status); + + char sepchar = canon ? '\n' : ' '; + + for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) { + int32_t len; + const char *nextTrans = uenum_next(ids, &len, &status); + + printf("%s", nextTrans); + if (i < numtrans - 1) { + putchar(sepchar); + } + } + + uenum_close(ids); + + /* Add a terminating newline if needed. */ + + if (sepchar != '\n') { + putchar('\n'); + } + + /* Success. 
*/ + + return 0; +#endif +} + +enum { + uSP = 0x20, // space + uCR = 0xd, // carriage return + uLF = 0xa, // line feed + uNL = 0x85, // newline + uLS = 0x2028, // line separator + uPS = 0x2029, // paragraph separator + uSig = 0xfeff // signature/BOM character +}; + +static inline int32_t +getChunkLimit(const UnicodeString &prev, const UnicodeString &s) { + // find one of + // CR, LF, CRLF, NL, LS, PS + // for paragraph ends (see UAX #13/Unicode 4) + // and include it in the chunk + // all of these characters are on the BMP + // do not include FF or VT in case they are part of a paragraph + // (important for bidi contexts) + // + // returns the number of code units at the start of s that complete the + // current chunk (1 or 0 when prev ended with a CR), or -1 if s does not + // yet contain a complete paragraph end + static const char16_t paraEnds[] = { + 0xd, 0xa, 0x85, 0x2028, 0x2029 + }; + enum { + iCR, iLF, iNL, iLS, iPS, iCount + }; + + // first, see if there is a CRLF split between prev and s + if (prev.endsWith(paraEnds + iCR, 1)) { + if (s.startsWith(paraEnds + iLF, 1)) { + return 1; // split CRLF, include the LF + } else if (!s.isEmpty()) { + return 0; // complete the last chunk + } else { + return -1; // wait for actual further contents to arrive + } + } + + const char16_t *u = s.getBuffer(), *limit = u + s.length(); + char16_t c; + + while (u < limit) { + c = *u++; + // compare LS and PS exactly: a bit-mask test against uLS would also + // match unrelated code units that contain those bits, such as U+6E28 + if ( + ((c < uSP) && (c == uCR || c == uLF)) || + (c == uNL) || + (c == uLS || c == uPS) + ) { + if (c == uCR) { + // check for CRLF + if (u == limit) { + return -1; // LF may be in the next chunk + } else if (*u == uLF) { + ++u; // include the LF in this chunk + } + } + return (int32_t)(u - s.getBuffer()); + } + } + + return -1; // continue collecting the chunk +} + +enum { + CNV_NO_FEFF, // cannot convert the U+FEFF Unicode signature character (BOM) + CNV_WITH_FEFF, // can convert the U+FEFF signature character + CNV_ADDS_FEFF // automatically adds/detects the U+FEFF signature character +}; + +static inline char16_t +nibbleToHex(uint8_t n) { + n &= 0xf; + return + n <= 9 ?
+ (char16_t)(0x30 + n) : + (char16_t)((0x61 - 10) + n); +} + +// check the converter's Unicode signature properties; +// the fromUnicode side of the converter must be in its initial state +// and will be reset again if it was used +// returns CNV_NO_FEFF if U+FEFF is not in the converter's roundtrip set or the set query failed, +// CNV_ADDS_FEFF if converting a lone "a" already produces bytes in which +// ucnv_detectUnicodeSignature() finds a signature, and CNV_WITH_FEFF otherwise +static int32_t +cnvSigType(UConverter *cnv) { + UErrorCode err; + int32_t result; + + // test if the output charset can convert U+FEFF + USet *set = uset_open(1, 0); + err = U_ZERO_ERROR; + ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err); + if (U_SUCCESS(err) && uset_contains(set, uSig)) { + result = CNV_WITH_FEFF; + } else { + result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted + } + uset_close(set); + + if (result == CNV_WITH_FEFF) { + // test if the output charset emits a signature anyway + const char16_t a[1] = { 0x61 }; // "a" + const char16_t *in; + + char buffer[20]; + char *out; + + in = a; + out = buffer; + err = U_ZERO_ERROR; + ucnv_fromUnicode(cnv, + &out, buffer + sizeof(buffer), + &in, a + 1, + nullptr, true, &err); + ucnv_resetFromUnicode(cnv); // restore the initial state promised above + + if (nullptr != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), nullptr, &err) && + U_SUCCESS(err) + ) { + result = CNV_ADDS_FEFF; + } + } + + return result; +} + +class ConvertFile { +public: + ConvertFile() : + buf(nullptr), outbuf(nullptr), fromoffsets(nullptr), + bufsz(0), signature(0) {} + + void + setBufferSize(size_t bufferSize) { + bufsz = bufferSize; + + buf = new char[2 * bufsz]; + outbuf = buf + bufsz; // second half of the same allocation; only buf is deleted + + // +1 for an added U+FEFF in the intermediate Unicode buffer + fromoffsets = new int32_t[bufsz + 1]; + } + + ~ConvertFile() { + delete [] buf; + delete [] fromoffsets; + } + + UBool convertFile(const char *pname, + const char *fromcpage, + UConverterToUCallback toucallback, + const void *touctxt, + const char *tocpage, + UConverterFromUCallback fromucallback, + const void *fromuctxt, + UBool fallback, + const char *translit, + const char *infilestr, + FILE * outfile, int verbose); +private: + friend int main(int argc, char
**argv); + + char *buf, *outbuf; + int32_t *fromoffsets; + + size_t bufsz; + int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character +}; + +// Convert a file from one encoding to another +UBool +ConvertFile::convertFile(const char *pname, + const char *fromcpage, + UConverterToUCallback toucallback, + const void *touctxt, + const char *tocpage, + UConverterFromUCallback fromucallback, + const void *fromuctxt, + UBool fallback, + const char *translit, + const char *infilestr, + FILE * outfile, int verbose) +{ + FILE *infile; + UBool ret = true; + UConverter *convfrom = 0; + UConverter *convto = 0; + UErrorCode err = U_ZERO_ERROR; + UBool flush; + UBool closeFile = false; + const char *cbufp, *prevbufp; + char *bufp; + + uint32_t infoffset = 0, outfoffset = 0; /* Where we are in the file, for error reporting. */ + + const char16_t *unibuf, *unibufbp; + char16_t *unibufp; + + size_t rd, wr; + +#if !UCONFIG_NO_TRANSLITERATION + Transliterator *t = 0; // Transliterator acting on Unicode data. + UnicodeString chunk; // One chunk of the text being collected for transformation. +#endif + UnicodeString u; // String to do the transliteration. 
+ int32_t ulen; + + // use conversion offsets for error messages + // unless a transliterator is used - + // a text transformation will reorder characters in unpredictable ways + UBool useOffsets = true; + + // Open the correct input file or connect to stdin for reading input + + if (infilestr != 0 && strcmp(infilestr, "-")) { + infile = fopen(infilestr, "rb"); + if (infile == 0) { + UnicodeString str1(infilestr, ""); + str1.append((UChar32) 0); + UnicodeString str2(strerror(errno), ""); + str2.append((UChar32) 0); + initMsg(pname); + u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer()); + return false; + } + closeFile = true; + } else { + infilestr = "-"; + infile = stdin; +#ifdef USE_FILENO_BINARY_MODE + if (setmode(fileno(stdin), O_BINARY) == -1) { + initMsg(pname); + u_wmsg(stderr, "cantSetInBinMode"); + return false; + } +#endif + } + + if (verbose) { + fprintf(stderr, "%s:\n", infilestr); + } + +#if !UCONFIG_NO_TRANSLITERATION + // Create transliterator as needed. + + if (translit != nullptr && *translit) { + UParseError parse; + UnicodeString str(translit), pestr; + + /* Create from rules or by ID as needed. 
*/ + + parse.line = -1; + + if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) { + t = Transliterator::createFromRules(UNICODE_STRING_SIMPLE("Uconv"), str, UTRANS_FORWARD, parse, err); + } else { + t = Transliterator::createInstance(UnicodeString(translit, -1, US_INV), UTRANS_FORWARD, err); + } + + if (U_FAILURE(err)) { + str.append((UChar32) 0); + initMsg(pname); + + if (parse.line >= 0) { + char16_t linebuf[20], offsetbuf[20]; + uprv_itou(linebuf, 20, parse.line, 10, 0); + uprv_itou(offsetbuf, 20, parse.offset, 10, 0); + u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(), + u_wmsg_errorName(err), linebuf, offsetbuf); + } else { + u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(), + u_wmsg_errorName(err)); + } + + if (t) { + delete t; + t = 0; + } + goto error_exit; + } + + useOffsets = false; + } +#endif + + // Create codepage converter. If the codepage or its aliases weren't + // available, it returns nullptr and a failure code. We also set the + // callbacks, and return errors in the same way. 
+ + convfrom = ucnv_open(fromcpage, &err); + if (U_FAILURE(err)) { + UnicodeString str(fromcpage, ""); + initMsg(pname); + u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(), + u_wmsg_errorName(err)); + goto error_exit; + } + ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err); + if (U_FAILURE(err)) { + initMsg(pname); + u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err)); + goto error_exit; + } + + convto = ucnv_open(tocpage, &err); + if (U_FAILURE(err)) { + UnicodeString str(tocpage, ""); + initMsg(pname); + u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(), + u_wmsg_errorName(err)); + goto error_exit; + } + ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err); + if (U_FAILURE(err)) { + initMsg(pname); + u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err)); + goto error_exit; + } + ucnv_setFallback(convto, fallback); + + UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode; + int8_t sig; + + // OK, we can convert now. + sig = signature; + rd = 0; + + do { + willexit = false; + + // input file offset at the beginning of the next buffer + infoffset += static_cast<uint32_t>(rd); + + rd = fread(buf, 1, bufsz, infile); + if (ferror(infile) != 0) { + UnicodeString str(strerror(errno)); + initMsg(pname); + u_wmsg(stderr, "cantRead", str.getTerminatedBuffer()); + goto error_exit; + } + + // Convert the read buffer into the new encoding via Unicode. + // After the call 'unibufp' will be placed behind the last + // character that was converted in the 'unibuf'. + // Also the 'cbufp' is positioned behind the last converted + // character. + // At the last conversion in the file, flush should be set to + // true so that we get all characters converted. + // + // The converter must be flushed at the end of conversion so + // that characters on hold also will be written. 
+ + cbufp = buf; + flush = (UBool)(rd != bufsz); + + // convert until the input is consumed + do { + // remember the start of the current byte-to-Unicode conversion + prevbufp = cbufp; + + unibuf = unibufp = u.getBuffer((int32_t)bufsz); + + // Use bufsz instead of u.getCapacity() for the targetLimit + // so that we don't overflow fromoffsets[]. + ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp, + buf + rd, useOffsets ? fromoffsets : nullptr, flush, &err); + + ulen = (int32_t)(unibufp - unibuf); + u.releaseBuffer(U_SUCCESS(err) ? ulen : 0); + + // fromSawEndOfBytes indicates that ucnv_toUnicode() is done + // converting all of the input bytes. + // It works like this because ucnv_toUnicode() returns only under the + // following conditions: + // - an error occurred during conversion (an error code is set) + // - the target buffer is filled (the error code indicates an overflow) + // - the source is consumed + // That is, if the error code does not indicate a failure, + // not even an overflow, then the source must be consumed entirely. 
+ fromSawEndOfBytes = (UBool)U_SUCCESS(err); + + if (err == U_BUFFER_OVERFLOW_ERROR) { + err = U_ZERO_ERROR; + } else if (U_FAILURE(err)) { + char pos[32], errorBytes[32]; + int8_t i, length, errorLength; + + UErrorCode localError = U_ZERO_ERROR; + errorLength = (int8_t)sizeof(errorBytes); + ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError); + if (U_FAILURE(localError) || errorLength == 0) { + errorLength = 1; + } + + // print the input file offset of the start of the error bytes: + // input file offset of the current byte buffer + + // length of the just consumed bytes - + // length of the error bytes + length = + (int8_t)snprintf(pos, sizeof(pos), "%d", + (int)(infoffset + (cbufp - buf) - errorLength)); + + // output the bytes that caused the error + UnicodeString str; + for (i = 0; i < errorLength; ++i) { + if (i > 0) { + str.append((char16_t)uSP); + } + str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4)); + str.append(nibbleToHex((uint8_t)errorBytes[i])); + } + + initMsg(pname); + u_wmsg(stderr, "problemCvtToU", + UnicodeString(pos, length, "").getTerminatedBuffer(), + str.getTerminatedBuffer(), + u_wmsg_errorName(err)); + + willexit = true; + err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */ + } + + // Replaced a check for whether the input was consumed by + // looping until it is; message key "premEndInput" now obsolete. + + if (ulen == 0) { + continue; + } + + // remove a U+FEFF Unicode signature character if requested + if (sig < 0) { + if (u.charAt(0) == uSig) { + u.remove(0, 1); + + // account for the removed char16_t and offset + --ulen; + + if (useOffsets) { + // remove an offset from fromoffsets[] as well + // to keep the array parallel with the UChars + memmove(fromoffsets, fromoffsets + 1, ulen * 4); + } + + } + sig = 0; + } + +#if !UCONFIG_NO_TRANSLITERATION + // Transliterate/transform if needed. 
+ + // For transformation, we use chunking code - + // collect Unicode input until, for example, an end-of-line, + // then transform and output-convert that and continue collecting. + // This makes the transformation result independent of the buffer size + // while avoiding the slower keyboard mode. + // The end-of-chunk characters are completely included in the + // transformed string in case they are to be transformed themselves. + if (t != nullptr) { + UnicodeString out; + int32_t chunkLimit; + + do { + chunkLimit = getChunkLimit(chunk, u); + if (chunkLimit < 0 && flush && fromSawEndOfBytes) { + // use all of the rest at the end of the text + chunkLimit = u.length(); + } + if (chunkLimit >= 0) { + // complete the chunk and transform it + chunk.append(u, 0, chunkLimit); + u.remove(0, chunkLimit); + t->transliterate(chunk); + + // append the transformation result to the result and empty the chunk + out.append(chunk); + chunk.remove(); + } else { + // continue collecting the chunk + chunk.append(u); + break; + } + } while (!u.isEmpty()); + + u = out; + ulen = u.length(); + } +#endif + + // add a U+FEFF Unicode signature character if requested + // and possible/necessary + if (sig > 0) { + if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) { + u.insert(0, (char16_t)uSig); + + if (useOffsets) { + // insert a pseudo-offset into fromoffsets[] as well + // to keep the array parallel with the UChars + memmove(fromoffsets + 1, fromoffsets, ulen * 4); + fromoffsets[0] = -1; + } + + // account for the additional char16_t and offset + ++ulen; + } + sig = 0; + } + + // Convert the Unicode buffer into the destination codepage + // Again 'bufp' will be placed behind the last converted character + // And 'unibufp' will be placed behind the last converted unicode character + // At the last conversion flush should be set to true to ensure that + // all characters left get converted + + unibuf = unibufbp = u.getBuffer(); + + do { + bufp = outbuf; + + // Use 
fromSawEndOfBytes in addition to the flush flag - + // it indicates whether the intermediate Unicode string + // contains the very last UChars for the very last input bytes. + ucnv_fromUnicode(convto, &bufp, outbuf + bufsz, + &unibufbp, + unibuf + ulen, + nullptr, (UBool)(flush && fromSawEndOfBytes), &err); + + // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done + // converting all of the intermediate UChars. + // See comment for fromSawEndOfBytes. + toSawEndOfUnicode = (UBool)U_SUCCESS(err); + + if (err == U_BUFFER_OVERFLOW_ERROR) { + err = U_ZERO_ERROR; + } else if (U_FAILURE(err)) { + char16_t errorUChars[4]; + const char *errtag; + char pos[32]; + UChar32 c; + int8_t i, length, errorLength; + + UErrorCode localError = U_ZERO_ERROR; + errorLength = UPRV_LENGTHOF(errorUChars); + ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError); + if (U_FAILURE(localError) || errorLength == 0) { + // need at least 1 so that we don't access beyond the length of fromoffsets[] + errorLength = 1; + } + + int32_t ferroffset; + + if (useOffsets) { + // Unicode buffer offset of the start of the error UChars + ferroffset = (int32_t)((unibufbp - unibuf) - errorLength); + if (ferroffset < 0) { + // approximation - the character started in the previous Unicode buffer + ferroffset = 0; + } + + // get the corresponding byte offset out of fromoffsets[] + // go back if the offset is not known for some of the UChars + int32_t fromoffset; + do { + fromoffset = fromoffsets[ferroffset]; + } while (fromoffset < 0 && --ferroffset >= 0); + + // total input file offset = + // input file offset of the current byte buffer + + // byte buffer offset of where the current Unicode buffer is converted from + + // fromoffsets[Unicode offset] + ferroffset = static_cast<int32_t>(infoffset + (prevbufp - buf) + fromoffset); + errtag = "problemCvtFromU"; + } else { + // Do not use fromoffsets if (t != nullptr) because the Unicode text may + // be different from what the offsets refer 
to. + + // output file offset + ferroffset = (int32_t)(outfoffset + (bufp - outbuf)); + errtag = "problemCvtFromUOut"; + } + + length = (int8_t)snprintf(pos, sizeof(pos), "%u", (int)ferroffset); + + // output the code points that caused the error + UnicodeString str; + for (i = 0; i < errorLength;) { + if (i > 0) { + str.append((char16_t)uSP); + } + U16_NEXT(errorUChars, i, errorLength, c); + if (c >= 0x100000) { + str.append(nibbleToHex((uint8_t)(c >> 20))); + } + if (c >= 0x10000) { + str.append(nibbleToHex((uint8_t)(c >> 16))); + } + str.append(nibbleToHex((uint8_t)(c >> 12))); + str.append(nibbleToHex((uint8_t)(c >> 8))); + str.append(nibbleToHex((uint8_t)(c >> 4))); + str.append(nibbleToHex((uint8_t)c)); + } + + initMsg(pname); + u_wmsg(stderr, errtag, + UnicodeString(pos, length, "").getTerminatedBuffer(), + str.getTerminatedBuffer(), + u_wmsg_errorName(err)); + u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer()); + + willexit = true; + err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */ + } + + // Replaced a check for whether the intermediate Unicode characters were all consumed by + // looping until they are; message key "premEnd" now obsolete. + + // Finally, write the converted buffer to the output file + size_t outlen = (size_t) (bufp - outbuf); + outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile)); + if (wr != outlen) { + UnicodeString str(strerror(errno)); + initMsg(pname); + u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer()); + willexit = true; + } + + if (willexit) { + goto error_exit; + } + } while (!toSawEndOfUnicode); + } while (!fromSawEndOfBytes); + } while (!flush); // Stop when we have flushed the + // converters (this means that it's + // the end of output) + + goto normal_exit; + +error_exit: + ret = false; + +normal_exit: + // Cleanup. 
+ + ucnv_close(convfrom); + ucnv_close(convto); + +#if !UCONFIG_NO_TRANSLITERATION + delete t; +#endif + + if (closeFile) { + fclose(infile); + } + + return ret; +} + +static void usage(const char *pname, int ecode) { + const char16_t *msg; + int32_t msgLen; + UErrorCode err = U_ZERO_ERROR; + FILE *fp = ecode ? stderr : stdout; + int res; + + initMsg(pname); + msg = + ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord", + &msgLen, &err); + UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1)); + UnicodeString mname(msg, msgLen + 1); + + res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer()); + if (!ecode) { + if (!res) { + fputc('\n', fp); + } + if (!u_wmsg(fp, "help")) { + /* Now dump callbacks and finish. */ + + int i, count = + UPRV_LENGTHOF(transcode_callbacks); + for (i = 0; i < count; ++i) { + fprintf(fp, " %s", transcode_callbacks[i].name); + } + fputc('\n', fp); + } + } + + exit(ecode); +} + +extern int +main(int argc, char **argv) +{ + FILE *outfile; + int ret = 0; + + size_t bufsz = DEFAULT_BUFSZ; + + const char *fromcpage = 0; + const char *tocpage = 0; + const char *translit = 0; + const char *outfilestr = 0; + UBool fallback = false; + + UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP; + const void *fromuctxt = 0; + UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP; + const void *touctxt = 0; + + char **iter, **remainArgv, **remainArgvLimit; + char **end = argv + argc; + + const char *pname; + + UBool printConvs = false, printCanon = false, printTranslits = false; + const char *printName = 0; + + UBool verbose = false; + UErrorCode status = U_ZERO_ERROR; + + ConvertFile cf; + + /* Initialize ICU */ + u_init(&status); + if (U_FAILURE(status)) { + fprintf(stderr, "%s: can not initialize ICU. status = %s\n", + argv[0], u_errorName(status)); + exit(1); + } + + // Get and prettify pname. 
+ pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR); +#if U_PLATFORM_USES_ONLY_WIN32_API + if (!pname) { + pname = uprv_strrchr(*argv, '/'); + } +#endif + if (!pname) { + pname = *argv; + } else { + ++pname; + } + + // First, get the arguments from command-line + // to know the codepages to convert between + + remainArgv = remainArgvLimit = argv + 1; + for (iter = argv + 1; iter != end; iter++) { + // Check for from charset + if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) { + iter++; + if (iter != end) + fromcpage = *iter; + else + usage(pname, 1); + } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) { + iter++; + if (iter != end) + tocpage = *iter; + else + usage(pname, 1); + } else if (strcmp("-x", *iter) == 0) { + iter++; + if (iter != end) + translit = *iter; + else + usage(pname, 1); + } else if (!strcmp("--fallback", *iter)) { + fallback = true; + } else if (!strcmp("--no-fallback", *iter)) { + fallback = false; + } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) { + iter++; + if (iter != end) { + bufsz = atoi(*iter); + if ((int) bufsz <= 0) { + initMsg(pname); + UnicodeString str(*iter); + initMsg(pname); + u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer()); + return 3; + } + } else { + usage(pname, 1); + } + } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) { + if (printTranslits) { + usage(pname, 1); + } + printConvs = true; + } else if (strcmp("--default-code", *iter) == 0) { + if (printTranslits) { + usage(pname, 1); + } + printName = ucnv_getDefaultName(); + } else if (strcmp("--list-code", *iter) == 0) { + if (printTranslits) { + usage(pname, 1); + } + + iter++; + if (iter != end) { + UErrorCode e = U_ZERO_ERROR; + printName = ucnv_getAlias(*iter, 0, &e); + if (U_FAILURE(e) || !printName) { + UnicodeString str(*iter); + initMsg(pname); + u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer()); + return 2; + } + } else + usage(pname, 1); + } else if (strcmp("--canon", *iter) == 0) 
{ + printCanon = true; + } else if (strcmp("-L", *iter) == 0 + || !strcmp("--list-transliterators", *iter)) { + if (printConvs) { + usage(pname, 1); + } + printTranslits = true; + } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter) + || !strcmp("--help", *iter)) { + usage(pname, 0); + } else if (!strcmp("-c", *iter)) { + fromucallback = UCNV_FROM_U_CALLBACK_SKIP; + } else if (!strcmp("--to-callback", *iter)) { + iter++; + if (iter != end) { + const struct callback_ent *cbe = findCallback(*iter); + if (cbe) { + fromucallback = cbe->fromu; + fromuctxt = cbe->fromuctxt; + } else { + UnicodeString str(*iter); + initMsg(pname); + u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); + return 4; + } + } else { + usage(pname, 1); + } + } else if (!strcmp("--from-callback", *iter)) { + iter++; + if (iter != end) { + const struct callback_ent *cbe = findCallback(*iter); + if (cbe) { + toucallback = cbe->tou; + touctxt = cbe->touctxt; + } else { + UnicodeString str(*iter); + initMsg(pname); + u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); + return 4; + } + } else { + usage(pname, 1); + } + } else if (!strcmp("-i", *iter)) { + toucallback = UCNV_TO_U_CALLBACK_SKIP; + } else if (!strcmp("--callback", *iter)) { + iter++; + if (iter != end) { + const struct callback_ent *cbe = findCallback(*iter); + if (cbe) { + fromucallback = cbe->fromu; + fromuctxt = cbe->fromuctxt; + toucallback = cbe->tou; + touctxt = cbe->touctxt; + } else { + UnicodeString str(*iter); + initMsg(pname); + u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); + return 4; + } + } else { + usage(pname, 1); + } + } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) { + verbose = false; + } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) { + verbose = true; + } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) { + printf("%s v2.1 ICU " U_ICU_VERSION "\n", pname); + return 0; + } else if (!strcmp("-o", *iter) || !strcmp("--output", 
*iter)) { + ++iter; + if (iter != end && !outfilestr) { + outfilestr = *iter; + } else { + usage(pname, 1); + } + } else if (0 == strcmp("--add-signature", *iter)) { + cf.signature = 1; + } else if (0 == strcmp("--remove-signature", *iter)) { + cf.signature = -1; + } else if (**iter == '-' && (*iter)[1]) { + usage(pname, 1); + } else { + // move a non-option up in argv[] + *remainArgvLimit++ = *iter; + } + } + + if (printConvs || printName) { + return printConverters(pname, printName, printCanon) ? 2 : 0; + } else if (printTranslits) { + return printTransliterators(printCanon) ? 3 : 0; + } + + if (!fromcpage || !uprv_strcmp(fromcpage, "-")) { + fromcpage = ucnv_getDefaultName(); + } + if (!tocpage || !uprv_strcmp(tocpage, "-")) { + tocpage = ucnv_getDefaultName(); + } + + // Open the correct output file or connect to stdout for reading input + if (outfilestr != 0 && strcmp(outfilestr, "-")) { + outfile = fopen(outfilestr, "wb"); + if (outfile == 0) { + UnicodeString str1(outfilestr, ""); + UnicodeString str2(strerror(errno), ""); + initMsg(pname); + u_wmsg(stderr, "cantCreateOutputF", + str1.getBuffer(), str2.getBuffer()); + return 1; + } + } else { + outfilestr = "-"; + outfile = stdout; +#ifdef USE_FILENO_BINARY_MODE + if (setmode(fileno(outfile), O_BINARY) == -1) { + u_wmsg(stderr, "cantSetOutBinMode"); + exit(-1); + } +#endif + } + + /* Loop again on the arguments to find all the input files, and + convert them. 
*/ + + cf.setBufferSize(bufsz); + + if(remainArgv < remainArgvLimit) { + for (iter = remainArgv; iter != remainArgvLimit; iter++) { + if (!cf.convertFile( + pname, fromcpage, toucallback, touctxt, tocpage, + fromucallback, fromuctxt, fallback, translit, *iter, + outfile, verbose) + ) { + goto error_exit; + } + } + } else { + if (!cf.convertFile( + pname, fromcpage, toucallback, touctxt, tocpage, + fromucallback, fromuctxt, fallback, translit, 0, + outfile, verbose) + ) { + goto error_exit; + } + } + + goto normal_exit; +error_exit: +#if !UCONFIG_NO_LEGACY_CONVERSION + ret = 1; +#else + fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n"); +#endif +normal_exit: + + if (outfile != stdout) { + fclose(outfile); + } + + u_cleanup(); + + return ret; +} + + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/intl/icu/source/extra/uconv/uconv.vcxproj b/intl/icu/source/extra/uconv/uconv.vcxproj new file mode 100644 index 0000000000..e00a288f4d --- /dev/null +++ b/intl/icu/source/extra/uconv/uconv.vcxproj @@ -0,0 +1,97 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <PropertyGroup Label="Globals"> + <ProjectGuid>{DBA4088D-F6F9-4F8F-8820-082A4765C16C}</ProjectGuid> + </PropertyGroup> + <PropertyGroup Label="Configuration"> + <ConfigurationType>Application</ConfigurationType> + <UseOfMfc>false</UseOfMfc> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <!-- The following import will include the 'default' configuration options for VS projects. 
--> + <Import Project="..\..\allinone\Build.Windows.ProjectConfiguration.props" /> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup> + <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion> + <OutDir>.\$(Platform)\$(Configuration)\</OutDir> + <IntDir>.\$(Platform)\$(Configuration)\</IntDir> + <MakeCFG>$(Platform)\$(Configuration)</MakeCFG> + <!-- The ICU projects use "Win32" to mean "x86", so we need to special case it. --> + <OutDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</OutDir> + <IntDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</IntDir> + <MakeCFG Condition="'$(Platform)'=='Win32'">x86\$(Configuration)</MakeCFG> + <!-- Disable Incremental Linking for Release builds as it prevents Link-time Code Generation --> + <LinkIncremental Condition="'$(Configuration)'=='Debug'">true</LinkIncremental> + <LinkIncremental Condition="'$(Configuration)'=='Release'">false</LinkIncremental> + </PropertyGroup> + <!-- Options that are common to *all* configurations --> + <ItemDefinitionGroup> + <Midl> + <TypeLibraryName>$(OutDir)\uconv.tlb</TypeLibraryName> + </Midl> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + <CompileAs>Default</CompileAs> + <DisableLanguageExtensions>true</DisableLanguageExtensions> + <AdditionalIncludeDirectories>..\..\..\include;..\..\common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> + <PreprocessorDefinitions>UCONVMSG_LINK;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <PrecompiledHeaderOutputFile>$(OutDir)\uconv.pch</PrecompiledHeaderOutputFile> + <AssemblerListingLocation>$(OutDir)/</AssemblerListingLocation> + <ObjectFileName>$(OutDir)/</ObjectFileName> + <ProgramDataBaseFileName>$(OutDir)\uconv.pdb</ProgramDataBaseFileName> + </ClCompile> + <Link> + <SubSystem>Console</SubSystem> + <OutputFile>$(OutDir)\uconv.exe</OutputFile> + 
<AdditionalDependencies>uconvmsg.lib;%(AdditionalDependencies)</AdditionalDependencies> + <AdditionalLibraryDirectories>$(OutDir);..\..\..\$(IcuLibOutputDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> + </Link> + <CustomBuildStep> + <Command>copy "$(TargetPath)" ..\..\..\$(IcuBinOutputDir)</Command> + <Outputs>..\..\..\$(IcuBinOutputDir)\$(TargetFileName);%(Outputs)</Outputs> + </CustomBuildStep> + </ItemDefinitionGroup> + <!-- Options that are common to all 'Debug' project configurations --> + <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'"> + <ClCompile> + <BrowseInformation>true</BrowseInformation> + <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary> + </ClCompile> + <Link> + <AdditionalDependencies>icuucd.lib;icuind.lib;%(AdditionalDependencies)</AdditionalDependencies> + </Link> + </ItemDefinitionGroup> + <!-- Options that are common to all 'Release' project configurations --> + <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'"> + <ClCompile> + <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary> + <FunctionLevelLinking>true</FunctionLevelLinking> + </ClCompile> + <Link> + <AdditionalDependencies>icuuc.lib;icuin.lib;%(AdditionalDependencies)</AdditionalDependencies> + </Link> + </ItemDefinitionGroup> + <ItemGroup> + <ClCompile Include="uconv.cpp" /> + <ClCompile Include="uwmsg.c" /> + </ItemGroup> + <ItemGroup> + <ClInclude Include="unicode\uwmsg.h" /> + </ItemGroup> + <ItemGroup> + <None Include="resources\fr.txt" /> + <None Include="resources\root.txt" /> + <CustomBuild Include="makedata.mak"> + <Command>nmake /nologo /f %(Filename).mak icup="$(ProjectDir)..\..\.." CFG=$(MakeCFG)</Command> + <Outputs>$(MakeCFG)\uconvmsg.lib;%(Outputs)</Outputs> + </CustomBuild> + <None Include="resfiles.mk" /> + </ItemGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project>
\ No newline at end of file diff --git a/intl/icu/source/extra/uconv/uconv.vcxproj.filters b/intl/icu/source/extra/uconv/uconv.vcxproj.filters new file mode 100644 index 0000000000..04ed73b5d2 --- /dev/null +++ b/intl/icu/source/extra/uconv/uconv.vcxproj.filters @@ -0,0 +1,50 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + <Filter Include="Source Files"> + <UniqueIdentifier>{c0129698-5425-4988-b34a-fe215c03a818}</UniqueIdentifier> + <Extensions>cpp;c;cxx;rc;def;r;odl;idl;hpj;bat</Extensions> + </Filter> + <Filter Include="Header Files"> + <UniqueIdentifier>{8b1cc031-0c2e-41cf-a1f4-53e2ebdfc4ac}</UniqueIdentifier> + <Extensions>h;hpp;hxx;hm;inl</Extensions> + </Filter> + <Filter Include="Resource Bundles"> + <UniqueIdentifier>{bcebaef1-fbff-4aca-9a90-0f652e3fd00b}</UniqueIdentifier> + <Extensions>txt</Extensions> + </Filter> + <Filter Include="Build Scripts"> + <UniqueIdentifier>{7faa3049-c5bb-4b2c-ac0f-10b572b69c83}</UniqueIdentifier> + <Extensions>mak;mk;bat</Extensions> + </Filter> + </ItemGroup> + <ItemGroup> + <ClCompile Include="uconv.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + <ClCompile Include="uwmsg.c"> + <Filter>Source Files</Filter> + </ClCompile> + </ItemGroup> + <ItemGroup> + <ClInclude Include="unicode\uwmsg.h"> + <Filter>Header Files</Filter> + </ClInclude> + </ItemGroup> + <ItemGroup> + <None Include="resources\fr.txt"> + <Filter>Resource Bundles</Filter> + </None> + <None Include="resources\root.txt"> + <Filter>Resource Bundles</Filter> + </None> + <None Include="resfiles.mk"> + <Filter>Build Scripts</Filter> + </None> + </ItemGroup> + <ItemGroup> + <CustomBuild Include="makedata.mak"> + <Filter>Build Scripts</Filter> + </CustomBuild> + </ItemGroup> +</Project>
\ No newline at end of file diff --git a/intl/icu/source/extra/uconv/unicode/uwmsg.h b/intl/icu/source/extra/uconv/unicode/uwmsg.h new file mode 100644 index 0000000000..d8497d843f --- /dev/null +++ b/intl/icu/source/extra/uconv/unicode/uwmsg.h @@ -0,0 +1,31 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2004, International Business Machines Corporation +* and others. All Rights Reserved. +********************************************************************** + +Get a message out of the default resource bundle, messageformat it, +and print it to stderr +*/ + +#ifndef _UWMSG +#define _UWMSG + +#include <stdio.h> + +#include "unicode/ures.h" + +/* Set the path to wmsg's bundle. + Caller owns storage. +*/ +U_CFUNC UResourceBundle *u_wmsg_setPath(const char *path, UErrorCode *err); + +/* Format a message and print it's output to a given file stream */ +U_CFUNC int u_wmsg(FILE *fp, const char *tag, ... ); + +/* format an error message */ +U_CFUNC const UChar* u_wmsg_errorName(UErrorCode err); + +#endif diff --git a/intl/icu/source/extra/uconv/uwmsg.c b/intl/icu/source/extra/uconv/uwmsg.c new file mode 100644 index 0000000000..2f611e94eb --- /dev/null +++ b/intl/icu/source/extra/uconv/uwmsg.c @@ -0,0 +1,267 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1998-2016, International Business Machines Corporation +* and others. All Rights Reserved. +********************************************************************** +* +* File uwmsg.c +* +* Modification History: +* +* Date Name Description +* 06/14/99 stephen Creation. 
+******************************************************************************* +*/ + +#include "unicode/ucnv.h" +#include "unicode/ustring.h" +#include "unicode/umsg.h" +#include "unicode/uwmsg.h" +#include "unicode/ures.h" +#include "unicode/putil.h" +#include "cmemory.h" +#include "cstring.h" + +#include <stdbool.h> +#include <stdlib.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> + +#define BUF_SIZE 128 + +/* Print a ustring to the specified FILE* in the default codepage */ +static void +uprint(const UChar *s, + int32_t sourceLen, + FILE *f, + UErrorCode *status) +{ + /* converter */ + UConverter *converter; + char buf [BUF_SIZE]; + const UChar *mySource; + const UChar *mySourceEnd; + char *myTarget; + int32_t arraySize; + + if(s == 0) return; + + /* set up the conversion parameters */ + mySource = s; + mySourceEnd = mySource + sourceLen; + myTarget = buf; + arraySize = BUF_SIZE; + + /* open a default converter */ + converter = ucnv_open(0, status); + + /* if we failed, clean up and exit */ + if(U_FAILURE(*status)) goto finish; + + /* perform the conversion */ + do { + /* reset the error code */ + *status = U_ZERO_ERROR; + + /* perform the conversion */ + ucnv_fromUnicode(converter, &myTarget, myTarget + arraySize, + &mySource, mySourceEnd, NULL, + true, status); + + /* Write the converted data to the FILE* */ + fwrite(buf, sizeof(char), myTarget - buf, f); + + /* update the conversion parameters*/ + myTarget = buf; + arraySize = BUF_SIZE; + } + while(*status == U_BUFFER_OVERFLOW_ERROR); + +finish: + + /* close the converter */ + ucnv_close(converter); +} + +static UResourceBundle *gBundle = NULL; + +U_STRING_DECL(gNoFormatting, " (UCONFIG_NO_FORMATTING see uconfig.h)", 38); + +U_CFUNC UResourceBundle *u_wmsg_setPath(const char *path, UErrorCode *err) +{ + if(U_FAILURE(*err)) + { + return 0; + } + + if(gBundle != NULL) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + else + { + UResourceBundle *b = NULL; + b = ures_open(path, NULL, 
err); + if(U_FAILURE(*err)) + { + return 0; + } + + gBundle = b; + + U_STRING_INIT(gNoFormatting, " (UCONFIG_NO_FORMATTING see uconfig.h)", 38); + } + + return gBundle; +} + +/* Format a message and print it's output to fp */ +U_CFUNC int u_wmsg(FILE *fp, const char *tag, ... ) +{ + const UChar *msg; + int32_t msgLen; + UErrorCode err = U_ZERO_ERROR; +#if !UCONFIG_NO_FORMATTING + va_list ap; +#endif + UChar result[4096]; + int32_t resultLength = UPRV_LENGTHOF(result); + + if(gBundle == NULL) + { +#if 0 + fprintf(stderr, "u_wmsg: No path set!!\n"); /* FIXME: codepage?? */ +#endif + return -1; + } + + msg = ures_getStringByKey(gBundle, tag, &msgLen, &err); + + if(U_FAILURE(err)) + { + return -1; + } + +#if UCONFIG_NO_FORMATTING + resultLength = UPRV_LENGTHOF(gNoFormatting); + if((msgLen + resultLength) <= UPRV_LENGTHOF(result)) { + memcpy(result, msg, msgLen * U_SIZEOF_UCHAR); + memcpy(result + msgLen, gNoFormatting, resultLength); + resultLength += msgLen; + uprint(result, resultLength, fp, &err); + } else { + uprint(msg,msgLen, fp, &err); + } +#else + (void)gNoFormatting; // suppress -Wunused-variable + va_start(ap, tag); + + resultLength = u_vformatMessage(uloc_getDefault(), msg, msgLen, result, resultLength, ap, &err); + + va_end(ap); + + if(U_FAILURE(err)) + { +#if 0 + fprintf(stderr, "u_wmsg: failed to format %s:%s, err %s\n", + uloc_getDefault(), + tag, + u_errorName(err)); +#endif + err = U_ZERO_ERROR; + uprint(msg,msgLen, fp, &err); + return -1; + } + + uprint(result, resultLength, fp, &err); +#endif + + if(U_FAILURE(err)) + { +#if 0 + fprintf(stderr, "u_wmsg: failed to print %s: %s, err %s\n", + uloc_getDefault(), + tag, + u_errorName(err)); +#endif + return -1; + } + + return 0; +} + +/* these will break if the # of messages change. simply add or remove 0's .. 
*/ +UChar **gInfoMessages = NULL; + +UChar **gErrMessages = NULL; + +static const UChar *fetchErrorName(UErrorCode err) +{ + if (!gInfoMessages) { + gInfoMessages = (UChar **)malloc((U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START)*sizeof(UChar*)); + memset(gInfoMessages, 0, (U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START)*sizeof(UChar*)); + } + if (!gErrMessages) { + gErrMessages = (UChar **)malloc(U_ERROR_LIMIT*sizeof(UChar*)); + memset(gErrMessages, 0, U_ERROR_LIMIT*sizeof(UChar*)); + } + if(err>=0) + return gErrMessages[err]; + else + return gInfoMessages[err-U_ERROR_WARNING_START]; +} + +U_CFUNC const UChar *u_wmsg_errorName(UErrorCode err) +{ + UChar *msg; + int32_t msgLen; + UErrorCode subErr = U_ZERO_ERROR; + const char *textMsg = NULL; + + /* try the cache */ + msg = (UChar*)fetchErrorName(err); + + if(msg) + { + return msg; + } + + if(gBundle == NULL) + { + msg = NULL; + } + else + { + const char *errname = u_errorName(err); + if (errname) { + msg = (UChar*)ures_getStringByKey(gBundle, errname, &msgLen, &subErr); + if(U_FAILURE(subErr)) + { + msg = NULL; + } + } + } + + if(msg == NULL) /* Couldn't find it anywhere.. */ + { + char error[128]; + textMsg = u_errorName(err); + if (!textMsg) { + sprintf(error, "UNDOCUMENTED ICU ERROR %d", err); + textMsg = error; + } + msg = (UChar*)malloc((strlen(textMsg)+1)*sizeof(msg[0])); + u_charsToUChars(textMsg, msg, (int32_t)(strlen(textMsg)+1)); + } + + if(err>=0) + gErrMessages[err] = msg; + else + gInfoMessages[err-U_ERROR_WARNING_START] = msg; + + return msg; +} |