1 files changed, 605 insertions, 0 deletions
diff --git a/lib/util/charset/charset_macosxfs.c b/lib/util/charset/charset_macosxfs.c
new file mode 100644
index 0000000..2ecfdff
--- /dev/null
+++ b/lib/util/charset/charset_macosxfs.c
@@ -0,0 +1,605 @@
+/*
+   Unix SMB/CIFS implementation.
+   Samba charset module for Mac OS X/Darwin
+   Copyright (C) Benjamin Riefenstahl 2003
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * modules/charset_macosxfs.c
+ *
+ * A Samba charset module to use on Mac OS X/Darwin as the filesystem
+ * and display encoding.
+ *
+ * Actually two implementations are provided here.  The default
+ * implementation is based on the official CFString API.  The other is
+ * based on internal CFString APIs as defined in the OpenDarwin
+ * source.
+ */
+
+#include "replace.h"
+#include "charset.h"
+#include "charset_proto.h"
+#include "lib/util/debug.h"
+#undef realloc
+
+#ifdef DARWINOS
+
+/*
+ * Include OS frameworks.  These are only needed in this module.
+ */
+#include <CoreFoundation/CFString.h>
+
+/*
+ * See if autoconf has found us the internal headers in some form.
+ */
+#if defined(HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H)
+#	include <CoreFoundation/CFStringEncodingConverter.h>
+#	include <CoreFoundation/CFUnicodePrecomposition.h>
+#	define USE_INTERNAL_API 1
+#elif defined(HAVE_CFSTRINGENCODINGCONVERTER_H)
+#	include <CFStringEncodingConverter.h>
+#	include <CFUnicodePrecomposition.h>
+#	define USE_INTERNAL_API 1
+#endif
+
+/*
+ * Compile time configuration: Do we want debug output?
+ */
+/* #define DEBUG_STRINGS 1 */
+
+/*
+ * A simple, but efficient memory provider for our buffers.
+ */
+static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
+{
+	if (newsize > *size) {
+		*size = newsize + 128;
+		buffer = realloc(buffer, *size);
+	}
+	return buffer;
+}
+
+/*
+ * While there is a version of OpenDarwin for intel, the usual case is
+ * big-endian PPC.  So we need byte swapping to handle the
+ * little-endian byte order of the network protocol.  We also need an
+ * additional dynamic buffer to do this work for incoming data blocks,
+ * because we have to consider the original data as constant.
+ *
+ * We abstract the differences away by providing a simple facade with
+ * these functions/macros:
+ *
+ *	le_to_native(dst,src,len)
+ *	native_to_le(cp,len)
+ *	set_ucbuffer_with_le(buffer,bufsize,data,size)
+ *	set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
+ */
+#ifdef WORDS_BIGENDIAN
+
+static inline void swap_bytes (char * dst, const char * src, size_t len)
+{
+	const char *srcend = src + len;
+	while (src < srcend) {
+		dst[0] = src[1];
+		dst[1] = src[0];
+		dst += 2;
+		src += 2;
+	}
+}
+static inline void swap_bytes_inplace (char * cp, size_t len)
+{
+	char temp;
+	char *end = cp + len;
+	while (cp  < end) {
+		temp = cp[1];
+		cp[1] = cp[0];
+		cp[0] = temp;
+		cp += 2;
+	}
+}
+
+#define le_to_native(dst,src,len)	swap_bytes(dst,src,len)
+#define native_to_le(cp,len)		swap_bytes_inplace(cp,len)
+#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
+	set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
+
+#else	/* ! WORDS_BIGENDIAN */
+
+#define le_to_native(dst,src,len)	memcpy(dst,src,len)
+#define native_to_le(cp,len)		/* nothing */
+#define	set_ucbuffer_with_le(buffer,bufsize,data,size) \
+	(((void)(bufsize)),(UniChar*)(data))
+
+#endif
+
+static inline UniChar *set_ucbuffer_with_le_copy (
+	UniChar *buffer, size_t *bufsize,
+	const void *data, size_t size, size_t reserve)
+{
+	buffer = resize_buffer(buffer, bufsize, size+reserve);
+	le_to_native((char*)buffer,data,size);
+	return buffer;
+}
+
+
+/*
+ * A simple hexdump function for debugging error conditions.
+ */
+#define	debug_out(s)	DEBUG(0,(s))
+
+#ifdef DEBUG_STRINGS
+
+static void hexdump( const char * label, const char * s, size_t len )
+{
+	size_t restlen = len;
+	debug_out("<<<<<<<\n");
+	debug_out(label);
+	debug_out("\n");
+	while (restlen > 0) {
+		char line[100];
+		size_t i, j;
+		char * d = line;
+#undef sprintf
+		d += sprintf(d, "%04X ", (unsigned)(len-restlen));
+		*d++ = ' ';
+		for( i = 0; i<restlen && i<8; ++i ) {
+			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
+		}
+		for( j = i; j<8; ++j ) {
+			d += sprintf(d, "   ");
+		}
+		*d++ = ' ';
+		for( i = 8; i<restlen && i<16; ++i ) {
+			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
+		}
+		for( j = i; j<16; ++j ) {
+			d += sprintf(d, "   ");
+		}
+		*d++ = ' ';
+		for( i = 0; i<restlen && i<16; ++i ) {
+			if(s[i] < ' ' || s[i] >= 0x7F || !isprint(s[i]))
+				*d++ = '.';
+			else
+				*d++ = s[i];
+		}
+		*d++ = '\n';
+		*d = 0;
+		restlen -= i;
+		s += i;
+		debug_out(line);
+	}
+	debug_out(">>>>>>>\n");
+}
+
+#else	/* !DEBUG_STRINGS */
+
+#define hexdump(label,s,len) /* nothing */
+
+#endif
+
+
+#if !USE_INTERNAL_API
+
+/*
+ * An implementation based on documented Mac OS X APIs.
+ *
+ * This does a certain amount of memory management, creating and
+ * manipulating CFString objects.  We try to minimize the impact by
+ * keeping those objects around and re-using them.  We also use
+ * external backing store for the CFStrings where this is possible and
+ * benficial.
+ *
+ * The Unicode normalizations forms available at this level are
+ * generic, not specifically for the file system.  So they may not be
+ * perfect fits.
+ */
+size_t macosxfs_encoding_pull(
+	void *cd,				/* Encoder handle */
+	const char **inbuf, size_t *inbytesleft, /* Script string */
+	char **outbuf, size_t *outbytesleft)	/* UTF-16-LE string */
+{
+	static const int script_code = kCFStringEncodingUTF8;
+	static CFMutableStringRef cfstring = NULL;
+	size_t outsize;
+	CFRange range;
+
+	(void) cd; /* UNUSED */
+
+	if (0 == *inbytesleft) {
+		return 0;
+	}
+
+	if (NULL == cfstring) {
+		/*
+		 * A version with an external backing store as in the
+		 * push function should have been more efficient, but
+		 * testing shows, that it is actually slower (!).
+		 * Maybe kCFAllocatorDefault gets shortcut evaluation
+		 * internally, while kCFAllocatorNull doesn't.
+		 */
+		cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
+	}
+
+	/*
+	 * Three methods of appending to a CFString, choose the most
+	 * efficient.
+	 */
+	if (0 == (*inbuf)[*inbytesleft-1]) {
+		CFStringAppendCString(cfstring, *inbuf, script_code);
+	} else if (*inbytesleft <= 255) {
+		Str255 buffer;
+		buffer[0] = *inbytesleft;
+		memcpy(buffer+1, *inbuf, buffer[0]);
+		CFStringAppendPascalString(cfstring, buffer, script_code);
+	} else {
+		/*
+		 * We would like to use a fixed buffer and a loop
+		 * here, but then we can't guarantee that the input is
+		 * well-formed UTF-8, as we are supposed to do.
+		 */
+		static char *buffer = NULL;
+		static size_t buflen = 0;
+		buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
+		memcpy(buffer, *inbuf, *inbytesleft);
+		buffer[*inbytesleft] = 0;
+		CFStringAppendCString(cfstring, *inbuf, script_code);
+	}
+
+	/*
+	 * Compose characters, using the non-canonical composition
+	 * form.
+	 */
+	CFStringNormalize(cfstring, kCFStringNormalizationFormC);
+
+	outsize = CFStringGetLength(cfstring);
+	range = CFRangeMake(0,outsize);
+
+	if (outsize == 0) {
+		/*
+		 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
+		 * errors here.  That function will always pass 2
+		 * characters.  smbd/open.c:check_for_pipe() cuts a
+		 * patchname to 10 characters blindly.  Suppress the
+		 * debug output in those cases.
+		 */
+		if(2 != *inbytesleft && 10 != *inbytesleft) {
+			debug_out("String conversion: "
+				  "An unknown error occurred\n");
+			hexdump("UTF8->UTF16LE (old) input",
+				*inbuf, *inbytesleft);
+		}
+		errno = EILSEQ; /* Not sure, but this is what we have
+				 * actually seen. */
+		return -1;
+	}
+	if (outsize*2 > *outbytesleft) {
+		CFStringDelete(cfstring, range);
+		debug_out("String conversion: "
+			  "Output buffer too small\n");
+		hexdump("UTF8->UTF16LE (old) input",
+			*inbuf, *inbytesleft);
+		errno = E2BIG;
+		return -1;
+	}
+
+        CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
+	CFStringDelete(cfstring, range);
+
+	native_to_le(*outbuf, outsize*2);
+
+	/*
+	 * Add a converted null byte, if the CFString conversions
+	 * prevented that until now.
+	 */
+	if (0 == (*inbuf)[*inbytesleft-1] &&
+	    (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
+
+		if ((outsize*2+2) > *outbytesleft) {
+			debug_out("String conversion: "
+				  "Output buffer too small\n");
+			hexdump("UTF8->UTF16LE (old) input",
+				*inbuf, *inbytesleft);
+			errno = E2BIG;
+			return -1;
+		}
+
+		(*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
+		outsize += 2;
+	}
+
+	*inbuf += *inbytesleft;
+	*inbytesleft = 0;
+	*outbuf += outsize*2;
+	*outbytesleft -= outsize*2;
+
+	return 0;
+}
+
+size_t macosxfs_encoding_push(
+	void *cd,				/* Encoder handle */
+	const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
+	char **outbuf, size_t *outbytesleft)	/* Script string */
+{
+	static const int script_code = kCFStringEncodingUTF8;
+	static CFMutableStringRef cfstring = NULL;
+	static UniChar *buffer = NULL;
+	static size_t buflen = 0;
+	CFIndex outsize, cfsize, charsconverted;
+
+	(void) cd; /* UNUSED */
+
+	if (0 == *inbytesleft) {
+		return 0;
+	}
+
+	/*
+	 * We need a buffer that can hold 4 times the original data,
+	 * because that is the theoretical maximum that decomposition
+	 * can create currently (in Unicode 4.0).
+	 */
+	buffer = set_ucbuffer_with_le_copy(
+		buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
+
+	if (NULL == cfstring) {
+		cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
+			kCFAllocatorDefault,
+			buffer, *inbytesleft/2, buflen/2,
+			kCFAllocatorNull);
+	} else {
+		CFStringSetExternalCharactersNoCopy(
+			cfstring,
+			buffer, *inbytesleft/2, buflen/2);
+	}
+
+	/*
+	 * Decompose characters, using the non-canonical decomposition
+	 * form.
+	 *
+	 * NB: This isn't exactly what HFS+ wants (see note on
+	 * kCFStringEncodingUseHFSPlusCanonical in
+	 * CFStringEncodingConverter.h), but AFAIK it's the best that
+	 * the official API can do.
+	 */
+	CFStringNormalize(cfstring, kCFStringNormalizationFormD);
+
+	cfsize = CFStringGetLength(cfstring);
+	charsconverted = CFStringGetBytes(
+		cfstring, CFRangeMake(0,cfsize),
+		script_code, 0, false,
+		*(UInt8 **)outbuf, *outbytesleft, &outsize);
+
+	if (0 == charsconverted) {
+		debug_out("String conversion: "
+			  "Buffer too small or not convertible\n");
+		hexdump("UTF16LE->UTF8 (old) input",
+			*inbuf, *inbytesleft);
+		errno = EILSEQ; /* Probably more likely. */
+		return -1;
+	}
+
+	/*
+	 * Add a converted null byte, if the CFString conversions
+	 * prevented that until now.
+	 */
+	if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
+	    (0 != (*outbuf)[outsize-1])) {
+
+		if (((size_t)outsize+1) > *outbytesleft) {
+			debug_out("String conversion: "
+				  "Output buffer too small\n");
+			hexdump("UTF16LE->UTF8 (old) input",
+				*inbuf, *inbytesleft);
+			errno = E2BIG;
+			return -1;
+		}
+
+		(*outbuf)[outsize] = 0;
+		++outsize;
+	}
+
+	*inbuf += *inbytesleft;
+	*inbytesleft = 0;
+	*outbuf += outsize;
+	*outbytesleft -= outsize;
+
+	return 0;
+}
+
+#else /* USE_INTERNAL_API */
+
+/*
+ * An implementation based on internal code as known from the
+ * OpenDarwin CVS.
+ *
+ * This code doesn't need much memory management because it uses
+ * functions that operate on the raw memory directly.
+ *
+ * The push routine here is faster and more compatible with HFS+ than
+ * the other implementation above.  The pull routine is only faster
+ * for some strings, slightly slower for others.  The pull routine
+ * looses because it has to iterate over the data twice, once to
+ * decode UTF-8 and than to do the character composition required by
+ * Windows.
+ */
+static size_t macosxfs_encoding_pull(
+	void *cd,				/* Encoder handle */
+	const char **inbuf, size_t *inbytesleft, /* Script string */
+	char **outbuf, size_t *outbytesleft)	/* UTF-16-LE string */
+{
+	static const int script_code = kCFStringEncodingUTF8;
+	UInt32 srcCharsUsed = 0;
+	UInt32 dstCharsUsed = 0;
+	UInt32 result;
+	uint32_t dstDecomposedUsed = 0;
+	uint32_t dstPrecomposedUsed = 0;
+
+	(void) cd; /* UNUSED */
+
+	if (0 == *inbytesleft) {
+		return 0;
+	}
+
+        result = CFStringEncodingBytesToUnicode(
+		script_code, kCFStringEncodingComposeCombinings,
+		*inbuf, *inbytesleft, &srcCharsUsed,
+		(UniChar*)*outbuf, *outbytesleft, &dstCharsUsed);
+
+	switch(result) {
+	case kCFStringEncodingConversionSuccess:
+		if (*inbytesleft == srcCharsUsed) {
+			break;
+		}
+
+		FALL_THROUGH;
+	case kCFStringEncodingInsufficientOutputBufferLength:
+		debug_out("String conversion: "
+			  "Output buffer too small\n");
+		hexdump("UTF8->UTF16LE (new) input",
+			*inbuf, *inbytesleft);
+		errno = E2BIG;
+		return -1;
+	case kCFStringEncodingInvalidInputStream:
+		/*
+		 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
+		 * errors here.  That function will always pass 2
+		 * characters.  smbd/open.c:check_for_pipe() cuts a
+		 * patchname to 10 characters blindly.  Suppress the
+		 * debug output in those cases.
+		 */
+		if(2 != *inbytesleft && 10 != *inbytesleft) {
+			debug_out("String conversion: "
+				  "Invalid input sequence\n");
+			hexdump("UTF8->UTF16LE (new) input",
+				*inbuf, *inbytesleft);
+		}
+		errno = EILSEQ;
+		return -1;
+	case kCFStringEncodingConverterUnavailable:
+		debug_out("String conversion: "
+			  "Unknown encoding\n");
+		hexdump("UTF8->UTF16LE (new) input",
+			*inbuf, *inbytesleft);
+		errno = EINVAL;
+		return -1;
+	}
+
+	/*
+	 * It doesn't look like CFStringEncodingBytesToUnicode() can
+	 * produce precomposed characters (flags=ComposeCombinings
+	 * doesn't do it), so we need another pass over the data here.
+	 * We can do this in-place, as the string can only get
+	 * shorter.
+	 *
+	 * (Actually in theory there should be an internal
+	 * decomposition and reordering before the actual composition
+	 * step.  But we should be able to rely on that we always get
+	 * fully decomposed strings for input, so this can't create
+	 * problems in reality.)
+	 */
+	CFUniCharPrecompose(
+		(const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
+		(UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
+
+	native_to_le(*outbuf, dstPrecomposedUsed*2);
+
+	*inbuf += srcCharsUsed;
+	*inbytesleft -= srcCharsUsed;
+	*outbuf += dstPrecomposedUsed*2;
+	*outbytesleft -= dstPrecomposedUsed*2;
+
+	return 0;
+}
+
+static size_t macosxfs_encoding_push(
+	void *cd,				/* Encoder handle */
+	const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
+	char **outbuf, size_t *outbytesleft)	/* Script string */
+{
+	static const int script_code = kCFStringEncodingUTF8;
+	static UniChar *buffer = NULL;
+	static size_t buflen = 0;
+	UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
+
+	(void) cd; /* UNUSED */
+
+	if (0 == *inbytesleft) {
+		return 0;
+	}
+
+	buffer = set_ucbuffer_with_le(
+		buffer, &buflen, *inbuf, *inbytesleft);
+
+	result = CFStringEncodingUnicodeToBytes(
+		script_code, kCFStringEncodingUseHFSPlusCanonical,
+		buffer, *inbytesleft/2, &srcCharsUsed,
+		*outbuf, *outbytesleft, &dstCharsUsed);
+
+	switch(result) {
+	case kCFStringEncodingConversionSuccess:
+		if (*inbytesleft/2 == srcCharsUsed) {
+			break;
+		}
+
+		FALL_THROUGH;
+	case kCFStringEncodingInsufficientOutputBufferLength:
+		debug_out("String conversion: "
+			  "Output buffer too small\n");
+		hexdump("UTF16LE->UTF8 (new) input",
+			*inbuf, *inbytesleft);
+		errno = E2BIG;
+		return -1;
+	case kCFStringEncodingInvalidInputStream:
+		/*
+		 * HACK: smbd/open.c:check_for_pipe():is_legal_name()
+		 * cuts a pathname to 10 characters blindly.  Suppress
+		 * the debug output in those cases.
+		 */
+		if(10 != *inbytesleft) {
+			debug_out("String conversion: "
+				  "Invalid input sequence\n");
+			hexdump("UTF16LE->UTF8 (new) input",
+				*inbuf, *inbytesleft);
+		}
+		errno = EILSEQ;
+		return -1;
+	case kCFStringEncodingConverterUnavailable:
+		debug_out("String conversion: "
+			  "Unknown encoding\n");
+		hexdump("UTF16LE->UTF8 (new) input",
+			*inbuf, *inbytesleft);
+		errno = EINVAL;
+		return -1;
+	}
+
+	*inbuf += srcCharsUsed*2;
+	*inbytesleft -= srcCharsUsed*2;
+	*outbuf += dstCharsUsed;
+	*outbytesleft -= dstCharsUsed;
+
+	return 0;
+}
+
+#endif /* USE_INTERNAL_API */
+
+#else /* DARWIN */
+
+void charset_macosfs_dummy(void);
+void charset_macosfs_dummy(void)
+{
+	return;
+}
+
+#endif /* DARWIN */