decompose-filesystem-strings   [plain text]


Use CFString to do UTF=8 decomposition.

We used to use the UTF-8-MAC charset, but this does HFS-canonical
UTF-8 decomposition, and we really only want to mess with the
composition when we are accepting filesystem paths. In most other
cases, we just want to convert and leave the composition untouched.
There are routines that depend on this, eg. strchr_m.

When we are sending paths back to the client, the composition ought
to already be correct, so there's no need to do anything.

Index: samba/source/include/smb.h
===================================================================
--- samba/source/include/smb.h.orig
+++ samba/source/include/smb.h
@@ -64,6 +64,7 @@
 #define STR_ASCII 4
 #define STR_UNICODE 8
 #define STR_NOALIGN 16
+#define STR_FILESYSTEM 32
 #define STR_TERMINATE_ASCII 128
 
 /* how long to wait for secondary SMB packets (milli-seconds) */
Index: samba/source/lib/charcnv.c
===================================================================
--- samba/source/lib/charcnv.c.orig
+++ samba/source/lib/charcnv.c
@@ -5,6 +5,7 @@
    Copyright (C) Andrew Tridgell 2001
    Copyright (C) Simo Sorce 2001
    Copyright (C) Martin Pool 2003
+   Copyright (C) 2009 Apple Inc. All rights reserved.
    
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -357,6 +358,130 @@ static size_t convert_string_internal(ch
 	}
 }
 
+#if DARWINOS
+
+#include <CoreFoundation/CoreFoundation.h>
+#include <CoreFoundation/CFStringEncodingConverter.h>
+#include <CoreFoundation/CFUnicodePrecomposition.h>
+#include <libkern/OSByteOrder.h>
+
+/* Convert a canonically decomposed UTF8 into composed UTF16. */
+static size_t push_ucs2_path_darwin(
+	const void * src, size_t nsrcbytes,
+	void * dest, size_t ndestbytes)
+{
+    CFIndex consumed = 0;
+    CFIndex produced = 0;
+    uint32_t ret;
+
+    if (nsrcbytes == (size_t)-1) {
+	nsrcbytes = strlen(src) + 1;
+    }
+
+    ret = CFStringEncodingBytesToUnicode(kCFStringEncodingUTF8,
+	    kCFStringEncodingComposeCombinings | kCFStringEncodingAllowLossyConversion,
+	    src, nsrcbytes,
+	    &consumed,
+	    dest, ndestbytes / sizeof(uint16_t),
+	    &produced);
+
+    if (ret != kCFStringEncodingConversionSuccess) {
+	    if (ret == kCFStringEncodingInsufficientOutputBufferLength) {
+		errno = E2BIG;
+	    } else {
+		errno = EINVAL;
+	    }
+
+	    return -1;
+    }
+
+    CFMutableStringRef mref;
+
+    mref = CFStringCreateMutableWithExternalCharactersNoCopy(
+		kCFAllocatorDefault,
+		dest,
+		produced,
+		ndestbytes / sizeof(uint16_t),
+		kCFAllocatorNull);
+
+    if (!mref) {
+	    DEBUG(0, ("CFStringCreateMutableWithExternalCharactersNoCopy failed???\n"));
+	    errno = ENOMEM;
+	    return -1;
+    }
+
+    CFStringNormalize(mref,  kCFStringNormalizationFormC);
+
+    /* Track the new length. */
+    produced = CFStringGetLength(mref);
+    CFRelease(mref);
+
+    /* Need to byteswap from little to native endian. */
+    if (OSHostByteOrder() == OSBigEndian) {
+	size_t i;
+	uint16_t tmpval;
+
+	for (i = 0; i < ndestbytes; i += sizeof(uint16_t)) {
+	    tmpval = OSReadBigInt16(dest, i);
+	    OSWriteLittleInt16(dest, i, tmpval);
+	}
+    }
+
+    return produced * sizeof(uint16_t);
+}
+
+/* Convert a UTF16 string into canonically decomposed UTF8. */
+static size_t pull_ucs2_path_darwin(
+	const void * src, size_t nsrcbytes,
+	void * dest, size_t ndestbytes)
+{
+    CFIndex consumed = 0;
+    CFIndex produced = 0;
+    uint32_t ret;
+
+    void * tmpsrc = NULL;
+
+    if (nsrcbytes == (size_t)-1) {
+	    nsrcbytes = (strlen_w((const smb_ucs2_t *)src) + 1) * 2;
+    }
+
+    /* Need to byteswap from little to native endian. */
+    if (OSHostByteOrder() == OSBigEndian) {
+	size_t i;
+	uint16_t tmpval;
+
+	tmpsrc = SMB_MALLOC(nsrcbytes);
+
+	for (i = 0; i < nsrcbytes; i += sizeof(uint16_t)) {
+	    tmpval = OSReadBigInt16(src, i);
+	    OSWriteLittleInt16(tmpsrc, i, tmpval);
+	}
+    }
+
+    ret = CFStringEncodingUnicodeToBytes(kCFStringEncodingUTF8,
+	    kCFStringEncodingUseHFSPlusCanonical | kCFStringEncodingAllowLossyConversion,
+	    tmpsrc ? tmpsrc : src, nsrcbytes / sizeof(uint16_t),
+	    &consumed,
+	    dest, ndestbytes,
+	    &produced);
+
+    if (ret == kCFStringEncodingConversionSuccess) {
+	SAFE_FREE(tmpsrc);
+	return produced;
+    }
+
+    if (ret == kCFStringEncodingInsufficientOutputBufferLength) {
+	errno = E2BIG;
+    } else {
+	errno = EINVAL;
+    }
+
+    SAFE_FREE(tmpsrc);
+    return -1;
+}
+
+#endif /* DARWINOS */
+
 /**
  * Convert string from one encoding to another, making error checking etc
  * Fast path version - handles ASCII first.
@@ -1063,6 +1188,11 @@ size_t push_ucs2(const void *base_ptr, v
 	/* ucs2 is always a multiple of 2 bytes */
 	dest_len &= ~1;
 
+#if DARWINOS
+	if (flags & STR_FILESYSTEM) {
+	    ret = push_ucs2_path_darwin(src, src_len, dest, dest_len);
+	} else
+#endif /* DARWINOS */
 	ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
 	if (ret == (size_t)-1) {
 		return 0;
@@ -1231,6 +1361,11 @@ size_t pull_ucs2(const void *base_ptr, c
 	if (src_len != (size_t)-1)
 		src_len &= ~1;
 	
+#if DARWINOS
+	if (flags & STR_FILESYSTEM) {
+	    ret = pull_ucs2_path_darwin(src, src_len, dest, dest_len);
+	} else
+#endif /* DARWINOS */
 	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
 	if (ret == (size_t)-1) {
 		return 0;
Index: samba/source/smbd/reply.c
===================================================================
--- samba/source/smbd/reply.c.orig
+++ samba/source/smbd/reply.c
@@ -223,6 +223,8 @@ size_t srvstr_get_path_wcard(char *inbuf
 	SMB_ASSERT(dest_len == sizeof(pstring));
 #endif
 
+	flags |= STR_FILESYSTEM;
+
 	if (src_len == 0) {
 		ret = srvstr_pull_buf( inbuf, tmppath_ptr, src, dest_len, flags);
 	} else {
@@ -263,6 +265,8 @@ size_t srvstr_get_path(char *inbuf, char
 	SMB_ASSERT(dest_len == sizeof(pstring));
 #endif
 
+	flags |= STR_FILESYSTEM;
+
 	if (src_len == 0) {
 		ret = srvstr_pull_buf( inbuf, tmppath_ptr, src, dest_len, flags);
 	} else {
@@ -1780,7 +1784,8 @@ int reply_ctemp(connection_struct *conn,
 	   thing in the byte section. JRA */
 	SSVALS(p, 0, -1); /* what is this? not in spec */
 #endif
-	namelen = srvstr_push(outbuf, p, s, BUFFER_SIZE - (p - outbuf), STR_ASCII|STR_TERMINATE);
+	namelen = srvstr_push(outbuf, p, s, BUFFER_SIZE - (p - outbuf),
+		STR_ASCII|STR_TERMINATE|STR_FILESYSTEM);
 	p += namelen;
 	outsize = set_message_end(outbuf, p);
 
Index: samba/source/smbd/trans2.c
===================================================================
--- samba/source/smbd/trans2.c.orig
+++ samba/source/smbd/trans2.c
@@ -1285,7 +1285,8 @@ static BOOL get_lanman2_dir_entry(connec
 			p += 23;
 			nameptr = p;
 			p += align_string(outbuf, p, 0);
-			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p), STR_TERMINATE);
+			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p),
+				STR_TERMINATE|STR_FILESYSTEM);
 			if (SVAL(outbuf, smb_flg2) & FLAGS2_UNICODE_STRINGS) {
 				if (len > 2) {
 					SCVAL(nameptr, -1, len - 2);
@@ -1320,7 +1321,8 @@ static BOOL get_lanman2_dir_entry(connec
 			}
 			p += 27;
 			nameptr = p - 1;
-			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p), STR_TERMINATE | STR_NOALIGN);
+			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p),
+				STR_TERMINATE | STR_NOALIGN | STR_FILESYSTEM);
 			if (SVAL(outbuf, smb_flg2) & FLAGS2_UNICODE_STRINGS) {
 				if (len > 2) {
 					len -= 2;
@@ -1376,7 +1378,8 @@ static BOOL get_lanman2_dir_entry(connec
 			/* Push the ea_data followed by the name. */
 			p += fill_ea_buffer(ea_ctx, p, space_remaining - (p - pdata), conn, name_list);
 			nameptr = p;
-			len = srvstr_push(outbuf, p + 1, fname, PTR_DIFF(end_data, p+1), STR_TERMINATE | STR_NOALIGN);
+			len = srvstr_push(outbuf, p + 1, fname, PTR_DIFF(end_data, p+1),
+				STR_TERMINATE | STR_NOALIGN | STR_FILESYSTEM);
 			if (SVAL(outbuf, smb_flg2) & FLAGS2_UNICODE_STRINGS) {
 				if (len > 2) {
 					len -= 2;
@@ -1424,7 +1427,8 @@ static BOOL get_lanman2_dir_entry(connec
 				mangle_map(mangled_name,True,True,
 					   conn->params);
 				mangled_name[12] = 0;
-				len = srvstr_push(outbuf, p+2, mangled_name, 24, STR_UPPER|STR_UNICODE);
+				len = srvstr_push(outbuf, p+2, mangled_name, 24,
+					STR_UPPER|STR_UNICODE|STR_FILESYSTEM);
 				if (len < 24) {
 					memset(p + 2 + len,'\0',24 - len);
 				}
@@ -1433,7 +1437,8 @@ static BOOL get_lanman2_dir_entry(connec
 				memset(p,'\0',26);
 			}
 			p += 2 + 24;
-			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p), STR_TERMINATE_ASCII);
+			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p),
+				STR_TERMINATE_ASCII|STR_FILESYSTEM);
 			SIVAL(q,0,len);
 			p += len;
 			SIVAL(p,0,0); /* Ensure any padding is null. */
@@ -1454,7 +1459,8 @@ static BOOL get_lanman2_dir_entry(connec
 			SOFF_T(p,0,file_size); p += 8;
 			SOFF_T(p,0,allocation_size); p += 8;
 			SIVAL(p,0,nt_extmode); p += 4;
-			len = srvstr_push(outbuf, p + 4, fname, PTR_DIFF(end_data, p+4), STR_TERMINATE_ASCII);
+			len = srvstr_push(outbuf, p + 4, fname, PTR_DIFF(end_data, p+4),
+				STR_TERMINATE_ASCII|STR_FILESYSTEM);
 			SIVAL(p,0,len);
 			p += 4 + len;
 			SIVAL(p,0,0); /* Ensure any padding is null. */
@@ -1481,7 +1487,8 @@ static BOOL get_lanman2_dir_entry(connec
 				SIVAL(p,0,ea_size); /* Extended attributes */
 				p +=4;
 			}
-			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p), STR_TERMINATE_ASCII);
+			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p),
+				STR_TERMINATE_ASCII|STR_FILESYSTEM);
 			SIVAL(q, 0, len);
 			p += len;
 
@@ -1499,7 +1506,8 @@ static BOOL get_lanman2_dir_entry(connec
 			p += 4;
 			/* this must *not* be null terminated or w2k gets in a loop trying to set an
 			   acl on a dir (tridge) */
-			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p), STR_TERMINATE_ASCII);
+			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p),
+				STR_TERMINATE_ASCII|STR_FILESYSTEM);
 			SIVAL(p, -4, len);
 			p += len;
 			SIVAL(p,0,0); /* Ensure any padding is null. */
@@ -1529,7 +1537,8 @@ static BOOL get_lanman2_dir_entry(connec
 			SIVAL(p,0,0); p += 4; /* Unknown - reserved ? */
 			SIVAL(p,0,sbuf.st_ino); p += 4; /* FileIndexLow */
 			SIVAL(p,0,sbuf.st_dev); p += 4; /* FileIndexHigh */
-			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p), STR_TERMINATE_ASCII);
+			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p),
+				STR_TERMINATE_ASCII|STR_FILESYSTEM);
 			SIVAL(q, 0, len);
 			p += len; 
 			SIVAL(p,0,0); /* Ensure any padding is null. */
@@ -1567,7 +1576,8 @@ static BOOL get_lanman2_dir_entry(connec
 				mangle_map(mangled_name,True,True,
 					   conn->params);
 				mangled_name[12] = 0;
-				len = srvstr_push(outbuf, p+2, mangled_name, 24, STR_UPPER|STR_UNICODE);
+				len = srvstr_push(outbuf, p+2, mangled_name, 24,
+					STR_UPPER|STR_UNICODE|STR_FILESYSTEM);
 				SSVAL(p, 0, len);
 				if (len < 24) {
 					memset(p + 2 + len,'\0',24 - len);
@@ -1580,7 +1590,8 @@ static BOOL get_lanman2_dir_entry(connec
 			SSVAL(p,0,0); p += 2; /* Reserved ? */
 			SIVAL(p,0,sbuf.st_ino); p += 4; /* FileIndexLow */
 			SIVAL(p,0,sbuf.st_dev); p += 4; /* FileIndexHigh */
-			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p), STR_TERMINATE_ASCII);
+			len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p),
+				STR_TERMINATE_ASCII|STR_FILESYSTEM);
 			SIVAL(q,0,len);
 			p += len;
 			SIVAL(p,0,0); /* Ensure any padding is null. */
@@ -1603,14 +1614,16 @@ static BOOL get_lanman2_dir_entry(connec
 				DEBUG(10,("get_lanman2_dir_entry: SMB_FIND_FILE_UNIX\n"));
 				p = store_file_unix_basic(conn, p,
 							NULL, &sbuf);
-				len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p), STR_TERMINATE);
+				len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p),
+					STR_TERMINATE|STR_FILESYSTEM);
 			} else {
 				DEBUG(10,("get_lanman2_dir_entry: SMB_FIND_FILE_UNIX_INFO2\n"));
 				p = store_file_unix_basic_info2(conn, p,
 							NULL, &sbuf);
 				nameptr = p;
 				p += 4;
-				len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p), 0);
+				len = srvstr_push(outbuf, p, fname, PTR_DIFF(end_data, p),
+					0|STR_FILESYSTEM);
 				SIVAL(nameptr, 0, len);
 			}
 
@@ -3749,7 +3762,8 @@ total_data=%u (should be %u)\n", (unsign
 			if(!mangle_is_8_3(short_name, True, conn->params)) {
 				mangle_map(short_name,True,True,conn->params);
 			}
-			len = srvstr_push(outbuf, pdata+4, short_name, max_data_bytes - 4, STR_UNICODE);
+			len = srvstr_push(outbuf, pdata+4, short_name, max_data_bytes - 4,
+				STR_UNICODE|STR_FILESYSTEM);
 			data_size = 4 + len;
 			SIVAL(pdata,0,len);
 			break;
@@ -3759,7 +3773,8 @@ total_data=%u (should be %u)\n", (unsign
 			/*
 			  this must be *exactly* right for ACLs on mapped drives to work
 			 */
-			len = srvstr_push(outbuf, pdata+4, dos_fname, max_data_bytes - 4, STR_UNICODE);
+			len = srvstr_push(outbuf, pdata+4, dos_fname, max_data_bytes - 4,
+				STR_UNICODE|STR_FILESYSTEM);
 			DEBUG(10,("call_trans2qfilepathinfo: SMB_QUERY_FILE_NAME_INFO\n"));
 			data_size = 4 + len;
 			SIVAL(pdata,0,len);
@@ -3800,7 +3815,8 @@ total_data=%u (should be %u)\n", (unsign
 			pdata += 24;
 			SIVAL(pdata,0,ea_size);
 			pdata += 4; /* EA info */
-			len = srvstr_push(outbuf, pdata+4, dos_fname, max_data_bytes - (pdata+4 - *ppdata), STR_UNICODE);
+			len = srvstr_push(outbuf, pdata+4, dos_fname, max_data_bytes - (pdata+4 - *ppdata),
+				STR_UNICODE|STR_FILESYSTEM);
 			SIVAL(pdata,0,len);
 			pdata += 4 + len;
 			data_size = PTR_DIFF(pdata,(*ppdata));
@@ -3956,7 +3972,8 @@ total_data=%u (should be %u)\n", (unsign
 				if (len == -1)
 					return(UNIXERROR(ERRDOS,ERRnoaccess));
 				buffer[len] = 0;
-				len = srvstr_push(outbuf, pdata, buffer, max_data_bytes, STR_TERMINATE);
+				len = srvstr_push(outbuf, pdata, buffer, max_data_bytes,
+					STR_TERMINATE|STR_FILESYSTEM);
 				pdata += len;
 				data_size = PTR_DIFF(pdata,(*ppdata));