summaryrefslogtreecommitdiffstats
path: root/source4/ntvfs/posix/pvfs_shortname.c
diff options
context:
space:
mode:
Diffstat (limited to 'source4/ntvfs/posix/pvfs_shortname.c')
-rw-r--r--source4/ntvfs/posix/pvfs_shortname.c699
1 files changed, 699 insertions, 0 deletions
diff --git a/source4/ntvfs/posix/pvfs_shortname.c b/source4/ntvfs/posix/pvfs_shortname.c
new file mode 100644
index 0000000..9e3cf5f
--- /dev/null
+++ b/source4/ntvfs/posix/pvfs_shortname.c
@@ -0,0 +1,699 @@
+/*
+ Unix SMB/CIFS implementation.
+
+ POSIX NTVFS backend - 8.3 name routines
+
+ Copyright (C) Andrew Tridgell 2004
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "system/locale.h"
+#include "vfs_posix.h"
+#include "param/param.h"
+
+#undef strcasecmp
+
+/*
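+ this mangling scheme uses the following format (shown here for a
+ mangle prefix of 1)
+
+ Annnn~n.AAA
+
+ where nnnnn (the digits on both sides of the '~') is a base 36 hash, and A
+ represents characters from the original string. A larger mangle prefix
+ keeps more leading characters of the original name and leaves fewer hash
+ digits in front of the '~'; with the default prefix of 4 the format is
+ AAAAnn~n.AAA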
+
+ The hash is taken of the leading part of the long filename, in uppercase
+
+ for simplicity, we only allow ascii characters in 8.3 names
+*/
+
+/*
+ ===============================================================================
+ NOTE NOTE NOTE!!!
+
+ This file deliberately uses non-multibyte string functions in many places. This
+ is *not* a mistake. This code is multi-byte safe, but it gets this property
+ through some very subtle knowledge of the way multi-byte strings are encoded
+ and the fact that this mangling algorithm only supports ascii characters in
+ 8.3 names.
+
+ please don't convert this file to use the *_m() functions!!
+ ===============================================================================
+*/
+
+
+#if 1 /* change to 0 to compile out the M_DEBUG() messages in this file */
+#define M_DEBUG(level, x) DEBUG(level, x)
+#else
+#define M_DEBUG(level, x)
+#endif
+
+/* these flags are used to mark characters as having particular
+ properties. They are bits in the char_flags[] table of the mangle
+ context, which init_tables() fills in for byte values 1..127:
+
+ FLAG_BASECHAR - ascii letters (both cases) and digits
+ FLAG_ASCII - the FLAG_BASECHAR characters plus a few punctuation
+ characters; this is the set allowed in 8.3 names
+ FLAG_ILLEGAL - characters that make a long filename illegal
+ FLAG_WILDCARD - characters treated as wildcards by is_8_3() */
+#define FLAG_BASECHAR 1
+#define FLAG_ASCII 2
+#define FLAG_ILLEGAL 4
+#define FLAG_WILDCARD 8
+
+/* the "possible" flags are used as a fast way to find possible DOS
+ reserved filenames. FLAG_POSSIBLEn is set on every byte that occurs
+ (in either case) as the n'th byte of one of the reserved_names[]
+ entries, so a name can only be reserved if its first four bytes
+ carry flags 1 to 4 in that order */
+#define FLAG_POSSIBLE1 16
+#define FLAG_POSSIBLE2 32
+#define FLAG_POSSIBLE3 64
+#define FLAG_POSSIBLE4 128
+
+#define DEFAULT_MANGLE_PREFIX 4 /* used when "mangle:prefix" is unset or out of range */
+
+#define MANGLE_BASECHARS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* the 36 hash digits, in value order */
+
+#define FLAG_CHECK(c, flag) (ctx->char_flags[(uint8_t)(c)] & (flag)) /* relies on a variable named ctx being in scope at the point of use */
+
+static const char *reserved_names[] = /* DOS reserved device names, terminated
+ by a NULL entry. is_reserved_name() compares whole names against this list
+ ignoring case, and init_tables() reads the first four bytes of each entry
+ (including the terminating 0 of the three letter ones) to build the
+ FLAG_POSSIBLE* pre-filter */
+{ "AUX", "CON",
+ "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
+ "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
+ "NUL", "PRN", NULL };
+
+
+struct pvfs_mangle_context { /* state of the 8.3 mangling code; allocated by
+ pvfs_mangle_init() and stored in pvfs->mangle_ctx */
+ uint8_t char_flags[256]; /* FLAG_* bits for each byte value, filled in by init_tables() */
+ /*
+ this determines how many characters are used from the original
+ filename in the 8.3 mangled name. A larger value leads to a weaker
+ hash and more collisions. The largest possible value is 6.
+ The remaining positions in front of the '~', plus the one after
+ it, hold the base 36 digits of the hash.
+ */
+ int mangle_prefix;
+ uint32_t mangle_modulus; /* product of (7 - mangle_prefix) factors of 36, computed
+ by init_tables(); mangle_hash() reduces hashes modulo this value */
+
+ /* we will use a very simple direct mapped prefix cache. The big
+ advantage of this cache structure is speed and low memory usage
+
+ The cache is indexed by the low-order bits of the hash, and confirmed by
+ hashing the resulting cache entry to match the known hash
+
+ In this file the slot index is (hash % cache_size), and a hit is
+ confirmed by comparing against the hash stored in
+ prefix_cache_hashes[] for that slot. Both arrays have cache_size
+ entries.
+ */
+ uint32_t cache_size;
+ char **prefix_cache;
+ uint32_t *prefix_cache_hashes;
+
+ /* this is used to reverse the base 36 mapping: it maps each
+ character of MANGLE_BASECHARS to its digit value, all other
+ entries are 0 */
+ unsigned char base_reverse[256];
+};
+
+
+/*
+  reduce the pvfs_name_hash() of the first 'length' bytes of 'key' modulo
+  ctx->mangle_modulus, so that the result can be written with the hash
+  digits available in a mangled name
+
+  the key does not need to be null terminated
+*/
+static uint32_t mangle_hash(struct pvfs_mangle_context *ctx,
+			    const char *key, size_t length)
+{
+	const uint32_t modulus = ctx->mangle_modulus;
+
+	return pvfs_name_hash(key, length) % modulus;
+}
+
+/*
+  remember the first 'length' bytes of 'prefix' (which might not be null
+  terminated) as the long name prefix belonging to 'hash'
+
+  the cache is direct mapped, so whatever occupied the slot before is
+  thrown away
+*/
+static void cache_insert(struct pvfs_mangle_context *ctx,
+			 const char *prefix, int length, uint32_t hash)
+{
+	int slot = hash % ctx->cache_size;
+	char **entry = &ctx->prefix_cache[slot];
+
+	/* evict the previous occupant of this slot, if any */
+	if (*entry) {
+		talloc_free(*entry);
+	}
+
+	/* the copy is a talloc child of the cache array itself */
+	*entry = talloc_strndup(ctx->prefix_cache, prefix, length);
+	ctx->prefix_cache_hashes[slot] = hash;
+}
+
+/*
+  fetch the long name prefix stored for 'hash' in the prefix cache
+
+  returns NULL if the slot is empty or currently holds a different hash.
+  The returned string is owned by the cache.
+*/
+static const char *cache_lookup(struct pvfs_mangle_context *ctx, uint32_t hash)
+{
+	int slot = hash % ctx->cache_size;
+	const char *cached = ctx->prefix_cache[slot];
+
+	/* a filled slot only counts when it was filled for this very hash */
+	if (cached && ctx->prefix_cache_hashes[slot] == hash) {
+		return cached;
+	}
+
+	return NULL;
+}
+
+
+/*
+  decide whether a single path component of 'len' bytes has the shape of
+  a name produced by this mangling code. Letters are accepted in either
+  case.
+
+  mangled names only ever contain pure ascii characters (no multi-byte),
+  so the bytes can be inspected directly without a UCS2 conversion
+ */
+static bool is_mangled_component(struct pvfs_mangle_context *ctx,
+				 const char *name, size_t len)
+{
+	unsigned int pos;
+
+	M_DEBUG(10,("is_mangled_component %s (len %u) ?\n", name, (unsigned int)len));
+
+	/* 8 bytes of base name, optionally followed by '.' and up to 3 more */
+	if (len < 8 || len > 12) {
+		return false;
+	}
+
+	/* the '~' always sits at offset 6, which makes it the cheapest test */
+	if (name[6] != '~') {
+		return false;
+	}
+
+	/* whatever follows the base name must be a '.' plus allowed characters */
+	if (len > 8) {
+		if (name[8] != '.') {
+			return false;
+		}
+		pos = 9;
+		while (name[pos] && pos < len) {
+			if (! FLAG_CHECK(name[pos], FLAG_ASCII)) {
+				return false;
+			}
+			pos++;
+		}
+	}
+
+	/* the characters that were copied from the original name */
+	pos = 0;
+	while (pos < ctx->mangle_prefix) {
+		if (! FLAG_CHECK(name[pos], FLAG_ASCII)) {
+			return false;
+		}
+		pos++;
+	}
+
+	/* the hash digits: the one after the '~' and those between the
+	   prefix and the '~' */
+	if (! FLAG_CHECK(name[7], FLAG_BASECHAR)) {
+		return false;
+	}
+	pos = ctx->mangle_prefix;
+	while (pos < 6) {
+		if (! FLAG_CHECK(name[pos], FLAG_BASECHAR)) {
+			return false;
+		}
+		pos++;
+	}
+
+	M_DEBUG(10,("is_mangled_component %s (len %u) -> yes\n", name, (unsigned int)len));
+
+	return true;
+}
+
+
+
+/*
+  decide whether any component of 'name' is possibly in mangled format,
+  ignoring case
+
+  'name' may be a whole path using unix directory separators; every
+  '/' separated component is tested with is_mangled_component() and the
+  first one that matches makes the result true
+
+  mangled names use only pure ascii characters (no multi-byte) so no
+  UCS2 conversion is needed
+ */
+static bool is_mangled(struct pvfs_mangle_context *ctx, const char *name)
+{
+	const char *component = name;
+	const char *slash;
+
+	M_DEBUG(10,("is_mangled %s ?\n", name));
+
+	/* every component that is followed by a '/' */
+	while ((slash = strchr(component, '/')) != NULL) {
+		if (is_mangled_component(ctx, component, PTR_DIFF(slash, component))) {
+			return true;
+		}
+		component = slash + 1;
+	}
+
+	/* what is left is the final component */
+	return is_mangled_component(ctx, component, strlen(component));
+}
+
+
+/*
+  see if a filename is an allowable 8.3 name.
+
+  only ascii characters are allowed in 8.3 names, which simplifies
+  things greatly (it means that we know the string won't get larger when
+  converted from UNIX to DOS formats). When allow_wildcards is set the
+  wildcard characters are accepted as well.
+
+  the check_case argument is not used by this implementation
+*/
+static bool is_8_3(struct pvfs_mangle_context *ctx,
+		   const char *name, bool check_case, bool allow_wildcards)
+{
+	const int allowed = FLAG_ASCII | (allow_wildcards ? FLAG_WILDCARD : 0);
+	const char *dot;
+	const char *p;
+	int len;
+
+	/* as a special case, the names '.' and '..' are allowable 8.3 names */
+	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) {
+		return true;
+	}
+
+	/* the simplest test is on the overall length of the filename.
+	   The byte length (not the multi-byte one) is used on purpose:
+	   it is faster and it is the quantity that matters here */
+	len = strlen(name);
+	if (len > 12) {
+		return false;
+	}
+
+	/* find the first '.', once again with the non-multibyte function */
+	dot = strchr(name, '.');
+
+	if (dot == NULL) {
+		/* without a '.' the whole name must fit in 8 characters */
+		if (len > 8) {
+			return false;
+		}
+	} else {
+		/* with a '.' the part before it must be at most 8 long and
+		   the part after it between 1 and 3 long */
+		int base_len = PTR_DIFF(dot, name);
+		int ext_len = len - (base_len+1);
+
+		if (base_len > 8 || ext_len > 3 || ext_len == 0) {
+			return false;
+		}
+
+		/* a 8.3 name cannot contain more than 1 '.' */
+		if (strchr(dot+1, '.') != NULL) {
+			return false;
+		}
+	}
+
+	/* the lengths are all OK. Now check the characters themselves;
+	   note that wildcard patterns may have been allowed above */
+	for (p = name; *p; p++) {
+		if (*p == '.') {
+			continue;
+		}
+		if (!FLAG_CHECK(*p, allowed)) {
+			return false;
+		}
+	}
+
+	/* it is a good 8.3 name */
+	return true;
+}
+
+
+/*
+  try to find a 8.3 name in the prefix cache, and if found then return
+  the original long name: the cached prefix, followed by a '.' and the
+  extension of the 8.3 name when it has one. The result is allocated on
+  mem_ctx.
+
+  returns NULL if the name is not in mangled form, if its hash is not in
+  the cache, or if the allocation fails
+
+  NOTE(review): the hash is always read from the first 8 bytes of 'name',
+  while is_mangled() also accepts paths in which only a later component
+  is mangled - callers are presumably expected to pass a single
+  component; confirm
+*/
+static char *check_cache(struct pvfs_mangle_context *ctx,
+			 TALLOC_CTX *mem_ctx, const char *name)
+{
+	uint32_t hash, multiplier;
+	int i;
+	const char *prefix;
+	char extension[4];
+
+	/* make sure that this is a mangled name from this cache */
+	if (!is_mangled(ctx, name)) {
+		M_DEBUG(10,("check_cache: %s -> not mangled\n", name));
+		return NULL;
+	}
+
+	/* we need to extract the hash from the 8.3 name. is_mangled()
+	   accepts the hash digits in either case, but base_reverse[] only
+	   has entries for the upper case ones, so fold the case before
+	   the table lookup */
+	hash = ctx->base_reverse[(unsigned char)toupper((unsigned char)name[7])];
+
+	/* the counter has to be signed: with a mangle prefix of 0 the loop
+	   can only finish once i drops below zero, which an unsigned
+	   counter never does (it would wrap and index far outside name) */
+	for (multiplier=36, i=5; i>=ctx->mangle_prefix; i--) {
+		uint32_t v = ctx->base_reverse[(unsigned char)toupper((unsigned char)name[i])];
+		hash += multiplier * v;
+		multiplier *= 36;
+	}
+
+	/* now look in the prefix cache for that hash */
+	prefix = cache_lookup(ctx, hash);
+	if (!prefix) {
+		M_DEBUG(10,("check_cache: %s -> %08X -> not found\n", name, hash));
+		return NULL;
+	}
+
+	/* we found it - construct the full name */
+	if (name[8] == '.') {
+		/* at most 3 bytes are copied, so the terminator always fits */
+		strncpy(extension, name+9, 3);
+		extension[3] = 0;
+	} else {
+		extension[0] = 0;
+	}
+
+	if (extension[0]) {
+		return talloc_asprintf(mem_ctx, "%s.%s", prefix, extension);
+	}
+
+	return talloc_strdup(mem_ctx, prefix);
+}
+
+
+/*
+  look for a DOS reserved name: true when 'name' equals one of the
+  reserved_names[] entries, ignoring case
+
+  NOTE(review): init_tables() also marks '.' with FLAG_POSSIBLE4, so a
+  name such as "AUX.txt" passes the pre-filter below, but the whole-string
+  comparison then never matches it - confirm whether names with an
+  extension are meant to count as reserved
+*/
+static bool is_reserved_name(struct pvfs_mangle_context *ctx, const char *name)
+{
+	int idx;
+
+	/* fast pre-filter on the first four bytes. The tests must stay in
+	   this order and short-circuit: a byte is only read after all the
+	   bytes before it have passed */
+	if (!FLAG_CHECK(name[0], FLAG_POSSIBLE1) ||
+	    !FLAG_CHECK(name[1], FLAG_POSSIBLE2) ||
+	    !FLAG_CHECK(name[2], FLAG_POSSIBLE3) ||
+	    !FLAG_CHECK(name[3], FLAG_POSSIBLE4)) {
+		return false;
+	}
+
+	/* a likely match, scan the lot */
+	for (idx = 0; reserved_names[idx] != NULL; idx++) {
+		if (strcasecmp(name, reserved_names[idx]) == 0) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+
+/*
+  See if a filename is a legal long filename: it must decode into valid
+  codepoints and must not contain a character flagged FLAG_ILLEGAL.
+
+  NOTE(review): the previous header said that a name ending in '.' is not
+  legal unless it is "." or ".." (JRA), but no such check is made here.
+*/
+static bool is_legal_name(struct pvfs_mangle_context *ctx, const char *name)
+{
+	size_t c_size;
+
+	for (; *name; name += c_size) {
+		codepoint_t c = next_codepoint(name, &c_size);
+
+		if (c == INVALID_CODEPOINT) {
+			return false;
+		}
+
+		/* all high chars are OK, only the ones below 128 can be
+		   flagged as illegal */
+		if (c < 128 && FLAG_CHECK(c, FLAG_ILLEGAL)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+  the main forward mapping function, which converts a long filename to
+  a 8.3 name
+
+  if need83 is not set then we only do the mangling if the name is a
+  reserved DOS name or is illegal as a long name
+
+  if cache83 is not set then we don't cache the result; otherwise the
+  hashed part of the name is stored in the prefix cache under its hash so
+  that check_cache() can map the 8.3 name back
+
+  return NULL if we don't need to do any conversion (or if the allocation
+  fails), otherwise the mangled name in a 13 byte talloc array allocated
+  on ctx
+*/
+static char *name_map(struct pvfs_mangle_context *ctx,
+		      const char *name, bool need83, bool cache83)
+{
+	const char *dot_p;
+	char lead_chars[7];
+	char extension[4];
+	unsigned int extension_length, i;
+	unsigned int prefix_len;
+	uint32_t hash, v;
+	char *new_name;
+	const char *basechars = MANGLE_BASECHARS;
+	int pos;
+
+	/* reserved names are handled specially: they are always mangled */
+	if (!is_reserved_name(ctx, name)) {
+		/* if the name is already a valid 8.3 name then we don't need to
+		   do anything */
+		if (is_8_3(ctx, name, false, false)) {
+			return NULL;
+		}
+
+		/* if the caller doesn't strictly need 8.3 then just check for illegal
+		   filenames */
+		if (!need83 && is_legal_name(ctx, name)) {
+			return NULL;
+		}
+	}
+
+	/* find the last '.' if any */
+	dot_p = strrchr(name, '.');
+
+	if (dot_p) {
+		/* if the extension contains any illegal characters or
+		   is too long or zero length then we treat it as part
+		   of the prefix */
+		for (i=0; i<4 && dot_p[i+1]; i++) {
+			if (! FLAG_CHECK(dot_p[i+1], FLAG_ASCII)) {
+				dot_p = NULL;
+				break;
+			}
+		}
+		if (i == 0 || i == 4) dot_p = NULL;
+	}
+
+	/* the leading characters in the mangled name are taken from
+	   the first characters of the name, upper cased, if they are
+	   ascii; otherwise '_' is used. Names shorter than the mangle
+	   prefix are padded with '_'
+	*/
+	for (i=0;i<ctx->mangle_prefix && name[i];i++) {
+		lead_chars[i] = name[i];
+		if (! FLAG_CHECK(lead_chars[i], FLAG_ASCII)) {
+			lead_chars[i] = '_';
+		}
+		lead_chars[i] = toupper((unsigned char)lead_chars[i]);
+	}
+	for (;i<ctx->mangle_prefix;i++) {
+		lead_chars[i] = '_';
+	}
+
+	/* the part that gets hashed is anything up to the extension dot,
+	   or the whole name when there is no usable extension */
+	if (dot_p) {
+		prefix_len = PTR_DIFF(dot_p, name);
+	} else {
+		prefix_len = strlen(name);
+	}
+
+	/* the extension of the mangled name is taken from the first 3
+	   ascii chars after the dot */
+	extension_length = 0;
+	if (dot_p) {
+		for (i=1; extension_length < 3 && dot_p[i]; i++) {
+			unsigned char c = dot_p[i];
+			if (FLAG_CHECK(c, FLAG_ASCII)) {
+				extension[extension_length++] = toupper(c);
+			}
+		}
+	}
+
+	/* find the hash for this prefix */
+	v = hash = mangle_hash(ctx, name, prefix_len);
+
+	/* 8 + '.' + 3 + terminator */
+	new_name = talloc_array(ctx, char, 13);
+	if (new_name == NULL) {
+		return NULL;
+	}
+
+	/* now form the mangled name. */
+	for (i=0;i<ctx->mangle_prefix;i++) {
+		new_name[i] = lead_chars[i];
+	}
+	new_name[7] = basechars[v % 36];
+	new_name[6] = '~';
+
+	/* the remaining hash digits fill the positions between the prefix
+	   and the '~', least significant digit last. The counter has to be
+	   signed: with a mangle prefix of 0 the loop can only finish once
+	   pos drops below zero, which an unsigned counter never does (it
+	   would wrap and write far outside new_name) */
+	for (pos=5; pos>=ctx->mangle_prefix; pos--) {
+		v = v / 36;
+		new_name[pos] = basechars[v % 36];
+	}
+
+	/* add the extension */
+	if (extension_length) {
+		new_name[8] = '.';
+		memcpy(&new_name[9], extension, extension_length);
+		new_name[9+extension_length] = 0;
+	} else {
+		new_name[8] = 0;
+	}
+
+	if (cache83) {
+		/* put it in the cache */
+		cache_insert(ctx, name, prefix_len, hash);
+	}
+
+	M_DEBUG(10,("name_map: %s -> %08X -> %s (cache=%d)\n",
+		    name, hash, new_name, cache83));
+
+	return new_name;
+}
+
+
+/* initialise the lookup tables of the mangle context: char_flags[],
+  base_reverse[] and mangle_modulus. ctx->mangle_prefix must already be
+  set, as the modulus is derived from it.
+
+  we allow only a very restricted set of characters as 'ascii' in this
+  mangling backend. This isn't a significant problem as modern clients
+  use the 'long' filenames anyway, and those don't have these
+  restrictions.
+*/
+static void init_tables(struct pvfs_mangle_context *ctx)
+{
+	static const uint8_t possible_flag[4] = {
+		FLAG_POSSIBLE1, FLAG_POSSIBLE2, FLAG_POSSIBLE3, FLAG_POSSIBLE4
+	};
+	const char *basechars = MANGLE_BASECHARS;
+	int hash_digits;
+	int i;
+
+	ZERO_STRUCT(ctx->char_flags);
+
+	/* classify the 7 bit characters; 0 and the high bytes keep no
+	   flags here */
+	for (i=1;i<128;i++) {
+		uint8_t flags = 0;
+
+		if ((i >= '0' && i <= '9') ||
+		    (i >= 'a' && i <= 'z') ||
+		    (i >= 'A' && i <= 'Z')) {
+			flags |= (FLAG_ASCII | FLAG_BASECHAR);
+		}
+		if (strchr("_-$~", i)) {
+			flags |= FLAG_ASCII;
+		}
+		if (strchr("*\\/?<>|\":", i)) {
+			flags |= FLAG_ILLEGAL;
+		}
+		if (strchr("*?\"<>", i)) {
+			flags |= FLAG_WILDCARD;
+		}
+
+		ctx->char_flags[i] |= flags;
+	}
+
+	/* map every base 36 digit character back to its value */
+	ZERO_STRUCT(ctx->base_reverse);
+	for (i=0;i<36;i++) {
+		ctx->base_reverse[(uint8_t)basechars[i]] = i;
+	}
+
+	/* fill in the reserved names flags. These are used as a very
+	   fast filter for finding possible DOS reserved filenames: the
+	   n'th byte of every reserved name gets FLAG_POSSIBLEn, in both
+	   cases. For the three letter names the fourth byte is the
+	   terminating 0 */
+	for (i=0; reserved_names[i]; i++) {
+		int pos;
+
+		for (pos=0; pos<4; pos++) {
+			unsigned char c = (unsigned char)reserved_names[i][pos];
+
+			ctx->char_flags[c] |= possible_flag[pos];
+			ctx->char_flags[tolower(c)] |= possible_flag[pos];
+		}
+	}
+
+	/* a '.' is also accepted as a fourth byte */
+	ctx->char_flags[(unsigned char)'.'] |= FLAG_POSSIBLE4;
+
+	/* one factor of 36 for every hash digit of a mangled name */
+	ctx->mangle_modulus = 1;
+	for (hash_digits = 7 - ctx->mangle_prefix; hash_digits > 0; hash_digits--) {
+		ctx->mangle_modulus *= 36;
+	}
+}
+
+/*
+  initialise the mangling code
+
+  allocates the mangle context as a talloc child of pvfs, reads the
+  "mangle:cachesize" and "mangle:prefix" parameters, builds the lookup
+  tables and stores the context in pvfs->mangle_ctx
+
+  returns NT_STATUS_NO_MEMORY if an allocation fails (nothing is left
+  allocated in that case), NT_STATUS_OK otherwise
+ */
+NTSTATUS pvfs_mangle_init(struct pvfs_state *pvfs)
+{
+	/* by default have a max of 512 entries in the cache. */
+	const int default_cache_size = 512;
+	struct pvfs_mangle_context *ctx;
+	int cache_size;
+
+	ctx = talloc(pvfs, struct pvfs_mangle_context);
+	if (ctx == NULL) {
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	/* the cache is indexed with (hash % cache_size), so a size of zero
+	   must never be accepted; a negative value would turn into a huge
+	   unsigned one. Fall back to the default for both */
+	cache_size = lpcfg_parm_int(pvfs->ntvfs->ctx->lp_ctx, NULL, "mangle", "cachesize", default_cache_size);
+	if (cache_size <= 0) {
+		cache_size = default_cache_size;
+	}
+	ctx->cache_size = cache_size;
+
+	ctx->prefix_cache = talloc_array(ctx, char *, ctx->cache_size);
+	if (ctx->prefix_cache == NULL) {
+		/* this also releases everything hanging off ctx */
+		talloc_free(ctx);
+		return NT_STATUS_NO_MEMORY;
+	}
+	ctx->prefix_cache_hashes = talloc_array(ctx, uint32_t, ctx->cache_size);
+	if (ctx->prefix_cache_hashes == NULL) {
+		talloc_free(ctx);
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	/* all slots start out empty */
+	memset(ctx->prefix_cache, 0, sizeof(char *) * ctx->cache_size);
+	memset(ctx->prefix_cache_hashes, 0, sizeof(uint32_t) * ctx->cache_size);
+
+	/* a prefix of 0 is rejected along with the other out of range
+	   values: the hash would then need seven base 36 digits, which is
+	   more than the 32 bit modulus can represent */
+	ctx->mangle_prefix = lpcfg_parm_int(pvfs->ntvfs->ctx->lp_ctx, NULL, "mangle", "prefix", -1);
+	if (ctx->mangle_prefix < 1 || ctx->mangle_prefix > 6) {
+		ctx->mangle_prefix = DEFAULT_MANGLE_PREFIX;
+	}
+
+	init_tables(ctx);
+
+	pvfs->mangle_ctx = ctx;
+
+	return NT_STATUS_OK;
+}
+
+
+/*
+  return the short name for a component of a full name
+
+  this is name_map() with both need83 and cache83 set, so the result is
+  NULL when the component needs no mangling
+*/
+char *pvfs_short_name_component(struct pvfs_state *pvfs, const char *name)
+{
+	char *short_name = name_map(pvfs->mangle_ctx, name, true, true);
+
+	return short_name;
+}
+
+
+/*
+  return the short name for a given entry in a directory
+
+  the last component of name->full_name is mangled. When it needs no
+  mangling a pointer into name->full_name is returned, otherwise the
+  mangled name, which is moved onto mem_ctx
+*/
+const char *pvfs_short_name(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
+			    struct pvfs_filename *name)
+{
+	const char *slash = strrchr(name->full_name, '/');
+	const char *component;
+	char *ret;
+
+	/* a name without any '/' is its own last component; strrchr()
+	   returns NULL for it, which must not be used in arithmetic */
+	component = (slash != NULL) ? slash + 1 : name->full_name;
+
+	ret = pvfs_short_name_component(pvfs, component);
+	if (ret == NULL) {
+		return component;
+	}
+	talloc_steal(mem_ctx, ret);
+	return ret;
+}
+
+/*
+  lookup a mangled name, returning the original long name (allocated on
+  mem_ctx) if present in the cache, and NULL otherwise
+*/
+char *pvfs_mangled_lookup(struct pvfs_state *pvfs, TALLOC_CTX *mem_ctx,
+			  const char *name)
+{
+	char *long_name = check_cache(pvfs->mangle_ctx, mem_ctx, name);
+
+	return long_name;
+}
+
+
+/*
+  look for a DOS reserved name, using the mangle context of this pvfs
+*/
+bool pvfs_is_reserved_name(struct pvfs_state *pvfs, const char *name)
+{
+	const bool reserved = is_reserved_name(pvfs->mangle_ctx, name);
+
+	return reserved;
+}
+
+
+/*
+  see if a component of a filename could be a mangled name from our
+  mangling code. 'name' must be a null terminated single component.
+*/
+bool pvfs_is_mangled_component(struct pvfs_state *pvfs, const char *name)
+{
+	const size_t len = strlen(name);
+
+	return is_mangled_component(pvfs->mangle_ctx, name, len);
+}