523 lines
16 KiB
C
523 lines
16 KiB
C
/*
 *  linux/fs/umsdos/mangle.c
 *
 *      Written 1993 by Jacques Gelinas
 *
 * Controls the mangling of file names so that they fit the MSDOS name space.
 * Many optimisations by GLU == dglaude@is1.vub.ac.be (Glaude David)
 */
|
|
|
|
#include <linux/errno.h>
|
|
#include <linux/string.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/umsdos_fs.h>
|
|
|
|
/* (This file is used outside of the kernel) */
|
|
#ifndef __KERNEL__
|
|
#define KERN_WARNING
|
|
#endif
|
|
|
|
/*
|
|
* Complete the mangling of the MSDOS fake name
|
|
* based on the position of the entry in the EMD file.
|
|
*
|
|
* Simply complete the job of umsdos_parse; fill the extension.
|
|
*
|
|
* Beware that info->f_pos must be set.
|
|
*/
|
|
void umsdos_manglename (struct umsdos_info *info)
|
|
{
|
|
if (info->msdos_reject) {
|
|
/* #Specification: file name / non MSDOS conforming / mangling
|
|
* Each non MSDOS conforming file has a special extension
|
|
* build from the entry position in the EMD file.
|
|
*
|
|
* This number is then transform in a base 32 number, where
|
|
* each digit is expressed like hexadecimal number, using
|
|
* digit and letter, except it uses 22 letters from 'a' to 'v'.
|
|
* The number 32 comes from 2**5. It is faster to split a binary
|
|
* number using a base which is a power of two. And I was 32
|
|
* when I started this project. Pick your answer :-) .
|
|
*
|
|
* If the result is '0', it is replace with '_', simply
|
|
* to make it odd.
|
|
*
|
|
* This is true for the first two character of the extension.
|
|
* The last one is taken from a list of odd character, which
|
|
* are:
|
|
*
|
|
* { } ( ) ! ` ^ & @
|
|
*
|
|
* With this scheme, we can produce 9216 ( 9* 32 * 32)
|
|
* different extensions which should not clash with any useful
|
|
* extension already popular or meaningful. Since most directory
|
|
* have much less than 32 * 32 files in it, the first character
|
|
* of the extension of any mangled name will be {.
|
|
*
|
|
* Here are the reason to do this (this kind of mangling).
|
|
*
|
|
* -The mangling is deterministic. Just by the extension, we
|
|
* are able to locate the entry in the EMD file.
|
|
*
|
|
* -By keeping to beginning of the file name almost unchanged,
|
|
* we are helping the MSDOS user.
|
|
*
|
|
* -The mangling produces names not too ugly, so an msdos user
|
|
* may live with it (remember it, type it, etc...).
|
|
*
|
|
* -The mangling produces names ugly enough so no one will
|
|
* ever think of using such a name in real life. This is not
|
|
* fool proof. I don't think there is a total solution to this.
|
|
*/
|
|
int entry_num;
|
|
char *pt = info->fake.fname + info->fake.len;
|
|
/* lookup for encoding the last character of the extension
|
|
* It contains valid character after the ugly one to make sure
|
|
* even if someone overflows the 32 * 32 * 9 limit, it still
|
|
* does something
|
|
*/
|
|
#define SPECIAL_MANGLING '{','}','(',')','!','`','^','&','@'
|
|
static char lookup3[] =
|
|
{
|
|
SPECIAL_MANGLING,
|
|
/* This is the start of lookup12 */
|
|
'_', '1', '2', '3', '4', '5', '6', '7', '8', '9',
|
|
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
|
|
'p', 'q', 'r', 's', 't', 'u', 'v'
|
|
};
|
|
|
|
#define lookup12 (lookup3+9)
|
|
entry_num = info->f_pos / UMSDOS_REC_SIZE;
|
|
if (entry_num > (9* 32 * 32)){
|
|
printk (KERN_WARNING "UMSDOS: more than 9216 files in a directory.\n"
|
|
"This may break the mangling strategy.\n"
|
|
"Not a killer problem. See doc.\n");
|
|
}
|
|
*pt++ = '.';
|
|
*pt++ = lookup3 [(entry_num >> 10) & 31];
|
|
*pt++ = lookup12[(entry_num >> 5) & 31];
|
|
*pt++ = lookup12[entry_num & 31];
|
|
*pt = '\0'; /* help doing printk */
|
|
info->fake.len += 4;
|
|
info->msdos_reject = 0; /* Avoid mangling twice */
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Evaluate the record size needed to store of name of len character.
|
|
* The value returned is a multiple of UMSDOS_REC_SIZE.
|
|
*/
|
|
int umsdos_evalrecsize (int len)
|
|
{
|
|
struct umsdos_dirent dirent;
|
|
int nbrec = 1 + ((len - 1 + (dirent.name - (char *) &dirent))
|
|
/ UMSDOS_REC_SIZE);
|
|
|
|
return nbrec * UMSDOS_REC_SIZE;
|
|
/*
|
|
* GLU This should be inlined or something to speed it up to the max.
|
|
* GLU nbrec is absolutely not needed to return the value.
|
|
*/
|
|
}
|
|
#ifdef TEST
|
|
int umsdos_evalrecsize_old (int len)
|
|
{
|
|
struct umsdos_dirent dirent;
|
|
int size = len + (dirent.name - (char *) &dirent);
|
|
int nbrec = size / UMSDOS_REC_SIZE;
|
|
int extra = size % UMSDOS_REC_SIZE;
|
|
|
|
if (extra > 0)
|
|
nbrec++;
|
|
return nbrec * UMSDOS_REC_SIZE;
|
|
}
|
|
#endif
|
|
|
|
|
|
/*
|
|
* Fill the struct info with the full and msdos name of a file
|
|
* Return 0 if all is OK, a negative error code otherwise.
|
|
*/
|
|
int umsdos_parse (
|
|
const char *fname,
|
|
int len,
|
|
struct umsdos_info *info)
|
|
{
|
|
int ret = -ENAMETOOLONG;
|
|
|
|
/* #Specification: file name / too long
|
|
* If a file name exceed UMSDOS maxima, the file name is silently
|
|
* truncated. This makes it conformant with the other file system
|
|
* of Linux (minix and ext2 at least).
|
|
*/
|
|
if (len > UMSDOS_MAXNAME)
|
|
len = UMSDOS_MAXNAME;
|
|
{
|
|
const char *firstpt = NULL; /* First place we saw a "." in fname */
|
|
|
|
/* #Specification: file name / non MSDOS conforming / base length 0
|
|
* file names beginning with a period '.' are invalid for MS-DOS.
|
|
* It needs absolutely a base name. So the file name is mangled
|
|
*/
|
|
int ivldchar = fname[0] == '.'; /* At least one invalid character */
|
|
int msdos_len = len;
|
|
int base_len;
|
|
|
|
/*
|
|
* cardinal_per_size tells if there exists at least one
|
|
* DOS pseudo device on length n. See the test below.
|
|
*/
|
|
static const char cardinal_per_size[9] =
|
|
{
|
|
0, 0, 0, 1, 1, 0, 1, 0, 1
|
|
};
|
|
|
|
/*
|
|
* lkp translate all character to acceptable character (for DOS).
|
|
* When lkp[n] == n, it means also it is an acceptable one.
|
|
* So it serves both as a flag and as a translator.
|
|
*/
|
|
static char lkp[256];
|
|
static char is_init;
|
|
|
|
if (!is_init) {
|
|
/*
|
|
* Initialisation of the array is easier and less error
|
|
* prone like this.
|
|
*/
|
|
int i;
|
|
static const char *spc = "\"*+,/:;<=>?[\\]|~";
|
|
|
|
is_init = 1;
|
|
for (i = 0; i <= 32; i++)
|
|
lkp[i] = '#';
|
|
for (i = 33; i < 'A'; i++)
|
|
lkp[i] = (char) i;
|
|
for (i = 'A'; i <= 'Z'; i++)
|
|
lkp[i] = (char) (i + ('a' - 'A'));
|
|
for (i = 'Z' + 1; i < 127; i++)
|
|
lkp[i] = (char) i;
|
|
for (i = 128; i < 256; i++)
|
|
lkp[i] = '#';
|
|
|
|
lkp['.'] = '_';
|
|
while (*spc != '\0')
|
|
lkp[(unsigned char) (*spc++)] = '#';
|
|
}
|
|
/* GLU
|
|
* File names longer than 8+'.'+3 are invalid for MS-DOS,
|
|
* so the file name is to be mangled--no further test is needed.
|
|
* This speeds up handling of long names.
|
|
* The position of the last point is no more necessary anyway.
|
|
*/
|
|
if (len <= (8 + 1 + 3)) {
|
|
const char *pt = fname;
|
|
const char *endpt = fname + len;
|
|
|
|
while (pt < endpt) {
|
|
if (*pt == '.') {
|
|
if (firstpt != NULL) {
|
|
/* 2 . in a file name. Reject */
|
|
ivldchar = 1;
|
|
break;
|
|
} else {
|
|
int extlen = (int) (endpt - pt);
|
|
|
|
firstpt = pt;
|
|
if (firstpt - fname > 8) {
|
|
/* base name longer than 8: reject */
|
|
ivldchar = 1;
|
|
break;
|
|
} else if (extlen > 4) {
|
|
/* Extension longer than 4 (including .): reject */
|
|
ivldchar = 1;
|
|
break;
|
|
} else if (extlen == 1) {
|
|
/* #Specification: file name / non MSDOS conforming / last char == .
|
|
* If the last character of a file name is
|
|
* a period, mangling is applied. MS-DOS does
|
|
* not support those file names.
|
|
*/
|
|
ivldchar = 1;
|
|
break;
|
|
} else if (extlen == 4) {
|
|
/* #Specification: file name / non MSDOS conforming / mangling clash
|
|
* To avoid clash with the umsdos mangling, any file
|
|
* with a special character as the first character
|
|
* of the extension will be mangled. This solves the
|
|
* following problem:
|
|
*
|
|
* #
|
|
* touch FILE
|
|
* # FILE is invalid for DOS, so mangling is applied
|
|
* # file.{_1 is created in the DOS directory
|
|
* touch file.{_1
|
|
* # To UMSDOS file point to a single DOS entry.
|
|
* # So file.{_1 has to be mangled.
|
|
* #
|
|
*/
|
|
static char special[] =
|
|
{
|
|
SPECIAL_MANGLING, '\0'
|
|
};
|
|
|
|
if (strchr (special, firstpt[1]) != NULL) {
|
|
ivldchar = 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
} else if (lkp[(unsigned char) (*pt)] != *pt) {
|
|
ivldchar = 1;
|
|
break;
|
|
}
|
|
pt++;
|
|
}
|
|
} else {
|
|
ivldchar = 1;
|
|
}
|
|
if (ivldchar
|
|
|| (firstpt == NULL && len > 8)
|
|
|| (len == UMSDOS_EMD_NAMELEN
|
|
&& memcmp (fname, UMSDOS_EMD_FILE, UMSDOS_EMD_NAMELEN) == 0)) {
|
|
/* #Specification: file name / --linux-.---
|
|
* The name of the EMD file --linux-.--- is map to a mangled
|
|
* name. So UMSDOS does not restrict its use.
|
|
*/
|
|
/* #Specification: file name / non MSDOS conforming / mangling
|
|
* Non MSDOS conforming file names must use some alias to fit
|
|
* in the MSDOS name space.
|
|
*
|
|
* The strategy is simple. The name is simply truncated to
|
|
* 8 char. points are replace with underscore and a
|
|
* number is given as an extension. This number correspond
|
|
* to the entry number in the EMD file. The EMD file
|
|
* only need to carry the real name.
|
|
*
|
|
* Upper case is also converted to lower case.
|
|
* Control character are converted to #.
|
|
* Spaces are converted to #.
|
|
* The following characters are also converted to #.
|
|
* #
|
|
* " * + , / : ; < = > ? [ \ ] | ~
|
|
* #
|
|
*
|
|
* Sometimes the problem is not in MS-DOS itself but in
|
|
* command.com.
|
|
*/
|
|
int i;
|
|
char *pt = info->fake.fname;
|
|
|
|
base_len = msdos_len = (msdos_len > 8) ? 8 : msdos_len;
|
|
/*
|
|
* There is no '.' any more so we know for a fact that
|
|
* the base length is the length.
|
|
*/
|
|
memcpy (info->fake.fname, fname, msdos_len);
|
|
for (i = 0; i < msdos_len; i++, pt++)
|
|
*pt = lkp[(unsigned char) (*pt)];
|
|
*pt = '\0'; /* GLU We force null termination. */
|
|
info->msdos_reject = 1;
|
|
/*
|
|
* The numeric extension is added only when we know
|
|
* the position in the EMD file, in umsdos_newentry(),
|
|
* umsdos_delentry(), and umsdos_findentry().
|
|
* See umsdos_manglename().
|
|
*/
|
|
} else {
|
|
/* Conforming MSDOS file name */
|
|
strncpy (info->fake.fname, fname, len);
|
|
info->msdos_reject = 0;
|
|
base_len = firstpt != NULL ? (int) (firstpt - fname) : len;
|
|
}
|
|
if (cardinal_per_size[base_len]) {
|
|
/* #Specification: file name / MSDOS devices / mangling
|
|
* To avoid unreachable file from MS-DOS, any MS-DOS conforming
|
|
* file with a basename equal to one of the MS-DOS pseudo
|
|
* devices will be mangled.
|
|
*
|
|
* If a file such as "prn" was created, it would be unreachable
|
|
* under MS-DOS because "prn" is assumed to be the printer, even
|
|
* if the file does have an extension.
|
|
*
|
|
* Since the extension is unimportant to MS-DOS, we must patch
|
|
* the basename also. We simply insert a minus '-'. To avoid
|
|
* conflict with valid file with a minus in front (such as
|
|
* "-prn"), we add an mangled extension like any other
|
|
* mangled file name.
|
|
*
|
|
* Here is the list of DOS pseudo devices:
|
|
*
|
|
* #
|
|
* "prn","con","aux","nul",
|
|
* "lpt1","lpt2","lpt3","lpt4",
|
|
* "com1","com2","com3","com4",
|
|
* "clock$"
|
|
* #
|
|
*
|
|
* and some standard ones for common DOS programs
|
|
*
|
|
* "emmxxxx0","xmsxxxx0","setverxx"
|
|
*
|
|
* (Thanks to Chris Hall <cah17@phoenix.cambridge.ac.uk>
|
|
* for pointing these out to me).
|
|
*
|
|
* Is there one missing?
|
|
*/
|
|
/* This table must be ordered by length */
|
|
static const char *tbdev[] =
|
|
{
|
|
"prn", "con", "aux", "nul",
|
|
"lpt1", "lpt2", "lpt3", "lpt4",
|
|
"com1", "com2", "com3", "com4",
|
|
"clock$",
|
|
"emmxxxx0", "xmsxxxx0", "setverxx"
|
|
};
|
|
|
|
/* Tell where to find in tbdev[], the first name of */
|
|
/* a certain length */
|
|
static const char start_ind_dev[9] =
|
|
{
|
|
0, 0, 0, 4, 12, 12, 13, 13, 16
|
|
};
|
|
char basen[9];
|
|
int i;
|
|
|
|
for (i = start_ind_dev[base_len - 1]; i < start_ind_dev[base_len]; i++) {
|
|
if (memcmp (info->fake.fname, tbdev[i], base_len) == 0) {
|
|
memcpy (basen, info->fake.fname, base_len);
|
|
basen[base_len] = '\0'; /* GLU We force null termination. */
|
|
/*
|
|
* GLU We do that only if necessary; we try to do the
|
|
* GLU simple thing in the usual circumstance.
|
|
*/
|
|
info->fake.fname[0] = '-';
|
|
strcpy (info->fake.fname + 1, basen); /* GLU We already guaranteed a null would be at the end. */
|
|
msdos_len = (base_len == 8) ? 8 : base_len + 1;
|
|
info->msdos_reject = 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
info->fake.fname[msdos_len] = '\0'; /* Help doing printk */
|
|
/* GLU This zero should (always?) be there already. */
|
|
info->fake.len = msdos_len;
|
|
/* Why not use info->fake.len everywhere? Is it longer?
|
|
*/
|
|
memcpy (info->entry.name, fname, len);
|
|
info->entry.name[len] = '\0'; /* for printk */
|
|
info->entry.name_len = len;
|
|
ret = 0;
|
|
}
|
|
/*
|
|
* Evaluate how many records are needed to store this entry.
|
|
*/
|
|
info->recsize = umsdos_evalrecsize (len);
|
|
return ret;
|
|
}
|
|
|
|
#ifdef TEST
|
|
|
|
struct MANG_TEST {
|
|
char *fname; /* Name to validate */
|
|
int msdos_reject; /* Expected msdos_reject flag */
|
|
char *msname; /* Expected msdos name */
|
|
};
|
|
|
|
struct MANG_TEST tb[] =
|
|
{
|
|
"hello", 0, "hello",
|
|
"hello.1", 0, "hello.1",
|
|
"hello.1_", 0, "hello.1_",
|
|
"prm", 0, "prm",
|
|
|
|
#ifdef PROPOSITION
|
|
"HELLO", 1, "hello",
|
|
"Hello.1", 1, "hello.1",
|
|
"Hello.c", 1, "hello.c",
|
|
#else
|
|
/*
|
|
* I find the three examples below very unfortunate. I propose to
|
|
* convert them to lower case in a quick preliminary pass, then test
|
|
* whether there are other troublesome characters. I have not made
|
|
* this change, because it is not easy, but I wanted to mention the
|
|
* principle. Obviously something like that would increase the chance
|
|
* of collisions, for example between "HELLO" and "Hello", but these
|
|
* can be treated elsewhere along with the other collisions.
|
|
*/
|
|
|
|
"HELLO", 1, "hello",
|
|
"Hello.1", 1, "hello_1",
|
|
"Hello.c", 1, "hello_c",
|
|
#endif
|
|
|
|
"hello.{_1", 1, "hello_{_",
|
|
"hello\t", 1, "hello#",
|
|
"hello.1.1", 1, "hello_1_",
|
|
"hel,lo", 1, "hel#lo",
|
|
"Salut.Tu.vas.bien?", 1, "salut_tu",
|
|
".profile", 1, "_profile",
|
|
".xv", 1, "_xv",
|
|
"toto.", 1, "toto_",
|
|
"clock$.x", 1, "-clock$",
|
|
"emmxxxx0", 1, "-emmxxxx",
|
|
"emmxxxx0.abcd", 1, "-emmxxxx",
|
|
"aux", 1, "-aux",
|
|
"prn", 1, "-prn",
|
|
"prn.abc", 1, "-prn",
|
|
"PRN", 1, "-prn",
|
|
/*
|
|
* GLU WARNING: the results of these are different with my version
|
|
* GLU of mangling compared to the original one.
|
|
* GLU CAUSE: the manner of calculating the baselen variable.
|
|
* GLU For you they are always 3.
|
|
* GLU For me they are respectively 7, 8, and 8.
|
|
|
|
*/
|
|
"PRN.abc", 1, "prn_abc",
|
|
"Prn.abcd", 1, "prn_abcd",
|
|
"prn.abcd", 1, "prn_abcd",
|
|
"Prn.abcdefghij", 1, "prn_abcd"
|
|
};
|
|
|
|
int main (int argc, char *argv[])
|
|
{
|
|
int i, rold, rnew;
|
|
|
|
printf ("Testing the umsdos_parse.\n");
|
|
for (i = 0; i < sizeof (tb) / sizeof (tb[0]); i++) {
|
|
struct MANG_TEST *pttb = tb + i;
|
|
struct umsdos_info info;
|
|
int ok = umsdos_parse (pttb->fname, strlen (pttb->fname), &info);
|
|
|
|
if (strcmp (info.fake.fname, pttb->msname) != 0) {
|
|
printf ("**** %s -> ", pttb->fname);
|
|
printf ("%s <> %s\n", info.fake.fname, pttb->msname);
|
|
} else if (info.msdos_reject != pttb->msdos_reject) {
|
|
printf ("**** %s -> %s ", pttb->fname, pttb->msname);
|
|
printf ("%d <> %d\n", info.msdos_reject, pttb->msdos_reject);
|
|
} else {
|
|
printf (" %s -> %s %d\n", pttb->fname, pttb->msname
|
|
,pttb->msdos_reject);
|
|
}
|
|
}
|
|
printf ("Testing the new umsdos_evalrecsize.");
|
|
for (i = 0; i < UMSDOS_MAXNAME; i++) {
|
|
rnew = umsdos_evalrecsize (i);
|
|
rold = umsdos_evalrecsize_old (i);
|
|
if (!(i % UMSDOS_REC_SIZE)) {
|
|
printf ("\n%d:\t", i);
|
|
}
|
|
if (rnew != rold) {
|
|
printf ("**** %d newres: %d != %d \n", i, rnew, rold);
|
|
} else {
|
|
printf (".");
|
|
}
|
|
}
|
|
printf ("\nEnd of Testing.\n");
|
|
|
|
return 0;
|
|
}
|
|
|
|
#endif
|