diff options
Diffstat (limited to 'usr/kinit')
75 files changed, 9373 insertions, 0 deletions
diff --git a/usr/kinit/.gitignore b/usr/kinit/.gitignore new file mode 100644 index 0000000..f5a4f24 --- /dev/null +++ b/usr/kinit/.gitignore @@ -0,0 +1,3 @@ +lib.a +kinit +kinit.shared diff --git a/usr/kinit/Kbuild b/usr/kinit/Kbuild new file mode 100644 index 0000000..6cd5ba6 --- /dev/null +++ b/usr/kinit/Kbuild @@ -0,0 +1,43 @@ +# +# Kbuild file for kinit +# + +# library part of kinit. Is used by programs in sub-directories (resume et al) +lib-y := name_to_dev.o devname.o getarg.o capabilities.o +# use lib for kinit +static/kinit-y := lib.a + +static/kinit-y += kinit.o do_mounts.o ramdisk_load.o initrd.o +static/kinit-y += getintfile.o readfile.o xpio.o +static/kinit-y += do_mounts_md.o do_mounts_mtd.o nfsroot.o + +static/kinit-y += ipconfig/ +static/kinit-y += nfsmount/ +static/kinit-y += run-init/ +static/kinit-y += fstype/ +static/kinit-y += resume/ + +static-y := static/kinit +shared-y := shared/kinit +shared/kinit-y := $(static/kinit-y) + +# Additional include paths files +KLIBCCFLAGS += -I$(srctree)/$(src)/fstype \ + -I$(srctree)/$(src)/ipconfig \ + -I$(srctree)/$(src)/nfsmount \ + -I$(srctree)/$(src)/resume \ + -I$(srctree)/$(src)/run-init + +# Cleaning +targets += static/kinit static/kinit.g shared/kinit shared/kinit.g +subdir- := fstype ipconfig nfsmount resume run-init + +# Clean deletes the static and shared dir +clean-dirs := static shared + +# install binary +ifdef KLIBCSHAREDFLAGS +install-y := shared/kinit +else +install-y := static/kinit +endif diff --git a/usr/kinit/README b/usr/kinit/README new file mode 100644 index 0000000..fa7f645 --- /dev/null +++ b/usr/kinit/README @@ -0,0 +1,9 @@ +kinit - tiny init program +------------------------- + +This program is intended for use as /sbin/init in an initramfs +environment. It currently replaces the kernel's ipconfig and nfsroot +code. 
+ +-- +Bryan O'Sullivan (2003/05/05) diff --git a/usr/kinit/capabilities.c b/usr/kinit/capabilities.c new file mode 100644 index 0000000..2c61025 --- /dev/null +++ b/usr/kinit/capabilities.c @@ -0,0 +1,231 @@ +/* + * Copyright 2011 Google Inc. All Rights Reserved + * Author: mikew@google.com (Mike Waychison) + */ + +/* + * We have to include the klibc types.h here to keep the kernel's + * types.h from being used. + */ +#include <sys/types.h> + +#include <sys/capability.h> +#include <sys/prctl.h> +#include <sys/utsname.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "kinit.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +#define MAKE_CAP(cap) [cap] = { .cap_name = #cap } + +struct capability { + const char *cap_name; +} capabilities[] = { + MAKE_CAP(CAP_CHOWN), + MAKE_CAP(CAP_DAC_OVERRIDE), + MAKE_CAP(CAP_DAC_READ_SEARCH), + MAKE_CAP(CAP_FOWNER), + MAKE_CAP(CAP_FSETID), + MAKE_CAP(CAP_KILL), + MAKE_CAP(CAP_SETGID), + MAKE_CAP(CAP_SETUID), + MAKE_CAP(CAP_SETPCAP), + MAKE_CAP(CAP_LINUX_IMMUTABLE), + MAKE_CAP(CAP_NET_BIND_SERVICE), + MAKE_CAP(CAP_NET_BROADCAST), + MAKE_CAP(CAP_NET_ADMIN), + MAKE_CAP(CAP_NET_RAW), + MAKE_CAP(CAP_IPC_LOCK), + MAKE_CAP(CAP_IPC_OWNER), + MAKE_CAP(CAP_SYS_MODULE), + MAKE_CAP(CAP_SYS_RAWIO), + MAKE_CAP(CAP_SYS_CHROOT), + MAKE_CAP(CAP_SYS_PTRACE), + MAKE_CAP(CAP_SYS_PACCT), + MAKE_CAP(CAP_SYS_ADMIN), + MAKE_CAP(CAP_SYS_BOOT), + MAKE_CAP(CAP_SYS_NICE), + MAKE_CAP(CAP_SYS_RESOURCE), + MAKE_CAP(CAP_SYS_TIME), + MAKE_CAP(CAP_SYS_TTY_CONFIG), + MAKE_CAP(CAP_MKNOD), + MAKE_CAP(CAP_LEASE), + MAKE_CAP(CAP_AUDIT_WRITE), + MAKE_CAP(CAP_AUDIT_CONTROL), + MAKE_CAP(CAP_SETFCAP), + MAKE_CAP(CAP_MAC_OVERRIDE), + MAKE_CAP(CAP_MAC_ADMIN), + MAKE_CAP(CAP_SYSLOG), +}; + +static void fail(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +static void fail(const char *fmt, ...) 
+{ + va_list args; + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + exit(1); +} + +/* + * Find the capability ordinal by name, and return its ordinal. + * Returns -1 on failure. + */ +static int find_capability(const char *s) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(capabilities); i++) { + if (capabilities[i].cap_name + && strcasecmp(s, capabilities[i].cap_name) == 0) { + return i; + } + } + return -1; +} + +static void do_capset(int cap_ordinal) +{ + struct __user_cap_header_struct hdr; + struct __user_cap_data_struct caps[2]; + + /* Get the current capability mask */ + hdr.version = _LINUX_CAPABILITY_VERSION_3; + hdr.pid = getpid(); + if (capget(&hdr, caps)) { + perror("capget()"); + exit(1); + } + + /* Drop the bits */ + if (cap_ordinal < 32) + caps[0].inheritable &= ~(1U << cap_ordinal); + else + caps[1].inheritable &= ~(1U << (cap_ordinal - 32)); + + /* And drop the capability. */ + hdr.version = _LINUX_CAPABILITY_VERSION_3; + hdr.pid = getpid(); + if (capset(&hdr, caps)) + fail("Couldn't drop the capability \"%s\"\n", + capabilities[cap_ordinal].cap_name); +} + +static void do_bset(int cap_ordinal) +{ + int ret; + + ret = prctl(PR_CAPBSET_READ, cap_ordinal); + if (ret == 1) { + ret = prctl(PR_CAPBSET_DROP, cap_ordinal); + if (ret != 0) + fail("Error dropping capability %s from bset\n", + capabilities[cap_ordinal].cap_name); + } else if (ret < 0) + fail("Kernel doesn't recognize capability %d\n", cap_ordinal); +} + +static void do_usermodehelper_file(const char *filename, int cap_ordinal) +{ + uint32_t lo32, hi32; + FILE *file; + static const size_t buf_size = 80; + char buf[buf_size]; + char tail; + size_t bytes_read; + int ret; + + /* Try and open the file */ + file = fopen(filename, "r+"); + if (!file && errno == ENOENT) + fail("Could not disable usermode helpers capabilities as " + "%s is not available\n", filename); + if (!file) + fail("Failed to access file %s errno %d\n", filename, errno); + + /* Read and process the current 
bits */ + bytes_read = fread(buf, 1, buf_size - 1, file); + if (bytes_read == 0) + fail("Trouble reading %s\n", filename); + buf[bytes_read] = '\0'; + ret = sscanf(buf, "%u %u %c", &lo32, &hi32, &tail); + if (ret != 2) + fail("Failed to understand %s \"%s\"\n", filename, buf); + + /* Clear the bits in the local copy */ + if (cap_ordinal < 32) + lo32 &= ~(1 << cap_ordinal); + else + hi32 &= ~(1 << (cap_ordinal - 32)); + + /* Commit the new bit masks to the kernel */ + ret = fflush(file); + if (ret != 0) + fail("Failed on file %s to fflush %d\n", filename, ret); + sprintf(buf, "%u %u", lo32, hi32); + ret = fwrite(buf, 1, strlen(buf) + 1, file); + if (ret != 0) + fail("Failed to commit usermode helper bitmasks: %d\n", ret); + + /* Cleanup */ + fclose(file); +} + +static void do_usermodehelper(int cap_ordinal) +{ + static const char * const files[] = { + "/proc/sys/kernel/usermodehelper/bset", + "/proc/sys/kernel/usermodehelper/inheritable", + }; + int i; + + for (i = 0; i < ARRAY_SIZE(files); i++) + do_usermodehelper_file(files[i], cap_ordinal); +} + +static void drop_capability(int cap_ordinal) +{ + do_usermodehelper(cap_ordinal); + do_bset(cap_ordinal); + do_capset(cap_ordinal); + + printf("Dropped capability: %s\n", capabilities[cap_ordinal].cap_name); +} + +int drop_capabilities(const char *caps) +{ + char *s, *saveptr = NULL; + char *token; + + if (!caps) + return 0; + + /* Create a duplicate string that can be modified. */ + s = strdup(caps); + if (!s) + fail("Failed to drop caps as requested. 
Exiting\n"); + + token = strtok_r(s, ",", &saveptr); + while (token) { + int cap_ordinal = find_capability(token); + + if (cap_ordinal < 0) + fail("Could not understand capability name \"%s\" " + "on command line, failing init\n", token); + + drop_capability(cap_ordinal); + + token = strtok_r(NULL, ",", &saveptr); + } + + free(s); + return 0; +} diff --git a/usr/kinit/capabilities.h b/usr/kinit/capabilities.h new file mode 100644 index 0000000..a32a66a --- /dev/null +++ b/usr/kinit/capabilities.h @@ -0,0 +1,10 @@ +/* + * capabilities.h + */ + +#ifndef KINIT_CAPABILITIES_H +#define KINIT_CAPABILITIES_H + +int drop_capabilities(const char *caps); + +#endif /* KINIT_CAPABILITIES_H */ diff --git a/usr/kinit/devname.c b/usr/kinit/devname.c new file mode 100644 index 0000000..c327e3b --- /dev/null +++ b/usr/kinit/devname.c @@ -0,0 +1,116 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <dirent.h> +#include <string.h> +#include <sys/types.h> +#include <sys/sysmacros.h> + +#include "kinit.h" + +/* + * Print the name of a block device. + */ +#define BUF_SIZE 512 + +static int scansysdir(char *namebuf, char *sysdir, dev_t dev) +{ + char *dirtailptr = strchr(sysdir, '\0'); + DIR *dir; + int done = 0; + struct dirent *de; + char *systail; + FILE *sysdev; + unsigned long ma, mi; + char *ep; + ssize_t rd; + + dir = opendir(sysdir); + if (!dir) + return 0; + + *dirtailptr++ = '/'; + + while (!done && (de = readdir(dir))) { + /* Assume if we see a dot-name in sysfs it's special */ + if (de->d_name[0] == '.') + continue; + + if (de->d_type != DT_UNKNOWN && de->d_type != DT_DIR) + continue; + + if (strlen(de->d_name) >= + (BUF_SIZE - 64) - (dirtailptr - sysdir)) + continue; /* Badness... */ + + strcpy(dirtailptr, de->d_name); + systail = strchr(sysdir, '\0'); + + strcpy(systail, "/dev"); + sysdev = fopen(sysdir, "r"); + if (!sysdev) + continue; + + /* Abusing the namebuf as temporary storage here. 
*/ + rd = fread(namebuf, 1, BUF_SIZE, sysdev); + namebuf[rd] = '\0'; /* Just in case... */ + + fclose(sysdev); + + ma = strtoul(namebuf, &ep, 10); + if (ma != major(dev) || *ep != ':') + continue; + + mi = strtoul(ep + 1, &ep, 10); + if (*ep != '\n') + continue; + + if (mi == minor(dev)) { + /* Found it! */ + strcpy(namebuf, de->d_name); + done = 1; + } else { + /* we have a major number match, scan for partitions */ + *systail = '\0'; + done = scansysdir(namebuf, sysdir, dev); + } + } + + closedir(dir); + return done; +} + +const char *bdevname(dev_t dev) +{ + static char buf[BUF_SIZE]; + char sysdir[BUF_SIZE]; + char *p; + + strcpy(sysdir, "/sys/block"); + + if (!scansysdir(buf, sysdir, dev)) + strcpy(buf, "dev"); /* prints e.g. dev(3,5) */ + + p = strchr(buf, '\0'); + snprintf(p, sizeof buf - (p - buf), "(%d,%d)", major(dev), minor(dev)); + + return buf; +} + +#ifdef TEST_DEVNAME /* Standalone test */ + +int main(int argc, char *argv[]) +{ + dev_t dev; + int i; + + for (i = 1; i < argc; i++) { + dev = strtoul(argv[i], NULL, 0); + + printf("0x%08x = %s\n", (unsigned int)dev, bdevname(dev)); + } + + return 0; +} + +#endif /* TEST */ diff --git a/usr/kinit/do_mounts.c b/usr/kinit/do_mounts.c new file mode 100644 index 0000000..b648299 --- /dev/null +++ b/usr/kinit/do_mounts.c @@ -0,0 +1,533 @@ +#include <errno.h> +#include <fcntl.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <inttypes.h> +#include <mntent.h> + +#include "do_mounts.h" +#include "kinit.h" +#include "fstype.h" +#include "zlib.h" + +#ifndef MS_RELATIME +# define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#endif + +#ifndef MS_STRICTATIME +# define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#endif + +/* + * The following mount option parsing was stolen from + * + * usr/utils/mount_opts.c + * + * and adapted to add some later mount flags. 
+ */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +struct mount_opts { + const char str[16]; + unsigned long rwmask; + unsigned long rwset; + unsigned long rwnoset; +}; + +struct extra_opts { + char *str; + char *end; + int used_size; + int alloc_size; +}; + +/* + * These options define the function of "mount(2)". + */ +#define MS_TYPE (MS_REMOUNT|MS_BIND|MS_MOVE) + + +/* These must be in alphabetic order! */ +static const struct mount_opts options[] = { + /* name mask set noset */ + {"async", MS_SYNCHRONOUS, 0, MS_SYNCHRONOUS}, + {"atime", MS_NOATIME, 0, MS_NOATIME}, + {"bind", MS_TYPE, MS_BIND, 0,}, + {"dev", MS_NODEV, 0, MS_NODEV}, + {"diratime", MS_NODIRATIME, 0, MS_NODIRATIME}, + {"dirsync", MS_DIRSYNC, MS_DIRSYNC, 0}, + {"exec", MS_NOEXEC, 0, MS_NOEXEC}, + {"move", MS_TYPE, MS_MOVE, 0}, + {"nodev", MS_NODEV, MS_NODEV, 0}, + {"noexec", MS_NOEXEC, MS_NOEXEC, 0}, + {"nosuid", MS_NOSUID, MS_NOSUID, 0}, + {"recurse", MS_REC, MS_REC, 0}, + {"relatime", MS_RELATIME, MS_RELATIME, 0}, + {"remount", MS_TYPE, MS_REMOUNT, 0}, + {"ro", MS_RDONLY, MS_RDONLY, 0}, + {"rw", MS_RDONLY, 0, MS_RDONLY}, + {"strictatime", MS_STRICTATIME, MS_STRICTATIME, 0}, + {"suid", MS_NOSUID, 0, MS_NOSUID}, + {"sync", MS_SYNCHRONOUS, MS_SYNCHRONOUS, 0}, + {"verbose", MS_VERBOSE, MS_VERBOSE, 0}, +}; + +/* + * Append 's' to 'extra->str'. 's' is a mount option that can't be turned into + * a flag. Return 0 on success, -1 on error. 
+ */ +static int add_extra_option(struct extra_opts *extra, char *s) +{ + int len = strlen(s); + int newlen = extra->used_size + len; + + if (extra->str) + len++; /* +1 for ',' */ + + if (newlen >= extra->alloc_size) { + char *new; + + new = realloc(extra->str, newlen + 1); /* +1 for NUL */ + if (!new) { + if (extra->str) + free(extra->str); + return -1; + } + + extra->str = new; + extra->end = extra->str + extra->used_size; + extra->alloc_size = newlen; + } + + if (extra->used_size) { + *extra->end = ','; + extra->end++; + } + strcpy(extra->end, s); + extra->used_size += len; + + return 0; +} + +/* + * Parse the options in 'arg'; put numeric mount flags into 'flags' and + * the rest into 'extra'. Return 0 on success, -1 on error. + */ +static int +parse_mount_options(char *arg, unsigned long *flags, struct extra_opts *extra) +{ + char *s; + + while ((s = strsep(&arg, ",")) != NULL) { + char *opt = s; + unsigned int i; + int res; + int no = (s[0] == 'n' && s[1] == 'o'); + int found = 0; + + if (no) + s += 2; + + for (i = 0; i < ARRAY_SIZE(options); i++) { + + res = strcmp(s, options[i].str); + if (res == 0) { + found = 1; + *flags &= ~options[i].rwmask; + if (no) + *flags |= options[i].rwnoset; + else + *flags |= options[i].rwset; + break; + + /* If we're beyond 's' alphabetically, we're done */ + } else if (res < 0) + break; + } + if (! found) + if (add_extra_option(extra, opt) != 0) + return -1; + } + + return 0; +} + +/* Create the device node "name" */ +int create_dev(const char *name, dev_t dev) +{ + unlink(name); + return mknod(name, S_IFBLK | 0600, dev); +} + + +/* + * If there is not a block device for the input 'name', try to create one; if + * we can't that's okay. 
+ */ +static void create_dev_if_not_present(const char *name) +{ + struct stat st; + dev_t dev; + + if (stat(name, &st) == 0) /* file present; we're done */ + return; + dev = name_to_dev_t(name); + if (dev) + (void) create_dev(name, dev); +} + + +/* mount a filesystem, possibly trying a set of different types */ +const char *mount_block(const char *source, const char *target, + const char *type, unsigned long flags, + const void *data) +{ + char *fslist, *p, *ep; + const char *rp; + ssize_t fsbytes; + int fd; + + if (type) { + dprintf("kinit: trying to mount %s on %s " + "with type %s, flags 0x%lx, data '%s'\n", + source, target, type, flags, (char *)data); + int rv = mount(source, target, type, flags, data); + + if (rv != 0) + dprintf("kinit: mount %s on %s failed " + "with errno = %d\n", + source, target, errno); + /* Mount readonly if necessary */ + if (rv == -1 && errno == EACCES && !(flags & MS_RDONLY)) + rv = mount(source, target, type, flags | MS_RDONLY, + data); + return rv ? NULL : type; + } + + /* If no type given, try to identify the type first; this + also takes care of specific ordering requirements, like + ext3 before ext2... 
*/ + fd = open(source, O_RDONLY); + if (fd >= 0) { + int err = identify_fs(fd, &type, NULL, 0); + close(fd); + + if (!err && type) { + dprintf("kinit: %s appears to be a %s filesystem\n", + source, type); + type = mount_block(source, target, type, flags, data); + if (type) + return type; + } + } + + dprintf("kinit: failed to identify filesystem %s, trying all\n", + source); + + fsbytes = readfile("/proc/filesystems", &fslist); + + errno = EINVAL; + if (fsbytes < 0) + return NULL; + + p = fslist; + ep = fslist + fsbytes; + + rp = NULL; + + while (p < ep) { + type = p; + p = strchr(p, '\n'); + if (!p) + break; + *p++ = '\0'; + /* We can't mount a block device as a "nodev" fs */ + if (*type != '\t') + continue; + + type++; + rp = mount_block(source, target, type, flags, data); + if (rp) + break; + if (errno != EINVAL) + break; + } + + free(fslist); + return rp; +} + +/* mount the root filesystem from a block device */ +static int +mount_block_root(int argc, char *argv[], dev_t root_dev, + const char *type, unsigned long flags) +{ + const char *data, *rp; + + data = get_arg(argc, argv, "rootflags="); + create_dev("/dev/root", root_dev); + + errno = 0; + + if (type) { + if ((rp = mount_block("/dev/root", "/root", type, flags, data))) + goto ok; + if (errno != EINVAL) + goto bad; + } + + if (!errno + && (rp = mount_block("/dev/root", "/root", NULL, flags, data))) + goto ok; + +bad: + if (errno != EINVAL) { + /* + * Allow the user to distinguish between failed open + * and bad superblock on root device. + */ + fprintf(stderr, "%s: Cannot open root device %s\n", + progname, bdevname(root_dev)); + return -errno; + } else { + fprintf(stderr, "%s: Unable to mount root fs on device %s\n", + progname, bdevname(root_dev)); + return -ESRCH; + } + +ok: + printf("%s: Mounted root (%s filesystem)%s.\n", + progname, rp, (flags & MS_RDONLY) ? 
" readonly" : ""); + return 0; +} + +static int +mount_roots(int argc, char *argv[], const char *root_dev_name) +{ + char *roots = strdup(root_dev_name); + char *root; + const char *sep = ","; + char *saveptr; + int ret = -ESRCH; + + root = strtok_r(roots, sep, &saveptr); + while (root) { + dev_t root_dev; + + dprintf("kinit: trying to mount %s\n", root); + root_dev = name_to_dev_t(root); + ret = mount_root(argc, argv, root_dev, root); + if (!ret) + break; + root = strtok_r(NULL, sep, &saveptr); + } + free(roots); + return ret; +} + +int +mount_root(int argc, char *argv[], dev_t root_dev, const char *root_dev_name) +{ + unsigned long flags = MS_RDONLY | MS_VERBOSE; + int ret; + const char *type = get_arg(argc, argv, "rootfstype="); + + if (get_flag(argc, argv, "rw") > get_flag(argc, argv, "ro")) { + dprintf("kinit: mounting root rw\n"); + flags &= ~MS_RDONLY; + } + + if (type) { + if (!strcmp(type, "nfs")) + root_dev = Root_NFS; + else if (!strcmp(type, "jffs2") && !major(root_dev)) + root_dev = Root_MTD; + } + + switch (root_dev) { + case Root_NFS: + ret = mount_nfs_root(argc, argv, flags); + break; + case Root_MTD: + ret = mount_mtd_root(argc, argv, root_dev_name, type, flags); + break; + default: + ret = mount_block_root(argc, argv, root_dev, type, flags); + break; + } + + if (!ret) + chdir("/root"); + + return ret; +} + +/* Allocate a buffer and prepend '/root' onto 'src'. 
*/ +static char *prepend_root_dir(const char *src) +{ + size_t len = strlen(src) + 6; /* "/root" */ + char *p = malloc(len); + + if (!p) + return NULL; + + strcpy(p, "/root"); + strcat(p, src); + return p; +} + +int do_cmdline_mounts(int argc, char *argv[]) +{ + int arg_i; + int ret = 0; + + for (arg_i = 0; arg_i < argc; arg_i++) { + const char *fs_dev, *fs_dir, *fs_type; + char *fs_opts; + unsigned long flags = 0; + char *saveptr = NULL; + char *new_dir; + struct extra_opts extra = { 0, 0, 0, 0 }; + + if (strncmp(argv[arg_i], "kinit_mount=", 12)) + continue; + /* + * Format: + * <fs_dev>;<dir>;<fs_type>;[opt1],[optn...] + */ + fs_dev = strtok_r(&argv[arg_i][12], ";", &saveptr); + if (!fs_dev) { + fprintf(stderr, "Failed to parse fs_dev\n"); + continue; + } + fs_dir = strtok_r(NULL, ";", &saveptr); + if (!fs_dir) { + fprintf(stderr, "Failed to parse fs_dir\n"); + continue; + } + fs_type = strtok_r(NULL, ";", &saveptr); + if (!fs_type) { + fprintf(stderr, "Failed to parse fs_type\n"); + continue; + } + fs_opts = strtok_r(NULL, ";", &saveptr); + /* Don't error if there is no option string sent */ + + new_dir = prepend_root_dir(fs_dir); + if (! new_dir) + return -ENOMEM; + create_dev_if_not_present(fs_dev); + ret = parse_mount_options(fs_opts, &flags, &extra); + if (ret != 0) + break; + + if (!mount_block(fs_dev, new_dir, fs_type, + flags, extra.str)) + fprintf(stderr, "Skipping failed mount '%s'\n", fs_dev); + free(new_dir); + if (extra.str) + free(extra.str); + } + return ret; +} + +int do_fstab_mounts(FILE *fp) +{ + struct mntent *ent = NULL; + char *new_dir; + int ret = 0; + + while ((ent = getmntent(fp))) { + unsigned long flags = 0; + struct extra_opts extra = { 0, 0, 0, 0 }; + + new_dir = prepend_root_dir(ent->mnt_dir); + if (! 
new_dir) + return -ENOMEM; + create_dev_if_not_present(ent->mnt_fsname); + ret = parse_mount_options(ent->mnt_opts, &flags, &extra); + if (ret != 0) + break; + + if (!mount_block(ent->mnt_fsname, + new_dir, + ent->mnt_type, + flags, + extra.str)) { + fprintf(stderr, "Skipping failed mount '%s'\n", + ent->mnt_fsname); + } + free(new_dir); + if (extra.str) + free(extra.str); + } + return 0; +} + +int do_mounts(int argc, char *argv[]) +{ + const char *root_dev_name = get_arg(argc, argv, "root="); + const char *root_delay = get_arg(argc, argv, "rootdelay="); + const char *load_ramdisk = get_arg(argc, argv, "load_ramdisk="); + dev_t root_dev = 0; + int err; + FILE *fp; + + dprintf("kinit: do_mounts\n"); + + if (root_delay) { + int delay = atoi(root_delay); + fprintf(stderr, "Waiting %d s before mounting root device...\n", + delay); + sleep(delay); + } + + md_run(argc, argv); + + if (root_dev_name) { + root_dev = name_to_dev_t(root_dev_name); + } else if (get_arg(argc, argv, "nfsroot=") || + get_arg(argc, argv, "nfsaddrs=")) { + root_dev = Root_NFS; + } else { + long rootdev; + getintfile("/proc/sys/kernel/real-root-dev", &rootdev); + root_dev = (dev_t) rootdev; + } + + dprintf("kinit: root_dev = %s\n", bdevname(root_dev)); + + if (initrd_load(argc, argv, root_dev)) { + dprintf("initrd loaded\n"); + return 0; + } + + if (load_ramdisk && atoi(load_ramdisk)) { + if (ramdisk_load(argc, argv)) + root_dev = Root_RAM0; + } + + if (root_dev == Root_MULTI) + err = mount_roots(argc, argv, root_dev_name); + else + err = mount_root(argc, argv, root_dev, root_dev_name); + + if (err) + return err; + + if ((fp = setmntent("/etc/fstab", "r"))) { + err = do_fstab_mounts(fp); + fclose(fp); + } + + if (err) + return err; + + if (get_arg(argc, argv, "kinit_mount=")) + err = do_cmdline_mounts(argc, argv); + return err; +} diff --git a/usr/kinit/do_mounts.h b/usr/kinit/do_mounts.h new file mode 100644 index 0000000..99bc6a6 --- /dev/null +++ b/usr/kinit/do_mounts.h @@ -0,0 +1,49 @@ +/* + * 
do_mounts.h + */ + +#ifndef DO_MOUNTS_H +#define DO_MOUNTS_H + +#include <sys/types.h> +#include <sys/sysmacros.h> +#include <sys/stat.h> + +#define Root_RAM0 __makedev(1, 0) + +/* These device numbers are only used internally */ +#define Root_NFS __makedev(0, 255) +#define Root_MTD __makedev(0, 254) +#define Root_MULTI __makedev(0, 253) + +int create_dev(const char *name, dev_t dev); + +dev_t name_to_dev_t(const char *name); + +const char *mount_block(const char *source, const char *target, + const char *type, unsigned long flags, + const void *data); + +int mount_root(int argc, char *argv[], dev_t root_dev, + const char *root_dev_name); + +int mount_mtd_root(int argc, char *argv[], const char *root_dev_name, + const char *type, unsigned long flags); + +int do_mounts(int argc, char *argv[]); + +int initrd_load(int argc, char *argv[], dev_t root_dev); + +static inline dev_t bstat(const char *name) +{ + struct stat st; + + if (stat(name, &st) || !S_ISBLK(st.st_mode)) + return 0; + return st.st_rdev; +} + +int load_ramdisk_compressed(const char *devpath, FILE * wfd, + off_t ramdisk_start); + +#endif /* DO_MOUNTS_H */ diff --git a/usr/kinit/do_mounts_md.c b/usr/kinit/do_mounts_md.c new file mode 100644 index 0000000..f446620 --- /dev/null +++ b/usr/kinit/do_mounts_md.c @@ -0,0 +1,400 @@ +/* + * Handle autoconfiguration of md devices. This is ugly, partially since + * it still relies on a sizable kernel component. + * + * This file is derived from the Linux kernel. 
+ */ + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <inttypes.h> +#include <sys/sysmacros.h> +#include <sys/md.h> +#include <linux/major.h> + +#include "kinit.h" +#include "do_mounts.h" + +#define LEVEL_NONE (-1000000) + +/* + * When md (and any require personalities) are compiled into the kernel + * (not a module), arrays can be assembles are boot time using with AUTODETECT + * where specially marked partitions are registered with md_autodetect_dev(), + * and with MD_BOOT where devices to be collected are given on the boot line + * with md=..... + * The code for that is here. + */ + +static int raid_noautodetect, raid_autopart; + +static struct { + int minor; + int partitioned; + int level; + int chunk; + char *device_names; +} md_setup_args[MAX_MD_DEVS]; + +static int md_setup_ents; + +/** + * get_option - Parse integer from an option string + * @str: option string + * @pint: (output) integer value parsed from @str + * + * Read an int from an option string; if available accept a subsequent + * comma as well. + * + * Return values: + * 0 : no int in string + * 1 : int found, no subsequent comma + * 2 : int found including a subsequent comma + */ + +static int get_option(char **str, int *pint) +{ + char *cur = *str; + + if (!cur || !(*cur)) + return 0; + *pint = strtol(cur, str, 0); + if (cur == *str) + return 0; + if (**str == ',') { + (*str)++; + return 2; + } + + return 1; +} + +/* + * Find the partitioned md device major number... of course this *HAD* + * to be done dynamically instead of using a registered number. + * Sigh. Double sigh. 
+ */ +static int mdp_major(void) +{ + static int found = 0; + FILE *f; + char line[512], *p; + int is_blk, major_no; + + if (found) + return found; + + f = fopen("/proc/devices", "r"); + is_blk = 0; + while (fgets(line, sizeof line, f)) { + if (!strcmp(line, "Block devices:\n")) + is_blk = 1; + if (is_blk) { + major_no = strtol(line, &p, 10); + while (*p && isspace(*p)) + p++; + + if (major_no == 0) /* Not a number */ + is_blk = 0; + else if (major_no > 0 && !strcmp(p, "mdp")) { + found = major_no; + break; + } + } + } + fclose(f); + + if (!found) { + fprintf(stderr, + "Error: mdp devices detected but no mdp device found!\n"); + exit(1); + } + + return found; +} + +/* + * Parse the command-line parameters given our kernel, but do not + * actually try to invoke the MD device now; that is handled by + * md_setup_drive after the low-level disk drivers have initialised. + * + * 27/11/1999: Fixed to work correctly with the 2.3 kernel (which + * assigns the task of parsing integer arguments to the + * invoked program now). Added ability to initialise all + * the MD devices (by specifying multiple "md=" lines) + * instead of just one. -- KTK + * 18May2000: Added support for persistent-superblock arrays: + * md=n,0,factor,fault,device-list uses RAID0 for device n + * md=n,-1,factor,fault,device-list uses LINEAR for device n + * md=n,device-list reads a RAID superblock from the devices + * elements in device-list are read by name_to_kdev_t so can be + * a hex number or something like /dev/hda1 /dev/sdb + * 2001-06-03: Dave Cinege <dcinege@psychosis.com> + * Shifted name_to_kdev_t() and related operations to md_set_drive() + * for later execution. Rewrote section to make devfs compatible. 
+ */ +static int md_setup(char *str) +{ + int minor_num, level, factor, fault, partitioned = 0; + char *pername = ""; + char *str1; + int ent; + + if (*str == 'd') { + partitioned = 1; + str++; + } + if (get_option(&str, &minor_num) != 2) { /* MD Number */ + fprintf(stderr, "md: Too few arguments supplied to md=.\n"); + return 0; + } + str1 = str; + if (minor_num >= MAX_MD_DEVS) { + fprintf(stderr, "md: md=%d, Minor device number too high.\n", + minor_num); + return 0; + } + for (ent = 0; ent < md_setup_ents; ent++) + if (md_setup_args[ent].minor == minor_num && + md_setup_args[ent].partitioned == partitioned) { + fprintf(stderr, + "md: md=%s%d, Specified more than once. " + "Replacing previous definition.\n", + partitioned ? "d" : "", minor_num); + break; + } + if (ent >= MAX_MD_DEVS) { + fprintf(stderr, "md: md=%s%d - too many md initialisations\n", + partitioned ? "d" : "", minor_num); + return 0; + } + if (ent >= md_setup_ents) + md_setup_ents++; + switch (get_option(&str, &level)) { /* RAID level */ + case 2: /* could be 0 or -1.. */ + if (level == 0 || level == LEVEL_LINEAR) { + if (get_option(&str, &factor) != 2 || /* Chunk Size */ + get_option(&str, &fault) != 2) { + fprintf(stderr, + "md: Too few arguments supplied to md=.\n"); + return 0; + } + md_setup_args[ent].level = level; + md_setup_args[ent].chunk = 1 << (factor + 12); + if (level == LEVEL_LINEAR) + pername = "linear"; + else + pername = "raid0"; + break; + } + /* FALL THROUGH */ + case 1: /* the first device is numeric */ + str = str1; + /* FALL THROUGH */ + case 0: + md_setup_args[ent].level = LEVEL_NONE; + pername = "super-block"; + } + + fprintf(stderr, "md: Will configure md%s%d (%s) from %s, below.\n", + partitioned ? 
"_d" : "", minor_num, pername, str); + md_setup_args[ent].device_names = str; + md_setup_args[ent].partitioned = partitioned; + md_setup_args[ent].minor = minor_num; + + return 1; +} + +#define MdpMinorShift 6 + +static void md_setup_drive(void) +{ + int dev_minor, i, ent, partitioned; + dev_t dev; + dev_t devices[MD_SB_DISKS + 1]; + + for (ent = 0; ent < md_setup_ents; ent++) { + int fd; + int err = 0; + char *devname; + mdu_disk_info_t dinfo; + char name[16]; + struct stat st_chk; + + dev_minor = md_setup_args[ent].minor; + partitioned = md_setup_args[ent].partitioned; + devname = md_setup_args[ent].device_names; + + snprintf(name, sizeof name, + "/dev/md%s%d", partitioned ? "_d" : "", dev_minor); + + if (stat(name, &st_chk) == 0) + continue; + + if (partitioned) + dev = makedev(mdp_major(), dev_minor << MdpMinorShift); + else + dev = makedev(MD_MAJOR, dev_minor); + create_dev(name, dev); + for (i = 0; i < MD_SB_DISKS && devname != 0; i++) { + char *p; + + p = strchr(devname, ','); + if (p) + *p++ = 0; + + dev = name_to_dev_t(devname); + if (!dev) { + fprintf(stderr, "md: Unknown device name: %s\n", + devname); + break; + } + + devices[i] = dev; + + devname = p; + } + devices[i] = 0; + + if (!i) + continue; + + fprintf(stderr, "md: Loading md%s%d: %s\n", + partitioned ? "_d" : "", dev_minor, + md_setup_args[ent].device_names); + + fd = open(name, 0, 0); + if (fd < 0) { + fprintf(stderr, "md: open failed - cannot start " + "array %s\n", name); + continue; + } + if (ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) { + fprintf(stderr, + "md: Ignoring md=%d, already autodetected. 
(Use raid=noautodetect)\n", + dev_minor); + close(fd); + continue; + } + + if (md_setup_args[ent].level != LEVEL_NONE) { + /* non-persistent */ + mdu_array_info_t ainfo; + ainfo.level = md_setup_args[ent].level; + ainfo.size = 0; + ainfo.nr_disks = 0; + ainfo.raid_disks = 0; + while (devices[ainfo.raid_disks]) + ainfo.raid_disks++; + ainfo.md_minor = dev_minor; + ainfo.not_persistent = 1; + + ainfo.state = (1 << MD_SB_CLEAN); + ainfo.layout = 0; + ainfo.chunk_size = md_setup_args[ent].chunk; + err = ioctl(fd, SET_ARRAY_INFO, &ainfo); + for (i = 0; !err && i <= MD_SB_DISKS; i++) { + dev = devices[i]; + if (!dev) + break; + dinfo.number = i; + dinfo.raid_disk = i; + dinfo.state = + (1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC); + dinfo.major = major(dev); + dinfo.minor = minor(dev); + err = ioctl(fd, ADD_NEW_DISK, &dinfo); + } + } else { + /* persistent */ + for (i = 0; i <= MD_SB_DISKS; i++) { + dev = devices[i]; + if (!dev) + break; + dinfo.major = major(dev); + dinfo.minor = minor(dev); + ioctl(fd, ADD_NEW_DISK, &dinfo); + } + } + if (!err) + err = ioctl(fd, RUN_ARRAY, 0); + if (err) + fprintf(stderr, "md: starting md%d failed\n", + dev_minor); + else { + /* reread the partition table. 
+ * I (neilb) and not sure why this is needed, but I + * cannot boot a kernel with devfs compiled in from + * partitioned md array without it + */ + close(fd); + fd = open(name, 0, 0); + ioctl(fd, BLKRRPART, 0); + } + close(fd); + } +} + +static int raid_setup(char *str) +{ + int len, pos; + + len = strlen(str) + 1; + pos = 0; + + while (pos < len) { + char *comma = strchr(str + pos, ','); + int wlen; + if (comma) + wlen = (comma - str) - pos; + else + wlen = (len - 1) - pos; + + if (!strncmp(str, "noautodetect", wlen)) + raid_noautodetect = 1; + if (strncmp(str, "partitionable", wlen) == 0) + raid_autopart = 1; + if (strncmp(str, "part", wlen) == 0) + raid_autopart = 1; + pos += wlen + 1; + } + return 1; +} + +static void md_run_setup(void) +{ + create_dev("/dev/md0", makedev(MD_MAJOR, 0)); + if (raid_noautodetect) + fprintf(stderr, + "md: Skipping autodetection of RAID arrays. (raid=noautodetect)\n"); + else { + int fd = open("/dev/md0", 0, 0); + if (fd >= 0) { + ioctl(fd, RAID_AUTORUN, + (void *)(intptr_t) raid_autopart); + close(fd); + } + } + md_setup_drive(); +} + +void md_run(int argc, char *argv[]) +{ + char **pp, *p; + + for (pp = argv; (p = *pp); pp++) { + if (!strncmp(p, "raid=", 5)) + raid_setup(p + 5); + else if (!strncmp(p, "md=", 3)) + md_setup(p + 3); + } + + md_run_setup(); +} diff --git a/usr/kinit/do_mounts_mtd.c b/usr/kinit/do_mounts_mtd.c new file mode 100644 index 0000000..20d27ca --- /dev/null +++ b/usr/kinit/do_mounts_mtd.c @@ -0,0 +1,42 @@ +/* + * Mount an MTD device as a character device. 
+ */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <inttypes.h> + +#include "kinit.h" +#include "do_mounts.h" + +int mount_mtd_root(int argc, char *argv[], const char *root_dev_name, + const char *type, unsigned long flags) +{ + const char *data = get_arg(argc, argv, "rootflags="); + + if (!type) + type = "jffs2"; + + printf("Trying to mount MTD %s as root (%s filesystem)\n", + root_dev_name, type); + + if (mount(root_dev_name, "/root", type, flags, data)) { + int err = errno; + fprintf(stderr, + "%s: Unable to mount MTD %s (%s filesystem) " + "as root: %s\n", + progname, root_dev_name, type, strerror(err)); + return -err; + } else { + fprintf(stderr, "%s: Mounted root (%s filesystem)%s.\n", + progname, type, (flags & MS_RDONLY) ? " readonly" : ""); + return 0; + } + +} diff --git a/usr/kinit/fstype/Kbuild b/usr/kinit/fstype/Kbuild new file mode 100644 index 0000000..631eb32 --- /dev/null +++ b/usr/kinit/fstype/Kbuild @@ -0,0 +1,29 @@ +# +# Kbuild file for fstype +# + +static-y := static/fstype +shared-y := shared/fstype + +# common .o files +objs := main.o fstype.o + +# TODO - do we want a stripped version +# TODO - do we want the static.g + shared.g directories? 
+ +# Create built-in.o with all object files (used by kinit) +lib-y := $(objs) + +# .o files used to built executables +static/fstype-y := $(objs) +shared/fstype-y := $(objs) + +# Cleaning +clean-dirs := static shared + +# install binary +ifdef KLIBCSHAREDFLAGS +install-y := $(shared-y) +else +install-y := $(static-y) +endif diff --git a/usr/kinit/fstype/btrfs.h b/usr/kinit/fstype/btrfs.h new file mode 100644 index 0000000..459da12 --- /dev/null +++ b/usr/kinit/fstype/btrfs.h @@ -0,0 +1,57 @@ +#ifndef __BTRFS_H +#define __BTRFS_H + +# define BTRFS_MAGIC "_BHRfS_M" +# define BTRFS_MAGIC_L 8 + +/* + * Structure of the super block + */ +struct btrfs_super_block { + uint8_t csum[32]; + uint8_t fsid[16]; + uint64_t bytenr; + uint64_t flags; + uint8_t magic[8]; + uint64_t generation; + uint64_t root; + uint64_t chunk_root; + uint64_t log_root; + uint64_t log_root_transid; + uint64_t total_bytes; + uint64_t bytes_used; + uint64_t root_dir_objectid; + uint64_t num_devices; + uint32_t sectorsize; + uint32_t nodesize; + uint32_t leafsize; + uint32_t stripesize; + uint32_t sys_chunk_array_size; + uint64_t chunk_root_generation; + uint64_t compat_flags; + uint64_t compat_ro_flags; + uint64_t incompat_flags; + uint16_t csum_type; + uint8_t root_level; + uint8_t chunk_root_level; + uint8_t log_root_level; + struct btrfs_dev_item { + uint64_t devid; + uint64_t total_bytes; + uint64_t bytes_used; + uint32_t io_align; + uint32_t io_width; + uint32_t sector_size; + uint64_t type; + uint64_t generation; + uint64_t start_offset; + uint32_t dev_group; + uint8_t seek_speed; + uint8_t bandwidth; + uint8_t uuid[16]; + uint8_t fsid[16]; + } __attribute__ ((__packed__)) dev_item; + uint8_t label[256]; +} __attribute__ ((__packed__)); + +#endif /* __BTRFS_H */ diff --git a/usr/kinit/fstype/cramfs_fs.h b/usr/kinit/fstype/cramfs_fs.h new file mode 100644 index 0000000..6f5ad4f --- /dev/null +++ b/usr/kinit/fstype/cramfs_fs.h @@ -0,0 +1,85 @@ +#ifndef __CRAMFS_H +#define __CRAMFS_H + +#define 
CRAMFS_MAGIC 0x28cd3d45 /* some random number */ +#define CRAMFS_SIGNATURE "Compressed ROMFS" + +/* + * Width of various bitfields in struct cramfs_inode. + * Primarily used to generate warnings in mkcramfs. + */ +#define CRAMFS_MODE_WIDTH 16 +#define CRAMFS_UID_WIDTH 16 +#define CRAMFS_SIZE_WIDTH 24 +#define CRAMFS_GID_WIDTH 8 +#define CRAMFS_NAMELEN_WIDTH 6 +#define CRAMFS_OFFSET_WIDTH 26 + +/* + * Since inode.namelen is a unsigned 6-bit number, the maximum cramfs + * path length is 63 << 2 = 252. + */ +#define CRAMFS_MAXPATHLEN (((1 << CRAMFS_NAMELEN_WIDTH) - 1) << 2) + +/* + * Reasonably terse representation of the inode data. + */ +struct cramfs_inode { + __u32 mode:CRAMFS_MODE_WIDTH, uid:CRAMFS_UID_WIDTH; + /* SIZE for device files is i_rdev */ + __u32 size:CRAMFS_SIZE_WIDTH, gid:CRAMFS_GID_WIDTH; + /* NAMELEN is the length of the file name, divided by 4 and + rounded up. (cramfs doesn't support hard links.) */ + /* OFFSET: For symlinks and non-empty regular files, this + contains the offset (divided by 4) of the file data in + compressed form (starting with an array of block pointers; + see README). For non-empty directories it is the offset + (divided by 4) of the inode of the first file in that + directory. For anything else, offset is zero. */ + __u32 namelen:CRAMFS_NAMELEN_WIDTH, offset:CRAMFS_OFFSET_WIDTH; +}; + +struct cramfs_info { + __u32 crc; + __u32 edition; + __u32 blocks; + __u32 files; +}; + +/* + * Superblock information at the beginning of the FS. 
+ */ +struct cramfs_super { + __u32 magic; /* 0x28cd3d45 - random number */ + __u32 size; /* length in bytes */ + __u32 flags; /* feature flags */ + __u32 future; /* reserved for future use */ + __u8 signature[16]; /* "Compressed ROMFS" */ + struct cramfs_info fsid; /* unique filesystem info */ + __u8 name[16]; /* user-defined name */ + struct cramfs_inode root; /* root inode data */ +}; + +/* + * Feature flags + * + * 0x00000000 - 0x000000ff: features that work for all past kernels + * 0x00000100 - 0xffffffff: features that don't work for past kernels + */ +#define CRAMFS_FLAG_FSID_VERSION_2 0x00000001 /* fsid version #2 */ +#define CRAMFS_FLAG_SORTED_DIRS 0x00000002 /* sorted dirs */ +#define CRAMFS_FLAG_HOLES 0x00000100 /* support for holes */ +#define CRAMFS_FLAG_WRONG_SIGNATURE 0x00000200 /* reserved */ +#define CRAMFS_FLAG_SHIFTED_ROOT_OFFSET 0x00000400 /* shifted root fs */ + +/* + * Valid values in super.flags. Currently we refuse to mount + * if (flags & ~CRAMFS_SUPPORTED_FLAGS). Maybe that should be + * changed to test super.future instead. 
+ */ +#define CRAMFS_SUPPORTED_FLAGS ( 0x000000ff \ + | CRAMFS_FLAG_HOLES \ + | CRAMFS_FLAG_WRONG_SIGNATURE \ + | CRAMFS_FLAG_SHIFTED_ROOT_OFFSET ) + +#endif diff --git a/usr/kinit/fstype/ext2_fs.h b/usr/kinit/fstype/ext2_fs.h new file mode 100644 index 0000000..775df8f --- /dev/null +++ b/usr/kinit/fstype/ext2_fs.h @@ -0,0 +1,84 @@ +#ifndef __EXT2_FS_H +#define __EXT2_FS_H + +/* + * The second extended file system magic number + */ +#define EXT2_SUPER_MAGIC 0xEF53 + +/* + * Structure of the super block + */ +struct ext2_super_block { + __le32 s_inodes_count; /* Inodes count */ + __le32 s_blocks_count; /* Blocks count */ + __le32 s_r_blocks_count; /* Reserved blocks count */ + __le32 s_free_blocks_count; /* Free blocks count */ + __le32 s_free_inodes_count; /* Free inodes count */ + __le32 s_first_data_block; /* First Data Block */ + __le32 s_log_block_size; /* Block size */ + __le32 s_log_frag_size; /* Fragment size */ + __le32 s_blocks_per_group; /* # Blocks per group */ + __le32 s_frags_per_group; /* # Fragments per group */ + __le32 s_inodes_per_group; /* # Inodes per group */ + __le32 s_mtime; /* Mount time */ + __le32 s_wtime; /* Write time */ + __le16 s_mnt_count; /* Mount count */ + __le16 s_max_mnt_count; /* Maximal mount count */ + __le16 s_magic; /* Magic signature */ + __le16 s_state; /* File system state */ + __le16 s_errors; /* Behaviour when detecting errors */ + __le16 s_minor_rev_level; /* minor revision level */ + __le32 s_lastcheck; /* time of last check */ + __le32 s_checkinterval; /* max. time between checks */ + __le32 s_creator_os; /* OS */ + __le32 s_rev_level; /* Revision level */ + __le16 s_def_resuid; /* Default uid for reserved blocks */ + __le16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT2_DYNAMIC_REV superblocks only. 
+ * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __le32 s_first_ino; /* First non-reserved inode */ + __le16 s_inode_size; /* size of inode structure */ + __le16 s_block_group_nr; /* block group # of this superblock */ + __le32 s_feature_compat; /* compatible feature set */ + __le32 s_feature_incompat; /* incompatible feature set */ + __le32 s_feature_ro_compat; /* readonly-compatible feature set */ + __u8 s_uuid[16]; /* 128-bit uuid for volume */ + char s_volume_name[16]; /* volume name */ + char s_last_mounted[64]; /* directory where last mounted */ + __le32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT2_COMPAT_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate */ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __u16 s_padding1; + /* + * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. 
+ */ + __u8 s_journal_uuid[16]; /* uuid of journal superblock */ + __u32 s_journal_inum; /* inode number of journal file */ + __u32 s_journal_dev; /* device number of journal file */ + __u32 s_last_orphan; /* start of list of inodes to delete */ + __u32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_reserved_char_pad; + __u16 s_reserved_word_pad; + __le32 s_default_mount_opts; + __le32 s_first_meta_bg; /* First metablock block group */ + __u32 s_reserved[190]; /* Padding to the end of the block */ +}; + +#endif /* __EXT2_FS_H */ diff --git a/usr/kinit/fstype/ext3_fs.h b/usr/kinit/fstype/ext3_fs.h new file mode 100644 index 0000000..f958e5c --- /dev/null +++ b/usr/kinit/fstype/ext3_fs.h @@ -0,0 +1,134 @@ +#ifndef __EXT3_FS_H +#define __EXT3_FS_H + +/* + * The second extended file system magic number + */ +#define EXT3_SUPER_MAGIC 0xEF53 + +#define EXT2_FLAGS_TEST_FILESYS 0x0004 +#define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT2_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +#define EXT2_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT2_FEATURE_INCOMPAT_FILETYPE 0x0002 +#define EXT2_FEATURE_INCOMPAT_META_BG 0x0010 +#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004 +#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 +#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 + +#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 +#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 +#define EXT4_FEATURE_INCOMPAT_MMP 0x0100 + +#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT2_FEATURE_RO_COMPAT_BTREE_DIR) +#define EXT3_FEATURE_RO_COMPAT_UNSUPPORTED ~EXT3_FEATURE_RO_COMPAT_SUPP +#define EXT3_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \ + EXT3_FEATURE_INCOMPAT_RECOVER| \ + EXT2_FEATURE_INCOMPAT_META_BG) +#define EXT3_FEATURE_INCOMPAT_UNSUPPORTED ~EXT3_FEATURE_INCOMPAT_SUPP + + + +/* + * Structure of the super block + */ +struct ext3_super_block { + /*00*/ __u32 
s_inodes_count; + /* Inodes count */ + __u32 s_blocks_count; /* Blocks count */ + __u32 s_r_blocks_count; /* Reserved blocks count */ + __u32 s_free_blocks_count; /* Free blocks count */ + /*10*/ __u32 s_free_inodes_count; + /* Free inodes count */ + __u32 s_first_data_block; /* First Data Block */ + __u32 s_log_block_size; /* Block size */ + __s32 s_log_frag_size; /* Fragment size */ + /*20*/ __u32 s_blocks_per_group; + /* # Blocks per group */ + __u32 s_frags_per_group; /* # Fragments per group */ + __u32 s_inodes_per_group; /* # Inodes per group */ + __u32 s_mtime; /* Mount time */ + /*30*/ __u32 s_wtime; + /* Write time */ + __u16 s_mnt_count; /* Mount count */ + __s16 s_max_mnt_count; /* Maximal mount count */ + __u16 s_magic; /* Magic signature */ + __u16 s_state; /* File system state */ + __u16 s_errors; /* Behaviour when detecting errors */ + __u16 s_minor_rev_level; /* minor revision level */ + /*40*/ __u32 s_lastcheck; + /* time of last check */ + __u32 s_checkinterval; /* max. time between checks */ + __u32 s_creator_os; /* OS */ + __u32 s_rev_level; /* Revision level */ + /*50*/ __u16 s_def_resuid; + /* Default uid for reserved blocks */ + __u16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT3_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... 
+ */ + __u32 s_first_ino; /* First non-reserved inode */ + __u16 s_inode_size; /* size of inode structure */ + __u16 s_block_group_nr; /* block group # of this superblock */ + __u32 s_feature_compat; /* compatible feature set */ + /*60*/ __u32 s_feature_incompat; + /* incompatible feature set */ + __u32 s_feature_ro_compat; /* readonly-compatible feature set */ + /*68*/ __u8 s_uuid[16]; + /* 128-bit uuid for volume */ + /*78*/ char s_volume_name[16]; + /* volume name */ + /*88*/ char s_last_mounted[64]; + /* directory where last mounted */ + /*C8*/ __u32 s_algorithm_usage_bitmap; + /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate */ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __u16 s_padding1; + /* + * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. + */ + /*D0*/ __u8 s_journal_uuid[16]; + /* uuid of journal superblock */ + /*E0*/ __u32 s_journal_inum; + /* inode number of journal file */ + __u32 s_journal_dev; /* device number of journal file */ + __u32 s_last_orphan; /* start of list of inodes to delete */ + __u32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_jnl_backup_type; + __u16 s_reserved_word_pad; + __u32 s_default_mount_opts; + __u32 s_first_meta_bg; + __u32 s_mkfs_time; + __u32 s_jnl_blocks[17]; + __u32 s_blocks_count_hi; + __u32 s_r_blocks_count_hi; + __u32 s_free_blocks_hi; + __u16 s_min_extra_isize; + __u16 s_want_extra_isize; + __u32 s_flags; + __u16 s_raid_stride; + __u16 s_mmp_interval; + __u64 s_mmp_block; + __u32 s_raid_stripe_width; + __u32 s_reserved[163]; +}; + +#endif /* __EXT3_FS_H */ diff --git a/usr/kinit/fstype/fstype.c b/usr/kinit/fstype/fstype.c new file mode 100644 index 0000000..aebccca --- /dev/null +++ b/usr/kinit/fstype/fstype.c @@ -0,0 +1,445 @@ +/* + * by rmk + * + 
* Detect filesystem type (on stdin) and output strings for two + * environment variables: + * FSTYPE - filesystem type + * FSSIZE - filesystem size (if known) + * + * We currently detect the filesystems listed below in the struct + * "imagetype images" (in the order they are listed). + */ + +#include <sys/types.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <endian.h> +#include <netinet/in.h> +#include <sys/utsname.h> +#include <sys/vfs.h> + +#define cpu_to_be32(x) __cpu_to_be32(x) /* Needed by romfs_fs.h */ + +#include "btrfs.h" +#include "cramfs_fs.h" +#include "ext2_fs.h" +#include "ext3_fs.h" +#include "gfs2_fs.h" +#include "iso9660_sb.h" +#include "luks_fs.h" +#include "lvm2_sb.h" +#include "minix_fs.h" +#include "nilfs_fs.h" +#include "ocfs2_fs.h" +#include "romfs_fs.h" +#include "squashfs_fs.h" +#include "xfs_sb.h" + +/* + * Slightly cleaned up version of jfs_superblock to + * avoid pulling in other kernel header files. + */ +#include "jfs_superblock.h" + +/* + * reiserfs_fs.h is too sick to include directly. + * Use a cleaned up version. 
/*
 * Probe for a gzip stream.
 *
 * buf must hold at least two readable bytes.  A match is a first byte
 * of 0x1f followed by 0x8b or 0x9e (0x9e is presumably the magic of
 * very old gzip releases - TODO confirm).
 *
 * Returns 1 and stores 0 in *bytes on a match, because the length of a
 * gzip stream can only be determined by processing the whole stream.
 * Returns 0 and leaves *bytes untouched otherwise.
 */
static int gzip_image(const void *buf, unsigned long long *bytes)
{
	const unsigned char *magic = buf;

	if (magic[0] != 0x1f)
		return 0;
	if (magic[1] != 0x8b && magic[1] != 0x9e)
		return 0;

	*bytes = 0;
	return 1;
}
long)__le32_to_cpu(sb->s_blocks_count) + << (10 + __le32_to_cpu(sb->s_log_block_size)); + return 1; + } + return 0; +} + +static int ext3_image(const void *buf, unsigned long long *bytes) +{ + const struct ext3_super_block *sb = + (const struct ext3_super_block *)buf; + + if (sb->s_magic == __cpu_to_le16(EXT2_SUPER_MAGIC) && + sb-> + s_feature_compat & __cpu_to_le32(EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { + *bytes = (unsigned long long)__le32_to_cpu(sb->s_blocks_count) + << (10 + __le32_to_cpu(sb->s_log_block_size)); + return 1; + } + return 0; +} + +static int ext2_image(const void *buf, unsigned long long *bytes) +{ + const struct ext2_super_block *sb = + (const struct ext2_super_block *)buf; + + if (sb->s_magic == __cpu_to_le16(EXT2_SUPER_MAGIC)) { + *bytes = (unsigned long long)__le32_to_cpu(sb->s_blocks_count) + << (10 + __le32_to_cpu(sb->s_log_block_size)); + return 1; + } + return 0; +} + +static int reiserfs_image(const void *buf, unsigned long long *bytes) +{ + const struct reiserfs_super_block *sb = + (const struct reiserfs_super_block *)buf; + + if (memcmp(REISERFS_MAGIC(sb), REISERFS_SUPER_MAGIC_STRING, + sizeof(REISERFS_SUPER_MAGIC_STRING) - 1) == 0 || + memcmp(REISERFS_MAGIC(sb), REISER2FS_SUPER_MAGIC_STRING, + sizeof(REISER2FS_SUPER_MAGIC_STRING) - 1) == 0 || + memcmp(REISERFS_MAGIC(sb), REISER2FS_JR_SUPER_MAGIC_STRING, + sizeof(REISER2FS_JR_SUPER_MAGIC_STRING) - 1) == 0) { + *bytes = (unsigned long long)REISERFS_BLOCK_COUNT(sb) * + REISERFS_BLOCKSIZE(sb); + return 1; + } + return 0; +} + +static int reiser4_image(const void *buf, unsigned long long *bytes) +{ + const struct reiser4_master_sb *sb = + (const struct reiser4_master_sb *)buf; + + if (memcmp(sb->ms_magic, REISER4_SUPER_MAGIC_STRING, + sizeof(REISER4_SUPER_MAGIC_STRING) - 1) == 0) { + *bytes = (unsigned long long) __le32_to_cpu(sb->ms_format) * + __le32_to_cpu(sb->ms_blksize); + return 1; + } + return 0; +} + +static int xfs_image(const void *buf, unsigned long long *bytes) +{ + const struct 
xfs_sb *sb = (const struct xfs_sb *)buf; + + if (__be32_to_cpu(sb->sb_magicnum) == XFS_SB_MAGIC) { + *bytes = __be64_to_cpu(sb->sb_dblocks) * + __be32_to_cpu(sb->sb_blocksize); + return 1; + } + return 0; +} + +static int jfs_image(const void *buf, unsigned long long *bytes) +{ + const struct jfs_superblock *sb = (const struct jfs_superblock *)buf; + + if (!memcmp(sb->s_magic, JFS_MAGIC, 4)) { + *bytes = __le64_to_cpu(sb->s_size) + << __le16_to_cpu(sb->s_l2pbsize); + return 1; + } + return 0; +} + +static int luks_image(const void *buf, unsigned long long *blocks) +{ + const struct luks_partition_header *lph = + (const struct luks_partition_header *)buf; + + if (!memcmp(lph->magic, LUKS_MAGIC, LUKS_MAGIC_L)) { + /* FSSIZE is dictated by the underlying fs, not by LUKS */ + *blocks = 0; + return 1; + } + return 0; +} + +static int swap_image(const void *buf, unsigned long long *blocks) +{ + const struct swap_super_block *ssb = + (const struct swap_super_block *)buf; + + if (!memcmp(ssb->magic, SWAP_MAGIC_1, SWAP_MAGIC_L) || + !memcmp(ssb->magic, SWAP_MAGIC_2, SWAP_MAGIC_L)) { + *blocks = 0; + return 1; + } + return 0; +} + +static int suspend_image(const void *buf, unsigned long long *blocks) +{ + const struct swap_super_block *ssb = + (const struct swap_super_block *)buf; + + if (!memcmp(ssb->magic, SUSP_MAGIC_1, SUSP_MAGIC_L) || + !memcmp(ssb->magic, SUSP_MAGIC_2, SUSP_MAGIC_L) || + !memcmp(ssb->magic, SUSP_MAGIC_U, SUSP_MAGIC_L)) { + *blocks = 0; + return 1; + } + return 0; +} + +static int lvm2_image(const void *buf, unsigned long long *blocks) +{ + const struct lvm2_super_block *lsb; + int i; + + /* We must check every 512 byte sector */ + for (i = 0; i < BLOCK_SIZE; i += 0x200) { + lsb = (const struct lvm2_super_block *)(buf + i); + + if (!memcmp(lsb->magic, LVM2_MAGIC, LVM2_MAGIC_L) && + !memcmp(lsb->type, LVM2_TYPE, LVM2_TYPE_L)) { + /* This is just one of possibly many PV's */ + *blocks = 0; + return 1; + } + } + + return 0; +} + +static int iso_image(const 
void *buf, unsigned long long *blocks) +{ + const struct iso_volume_descriptor *isovd = + (const struct iso_volume_descriptor *)buf; + const struct iso_hs_volume_descriptor *isohsvd = + (const struct iso_hs_volume_descriptor *)buf; + + if (!memcmp(isovd->id, ISO_MAGIC, ISO_MAGIC_L) || + !memcmp(isohsvd->id, ISO_HS_MAGIC, ISO_HS_MAGIC_L)) { + *blocks = 0; + return 1; + } + return 0; +} + +static int squashfs_image(const void *buf, unsigned long long *blocks) +{ + const struct squashfs_super_block *sb = + (const struct squashfs_super_block *)buf; + + if (sb->s_magic == SQUASHFS_MAGIC + || sb->s_magic == SQUASHFS_MAGIC_SWAP + || sb->s_magic == SQUASHFS_MAGIC_LZMA + || sb->s_magic == SQUASHFS_MAGIC_LZMA_SWAP) { + *blocks = (unsigned long long) sb->bytes_used; + return 1; + } + return 0; +} + +static int gfs2_image(const void *buf, unsigned long long *bytes) +{ + const struct gfs2_sb *sb = + (const struct gfs2_sb *)buf; + + if (__be32_to_cpu(sb->sb_header.mh_magic) == GFS2_MAGIC + && (__be32_to_cpu(sb->sb_fs_format) == GFS2_FORMAT_FS + || __be32_to_cpu(sb->sb_fs_format) == GFS2_FORMAT_MULTI)) { + *bytes = 0; /* cpu_to_be32(sb->sb_bsize) * ?; */ + return 1; + } + return 0; +} + +static int ocfs2_image(const void *buf, unsigned long long *bytes) +{ + const struct ocfs2_dinode *sb = + (const struct ocfs2_dinode *)buf; + + if (!memcmp(sb->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE, + sizeof(OCFS2_SUPER_BLOCK_SIGNATURE) - 1)) { + *bytes = 0; + return 1; + } + return 0; +} + +static int nilfs2_image(const void *buf, unsigned long long *bytes) +{ + const struct nilfs_super_block *sb = + (const struct nilfs_super_block *)buf; + + if (sb->s_magic == __cpu_to_le16(NILFS_SUPER_MAGIC) && + sb->s_rev_level == __cpu_to_le32(2)) { + *bytes = (unsigned long long)__le64_to_cpu(sb->s_dev_size); + return 1; + } + return 0; +} + +static int btrfs_image(const void *buf, unsigned long long *bytes) +{ + const struct btrfs_super_block *sb = + (const struct btrfs_super_block *)buf; + + if 
(!memcmp(sb->magic, BTRFS_MAGIC, BTRFS_MAGIC_L)) { + *bytes = (unsigned long long)__le64_to_cpu(sb->total_bytes); + return 1; + } + return 0; +} + +struct imagetype { + off_t block; + const char name[12]; + int (*identify) (const void *, unsigned long long *); +}; + +/* + * Note: + * + * Minix test needs to come after ext3/ext2, since it's possible for + * ext3/ext2 to look like minix by pure random chance. + * + * LVM comes after all other filesystems since it's possible + * that an old lvm signature is left on the disk if pvremove + * is not used before creating the new fs. + * + * The same goes for LUKS as for LVM. + */ +static struct imagetype images[] = { + {0, "gzip", gzip_image}, + {0, "cramfs", cramfs_image}, + {0, "romfs", romfs_image}, + {0, "xfs", xfs_image}, + {0, "squashfs", squashfs_image}, + {1, "ext4", ext4_image}, + {1, "ext3", ext3_image}, + {1, "ext2", ext2_image}, + {1, "minix", minix_image}, + {1, "nilfs2", nilfs2_image}, + {2, "ocfs2", ocfs2_image}, + {8, "reiserfs", reiserfs_image}, + {64, "reiserfs", reiserfs_image}, + {64, "reiser4", reiser4_image}, + {64, "gfs2", gfs2_image}, + {64, "btrfs", btrfs_image}, + {32, "jfs", jfs_image}, + {32, "iso9660", iso_image}, + {0, "luks", luks_image}, + {0, "lvm2", lvm2_image}, + {1, "lvm2", lvm2_image}, + {-1, "swap", swap_image}, + {-1, "suspend", suspend_image}, + {0, "", NULL} +}; + +int identify_fs(int fd, const char **fstype, + unsigned long long *bytes, off_t offset) +{ + uint64_t buf[BLOCK_SIZE >> 3]; /* 64-bit worst case alignment */ + off_t cur_block = (off_t) -1; + struct imagetype *ip; + int ret; + unsigned long long dummy; + + if (!bytes) + bytes = &dummy; + + *fstype = NULL; + *bytes = 0; + + for (ip = images; ip->identify; ip++) { + /* Hack for swap, which apparently is dependent on page size */ + if (ip->block == -1) + ip->block = SWAP_OFFSET(); + + if (cur_block != ip->block) { + /* + * Read block. 
+ */ + cur_block = ip->block; + ret = pread(fd, buf, BLOCK_SIZE, + offset + cur_block * BLOCK_SIZE); + if (ret != BLOCK_SIZE) + return -1; /* error */ + } + + if (ip->identify(buf, bytes)) { + *fstype = ip->name; + return 0; + } + } + + return 1; /* Unknown filesystem */ +} diff --git a/usr/kinit/fstype/fstype.h b/usr/kinit/fstype/fstype.h new file mode 100644 index 0000000..be2a3e4 --- /dev/null +++ b/usr/kinit/fstype/fstype.h @@ -0,0 +1,20 @@ +/* + * by rmk + * + * Detect filesystem type (on stdin) and output strings for two + * environment variables: + * FSTYPE - filesystem type + * FSSIZE - filesystem size (if known) + * + * We currently detect the fs listed in struct imagetype. + */ + +#ifndef FSTYPE_H +#define FSTYPE_H + +#include <unistd.h> + +int identify_fs(int fd, const char **fstype, + unsigned long long *bytes, off_t offset); + +#endif diff --git a/usr/kinit/fstype/gfs2_fs.h b/usr/kinit/fstype/gfs2_fs.h new file mode 100644 index 0000000..028e0c9 --- /dev/null +++ b/usr/kinit/fstype/gfs2_fs.h @@ -0,0 +1,56 @@ +#ifndef __GFS2_FS_H +#define __GFS2_FS_H + +#define GFS2_MAGIC 0x01161970 +#define GFS2_FORMAT_FS 1801 +#define GFS2_FORMAT_MULTI 1900 + + +/* + * An on-disk inode number + */ +struct gfs2_inum { + __be64 no_formal_ino; + __be64 no_addr; +}; + +/* + * Generic metadata head structure + * Every inplace buffer logged in the journal must start with this. 
+ */ +struct gfs2_meta_header { + uint32_t mh_magic; + uint32_t mh_type; + uint64_t __pad0; /* Was generation number in gfs1 */ + uint32_t mh_format; + uint32_t __pad1; /* Was incarnation number in gfs1 */ +}; + +/* Requirement: GFS2_LOCKNAME_LEN % 8 == 0 + * Includes: the fencing zero at the end */ +#define GFS2_LOCKNAME_LEN 64 + +/* + * super-block structure + */ +struct gfs2_sb { + struct gfs2_meta_header sb_header; + + uint32_t sb_fs_format; + uint32_t sb_multihost_format; + uint32_t __pad0; /* Was superblock flags in gfs1 */ + + uint32_t sb_bsize; + uint32_t sb_bsize_shift; + uint32_t __pad1; /* Was journal segment size in gfs1 */ + + struct gfs2_inum sb_master_dir; /* Was jindex dinode in gfs1 */ + struct gfs2_inum __pad2; /* Was rindex dinode in gfs1 */ + struct gfs2_inum sb_root_dir; + + char sb_lockproto[GFS2_LOCKNAME_LEN]; + char sb_locktable[GFS2_LOCKNAME_LEN]; + /* In gfs1, quota and license dinodes followed */ +} __attribute__ ((__packed__)); + +#endif /* __GFS2_FS_H */ diff --git a/usr/kinit/fstype/iso9660_sb.h b/usr/kinit/fstype/iso9660_sb.h new file mode 100644 index 0000000..efe0733 --- /dev/null +++ b/usr/kinit/fstype/iso9660_sb.h @@ -0,0 +1,24 @@ +#ifndef __ISO9660_SB_H +#define __ISO9660_SB_H + +#define ISO_MAGIC_L 5 +#define ISO_MAGIC "CD001" +#define ISO_HS_MAGIC_L 5 +#define ISO_HS_MAGIC "CDROM" + +/* ISO9660 Volume Descriptor */ +struct iso_volume_descriptor { + __u8 type; + char id[ISO_MAGIC_L]; + __u8 version; +}; + +/* High Sierra Volume Descriptor */ +struct iso_hs_volume_descriptor { + char foo[8]; + __u8 type; + char id[ISO_HS_MAGIC_L]; + __u8 version; +}; + +#endif diff --git a/usr/kinit/fstype/jfs_superblock.h b/usr/kinit/fstype/jfs_superblock.h new file mode 100644 index 0000000..63132a0 --- /dev/null +++ b/usr/kinit/fstype/jfs_superblock.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) International Business Machines Corp., 2000-2003 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the 
GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_SUPERBLOCK +#define _H_JFS_SUPERBLOCK + +struct timestruc_t { + __le32 tv_sec; + __le32 tv_nsec; +}; + +/* + * make the magic number something a human could read + */ +#define JFS_MAGIC "JFS1" /* Magic word */ + +#define JFS_VERSION 2 /* Version number: Version 2 */ + +#define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */ + +/* + * aggregate superblock + * + * The name superblock is too close to super_block, so the name has been + * changed to jfs_superblock. The utilities are still using the old name. + */ +struct jfs_superblock { + char s_magic[4]; /* 4: magic number */ + __le32 s_version; /* 4: version number */ + + __le64 s_size; /* 8: aggregate size in hardware/LVM blocks; + * VFS: number of blocks + */ + __le32 s_bsize; /* 4: aggregate block size in bytes; + * VFS: fragment size + */ + __le16 s_l2bsize; /* 2: log2 of s_bsize */ + __le16 s_l2bfactor; /* 2: log2(s_bsize/hardware block size) */ + __le32 s_pbsize; /* 4: hardware/LVM block size in bytes */ + __le16 s_l2pbsize; /* 2: log2 of s_pbsize */ + __le16 pad; /* 2: padding necessary for alignment */ + + __le32 s_agsize; /* 4: allocation group size in aggr. 
blocks */ + + __le32 s_flag; /* 4: aggregate attributes: + * see jfs_filsys.h + */ + __le32 s_state; /* 4: mount/unmount/recovery state: + * see jfs_filsys.h + */ + __le32 s_compress; /* 4: > 0 if data compression */ + + __le64 s_ait2; /* 8: first extent of secondary + * aggregate inode table + */ + + __le64 s_aim2; /* 8: first extent of secondary + * aggregate inode map + */ + __le32 s_logdev; /* 4: device address of log */ + __le32 s_logserial; /* 4: log serial number at aggregate mount */ + __le64 s_logpxd; /* 8: inline log extent */ + + __le64 s_fsckpxd; /* 8: inline fsck work space extent */ + + struct timestruc_t s_time; /* 8: time last updated */ + + __le32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for + * the fsck service log. + * N.B. These blocks are divided among the + * versions kept. This is not a per + * version size. + * N.B. These blocks are included in the + * length field of s_fsckpxd. + */ + char s_fscklog; /* 1: which fsck service log is most recent + * 0 => no service log data yet + * 1 => the first one + * 2 => the 2nd one + */ + char s_fpack[11]; /* 11: file system volume name + * N.B. 
This must be 11 bytes to + * conform with the OS/2 BootSector + * requirements + * Only used when s_version is 1 + */ + + /* extendfs() parameter under s_state & FM_EXTENDFS */ + __le64 s_xsize; /* 8: extendfs s_size */ + __le64 s_xfsckpxd; /* 8: extendfs fsckpxd */ + __le64 s_xlogpxd; /* 8: extendfs logpxd */ + /* - 128 byte boundary - */ + + char s_uuid[16]; /* 16: 128-bit uuid for volume */ + char s_label[16]; /* 16: volume label */ + char s_loguuid[16]; /* 16: 128-bit uuid for log device */ + +}; + +#endif /*_H_JFS_SUPERBLOCK */ diff --git a/usr/kinit/fstype/luks_fs.h b/usr/kinit/fstype/luks_fs.h new file mode 100644 index 0000000..fd8de31 --- /dev/null +++ b/usr/kinit/fstype/luks_fs.h @@ -0,0 +1,44 @@ +#ifndef __LINUX_LUKS_FS_H +#define __LINUX_LUKS_FS_H + +/* The basic structures of the luks partition header */ +#define LUKS_MAGIC_L 6 +#define LUKS_CIPHERNAME_L 32 +#define LUKS_CIPHERMODE_L 32 +#define LUKS_HASHSPEC_L 32 +#define LUKS_UUID_STRING_L 40 + +#define LUKS_MAGIC "LUKS\xBA\xBE" +#define LUKS_DIGESTSIZE 20 +#define LUKS_SALTSIZE 32 +#define LUKS_NUMKEYS 8 +#define LUKS_MKD_ITER 10 +#define LUKS_KEY_DISABLED 0x0000DEAD +#define LUKS_KEY_ENABLED 0x00AC71F3 +#define LUKS_STRIPES 4000 + +/* On-disk "super block" */ +struct luks_partition_header { + char magic[LUKS_MAGIC_L]; + __be16 version; + char cipherName[LUKS_CIPHERNAME_L]; + char cipherMode[LUKS_CIPHERMODE_L]; + char hashSpec[LUKS_HASHSPEC_L]; + __be32 payloadOffset; + __be32 keyBytes; + char mkDigest[LUKS_DIGESTSIZE]; + char mkDigestSalt[LUKS_SALTSIZE]; + __be32 mkDigestIterations; + char uuid[LUKS_UUID_STRING_L]; + + struct { + __be32 active; + /* Parameters for PBKDF2 processing */ + __be32 passwordIterations; + char passwordSalt[LUKS_SALTSIZE]; + __be32 keyMaterialOffset; + __be32 stripes; + } keyblock[LUKS_NUMKEYS]; +}; + +#endif diff --git a/usr/kinit/fstype/lvm2_sb.h b/usr/kinit/fstype/lvm2_sb.h new file mode 100644 index 0000000..75dfc10 --- /dev/null +++ b/usr/kinit/fstype/lvm2_sb.h @@ 
-0,0 +1,18 @@ +#ifndef __LVM2_SB_H +#define __LVM2_SB_H + +/* LVM2 super block definitions */ +#define LVM2_MAGIC_L 8 +#define LVM2_MAGIC "LABELONE" +#define LVM2_TYPE_L 8 +#define LVM2_TYPE "LVM2 001" + +struct lvm2_super_block { + char magic[LVM2_MAGIC_L]; + __be64 sector; + __be32 crc; + __be32 offset; + char type[LVM2_TYPE_L]; +}; + +#endif diff --git a/usr/kinit/fstype/main.c b/usr/kinit/fstype/main.c new file mode 100644 index 0000000..9162bdf --- /dev/null +++ b/usr/kinit/fstype/main.c @@ -0,0 +1,57 @@ +/* + * by rmk + * + * Detect filesystem type (on stdin) and output strings for two + * environment variables: + * FSTYPE - filesystem type + * FSSIZE - filesystem size (if known) + * + * We currently detect (in order): + * gzip, cramfs, romfs, xfs, minix, ext3, ext2, reiserfs, jfs + * + * MINIX, ext3 and Reiserfs bits are currently untested. + */ + +#include <stdio.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <stdlib.h> +#include "fstype.h" + +char *progname; + +int main(int argc, char *argv[]) +{ + int fd = 0; + int rv; + const char *fstype; + const char *file = "stdin"; + unsigned long long bytes; + + progname = argv[0]; + + if (argc > 2) { + fprintf(stderr, "Usage: %s [file]\n", progname); + return 1; + } + + if (argc > 1 && !(argv[1][0] == '-' && argv[1][1] == '\0')) { + fd = open(file = argv[1], O_RDONLY); + if (fd < 0) { + perror(argv[1]); + return 2; + } + } + + rv = identify_fs(fd, &fstype, &bytes, 0); + if (rv == -1) { + perror(file); + return 2; + } + + fstype = fstype ? 
fstype : "unknown"; + + fprintf(stdout, "FSTYPE=%s\nFSSIZE=%llu\n", fstype, bytes); + return rv; +} diff --git a/usr/kinit/fstype/minix_fs.h b/usr/kinit/fstype/minix_fs.h new file mode 100644 index 0000000..e2899f0 --- /dev/null +++ b/usr/kinit/fstype/minix_fs.h @@ -0,0 +1,85 @@ +#ifndef _LINUX_MINIX_FS_H +#define _LINUX_MINIX_FS_H + +/* + * The minix filesystem constants/structures + */ + +/* + * Thanks to Kees J Bot for sending me the definitions of the new + * minix filesystem (aka V2) with bigger inodes and 32-bit block + * pointers. + */ + +#define MINIX_ROOT_INO 1 + +/* Not the same as the bogus LINK_MAX in <linux/limits.h>. Oh well. */ +#define MINIX_LINK_MAX 250 +#define MINIX2_LINK_MAX 65530 + +#define MINIX_I_MAP_SLOTS 8 +#define MINIX_Z_MAP_SLOTS 64 +#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ +#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */ +#define MINIX2_SUPER_MAGIC 0x2468 /* minix V2 fs */ +#define MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 fs, 30 char names */ +#define MINIX_VALID_FS 0x0001 /* Clean fs. */ +#define MINIX_ERROR_FS 0x0002 /* fs has errors. */ + +#define MINIX_INODES_PER_BLOCK ((BLOCK_SIZE)/(sizeof (struct minix_inode))) +#define MINIX2_INODES_PER_BLOCK ((BLOCK_SIZE)/(sizeof (struct minix2_inode))) + +/* + * This is the original minix inode layout on disk. + * Note the 8-bit gid and atime and ctime. + */ +struct minix_inode { + __u16 i_mode; + __u16 i_uid; + __u32 i_size; + __u32 i_time; + __u8 i_gid; + __u8 i_nlinks; + __u16 i_zone[9]; +}; + +/* + * The new minix inode has all the time entries, as well as + * long block numbers and a third indirect block (7+1+1+1 + * instead of 7+1+1). Also, some previously 8-bit values are + * now 16-bit. The inode is now 64 bytes instead of 32. 
+ */ +struct minix2_inode { + __u16 i_mode; + __u16 i_nlinks; + __u16 i_uid; + __u16 i_gid; + __u32 i_size; + __u32 i_atime; + __u32 i_mtime; + __u32 i_ctime; + __u32 i_zone[10]; +}; + +/* + * minix super-block data on disk + */ +struct minix_super_block { + __u16 s_ninodes; + __u16 s_nzones; + __u16 s_imap_blocks; + __u16 s_zmap_blocks; + __u16 s_firstdatazone; + __u16 s_log_zone_size; + __u32 s_max_size; + __u16 s_magic; + __u16 s_state; + __u32 s_zones; +}; + +struct minix_dir_entry { + __u16 inode; + char name[0]; +}; + +#endif diff --git a/usr/kinit/fstype/nilfs_fs.h b/usr/kinit/fstype/nilfs_fs.h new file mode 100644 index 0000000..0845edf --- /dev/null +++ b/usr/kinit/fstype/nilfs_fs.h @@ -0,0 +1,64 @@ +#ifndef __NILFS_FS_H +#define __NILFS_FS_H + +#define NILFS_SUPER_MAGIC 0x3434 /* NILFS filesystem magic number */ + +/* + * struct nilfs_super_block - structure of super block on disk + */ +struct nilfs_super_block { + __le32 s_rev_level; /* Revision level */ + __le16 s_minor_rev_level; /* minor revision level */ + __le16 s_magic; /* Magic signature */ + + __le16 s_bytes; /* Bytes count of CRC calculation + for this structure. s_reserved + is excluded. */ + __le16 s_flags; /* flags */ + __le32 s_crc_seed; /* Seed value of CRC calculation */ + __le32 s_sum; /* Check sum of super block */ + + __le32 s_log_block_size; /* Block size represented as follows + blocksize = 1 << (s_log_block_size + 10) */ + __le64 s_nsegments; /* Number of segments in filesystem */ + __le64 s_dev_size; /* block device size in bytes */ + __le64 s_first_data_block; /* 1st seg disk block number */ + __le32 s_blocks_per_segment; /* number of blocks per full segment */ + __le32 s_r_segments_percentage;/* Reserved segments percentage */ /* or __le16 */ + + __le64 s_last_cno; /* Last checkpoint number */ + __le64 s_last_pseg; /* disk block addr pseg written last */ + __le64 s_last_seq; /* seq. 
number of seg written last */ + __le64 s_free_blocks_count; /* Free blocks count */ + + __le64 s_ctime; /* Creation time (execution time of newfs) */ + __le64 s_mtime; /* Mount time */ + __le64 s_wtime; /* Write time */ + __le16 s_mnt_count; /* Mount count */ + __le16 s_max_mnt_count; /* Maximal mount count */ + __le16 s_state; /* File system state */ + __le16 s_errors; /* Behaviour when detecting errors */ + __le64 s_lastcheck; /* time of last check */ + + __le32 s_checkinterval; /* max. time between checks */ + __le32 s_creator_os; /* OS */ + __le16 s_def_resuid; /* Default uid for reserved blocks */ + __le16 s_def_resgid; /* Default gid for reserved blocks */ + __le32 s_first_ino; /* First non-reserved inode */ /* or __le16 */ + + __le16 s_inode_size; /* Size of an inode */ + __le16 s_dat_entry_size; /* Size of a dat entry */ + __le16 s_checkpoint_size; /* Size of a checkpoint */ + __le16 s_segment_usage_size; /* Size of a segment usage */ + + __u8 s_uuid[16]; /* 128-bit uuid for volume */ + char s_volume_name[16]; /* volume name */ + char s_last_mounted[64]; /* directory where last mounted */ + + __le32 s_c_interval; /* Commit interval of segment */ + __le32 s_c_block_max; /* Threshold of data amount for + the segment construction */ + __u32 s_reserved[192]; /* padding to the end of the block */ +}; + +#endif /* __NILFS_FS_H */ diff --git a/usr/kinit/fstype/ocfs2_fs.h b/usr/kinit/fstype/ocfs2_fs.h new file mode 100644 index 0000000..b71cb61 --- /dev/null +++ b/usr/kinit/fstype/ocfs2_fs.h @@ -0,0 +1,90 @@ +#ifndef _OCFS2_FS_H +#define _OCFS2_FS_H + +/* Object signatures */ +#define OCFS2_SUPER_BLOCK_SIGNATURE "OCFSV2" + +#define OCFS2_VOL_UUID_LEN 16 +#define OCFS2_MAX_VOL_LABEL_LEN 64 + +/* + * On disk superblock for OCFS2 + * Note that it is contained inside an ocfs2_dinode, so all offsets + * are relative to the start of ocfs2_dinode.id2. 
+ */ +struct ocfs2_super_block { +/*00*/ uint16_t s_major_rev_level; + uint16_t s_minor_rev_level; + uint16_t s_mnt_count; + int16_t s_max_mnt_count; + uint16_t s_state; /* File system state */ + uint16_t s_errors; /* Behaviour when detecting errors */ + uint32_t s_checkinterval; /* Max time between checks */ +/*10*/ uint64_t s_lastcheck; /* Time of last check */ + uint32_t s_creator_os; /* OS */ + uint32_t s_feature_compat; /* Compatible feature set */ +/*20*/ uint32_t s_feature_incompat; /* Incompatible feature set */ + uint32_t s_feature_ro_compat; /* Readonly-compatible feature set */ + uint64_t s_root_blkno; /* Offset, in blocks, of root directory + dinode */ +/*30*/ uint64_t s_system_dir_blkno; /* Offset, in blocks, of system + directory dinode */ + uint32_t s_blocksize_bits; /* Blocksize for this fs */ + uint32_t s_clustersize_bits; /* Clustersize for this fs */ +/*40*/ uint16_t s_max_slots; /* Max number of simultaneous mounts + before tunefs required */ + uint16_t s_reserved1; + uint32_t s_reserved2; + uint64_t s_first_cluster_group; /* Block offset of 1st cluster + * group header */ +/*50*/ uint8_t s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. 
*/ +/*90*/ uint8_t s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ +/*A0*/ +} __attribute__ ((packed)); + +/* + * On disk inode for OCFS2 + * + * NOTE(review): the offset markers do not all match this declaration. + * With the fields packed in the order written, i_attr is at 0x70, + * i_reserved2 at 0x78, and the six fields from i_pad1 through ij_pad + * occupy 0xB8-0xD7, which puts i_super at 0xD8 rather than 0xC0. + * Presumably i_pad1..ij_pad were alternatives sharing one 8-byte slot + * in the original layout - confirm before relying on any offset past 0xB8. + */ +struct ocfs2_dinode { +/*00*/ uint8_t i_signature[8]; /* Signature for validation */ + uint32_t i_generation; /* Generation number */ + uint16_t i_suballoc_slot; /* Slot suballocator this inode + belongs to */ + int16_t i_suballoc_bit; /* Bit offset in suballocator + block group */ +/*10*/ uint32_t i_reserved0; + uint32_t i_clusters; /* Cluster count */ + uint32_t i_uid; /* Owner UID */ + uint32_t i_gid; /* Owning GID */ +/*20*/ uint64_t i_size; /* Size in bytes */ + uint16_t i_mode; /* File mode */ + uint16_t i_links_count; /* Links count */ + uint32_t i_flags; /* File flags */ +/*30*/ uint64_t i_atime; /* Access time */ + uint64_t i_ctime; /* Creation time */ +/*40*/ uint64_t i_mtime; /* Modification time */ + uint64_t i_dtime; /* Deletion time */ +/*50*/ uint64_t i_blkno; /* Offset on disk, in blocks */ + uint64_t i_last_eb_blk; /* Pointer to last extent + block */ +/*60*/ uint32_t i_fs_generation; /* Generation per fs-instance */ + uint32_t i_atime_nsec; + uint32_t i_ctime_nsec; + uint32_t i_mtime_nsec; + uint32_t i_attr; + uint16_t i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL + was set in i_flags */ + uint16_t i_reserved1; +/*70*/ uint64_t i_reserved2[8]; +/*B8*/ uint64_t i_pad1; + uint64_t i_rdev; /* Device number */ + uint32_t i_used; /* Bits (ie, clusters) used */ + uint32_t i_total; /* Total bits (clusters) + available */ + uint32_t ij_flags; /* Mounted, version, etc. 
*/ + uint32_t ij_pad; +/*C0*/ struct ocfs2_super_block i_super; +/* Actual on-disk size is one block */ +} __attribute__ ((packed)); + +#endif /* _OCFS2_FS_H */ diff --git a/usr/kinit/fstype/reiser4_fs.h b/usr/kinit/fstype/reiser4_fs.h new file mode 100644 index 0000000..af6ccc4 --- /dev/null +++ b/usr/kinit/fstype/reiser4_fs.h @@ -0,0 +1,31 @@ +#ifndef __REISER4_FS_H +#define __REISER4_FS_H + +#define SS_MAGIC_SIZE 16 + +/* reiser4 filesystem structure + * + * Master super block structure. It is the same for all reiser4 filesystems, + * so we can declare it here. It contains the fields common to all formats, + * such as the block size. + */ +struct reiser4_master_sb { + /* Master super block magic. */ + char ms_magic[SS_MAGIC_SIZE]; + + /* Disk format in use. */ + __u16 ms_format; + + /* Filesystem block size in use. */ + __u16 ms_blksize; + + /* Filesystem uuid in use. */ + char ms_uuid[SS_MAGIC_SIZE]; + + /* Filesystem label in use. */ + char ms_label[SS_MAGIC_SIZE]; +} __attribute__ ((packed)); + +#define REISER4_SUPER_MAGIC_STRING "ReIsEr4" + +#endif /* __REISER4_FS_H */ diff --git a/usr/kinit/fstype/reiserfs_fs.h b/usr/kinit/fstype/reiserfs_fs.h new file mode 100644 index 0000000..096d505 --- /dev/null +++ b/usr/kinit/fstype/reiserfs_fs.h @@ -0,0 +1,74 @@ +#ifndef __REISERFS_FS_H +#define __REISERFS_FS_H + +struct journal_params { + __u32 jp_journal_1st_block; /* where does journal start from on its + * device */ + __u32 jp_journal_dev; /* journal device st_rdev */ + __u32 jp_journal_size; /* size of the journal */ + __u32 jp_journal_trans_max; /* max number of blocks in a transaction. 
*/ + __u32 jp_journal_magic; /* random value made on fs creation (this + * was sb_journal_block_count) */ + __u32 jp_journal_max_batch; /* max number of blocks to batch into a + * trans */ + __u32 jp_journal_max_commit_age; /* in seconds, how old can an async + * commit be */ + __u32 jp_journal_max_trans_age; /* in seconds, how old can a transaction + * be */ +}; + +/* this is the super from 3.5.X, where X >= 10 */ +struct reiserfs_super_block_v1 { + __u32 s_block_count; /* blocks count */ + __u32 s_free_blocks; /* free blocks count */ + __u32 s_root_block; /* root block number */ + struct journal_params s_journal; + __u16 s_blocksize; /* block size */ + __u16 s_oid_maxsize; /* max size of object id array, see + * get_objectid() commentary */ + __u16 s_oid_cursize; /* current size of object id array */ + __u16 s_umount_state; /* this is set to 1 when filesystem was + * umounted, to 2 - when not */ + char s_magic[10]; /* reiserfs magic string indicates that + * file system is reiserfs: + * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ + __u16 s_fs_state; /* it is set to used by fsck to mark which + * phase of rebuilding is done */ + __u32 s_hash_function_code; /* indicate, what hash function is being use + * to sort names in a directory*/ + __u16 s_tree_height; /* height of disk tree */ + __u16 s_bmap_nr; /* amount of bitmap blocks needed to address + * each block of file system */ + __u16 s_version; /* this field is only reliable on filesystem + * with non-standard journal */ + __u16 s_reserved_for_journal; /* size in blocks of journal area on main + * device, we need to keep after + * making fs with non-standard journal */ +} __attribute__ ((__packed__)); + +/* this is the on disk super block */ +struct reiserfs_super_block { + struct reiserfs_super_block_v1 s_v1; + __u32 s_inode_generation; + __u32 s_flags; /* Right now used only by inode-attributes, if enabled */ + unsigned char s_uuid[16]; /* filesystem unique identifier */ + unsigned char s_label[16]; /* 
filesystem volume label */ + char s_unused[88]; /* zero filled by mkreiserfs and + * reiserfs_convert_objectid_map_v1() + * so any additions must be updated + * there as well. */ +} __attribute__ ((__packed__)); + +#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" +#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" +#define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" + +#define SB_V1_DISK_SUPER_BLOCK(s) (&((s)->s_v1)) +/* + * Accessors converting little-endian on-disk fields to CPU byte order. + * The conversion width must match the field: s_blocksize is a __u16, + * s_block_count is a __u32. Using the 32-bit conversion on the 16-bit + * field yields a wrong value on big-endian hosts. + */ +#define REISERFS_BLOCKSIZE(s) \ + __le16_to_cpu((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize)) +#define REISERFS_BLOCK_COUNT(s) \ + __le32_to_cpu((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count)) +#define REISERFS_MAGIC(s) \ + (SB_V1_DISK_SUPER_BLOCK(s)->s_magic) + +#endif /* __REISERFS_FS_H */ diff --git a/usr/kinit/fstype/romfs_fs.h b/usr/kinit/fstype/romfs_fs.h new file mode 100644 index 0000000..c490fbc --- /dev/null +++ b/usr/kinit/fstype/romfs_fs.h @@ -0,0 +1,56 @@ +#ifndef __LINUX_ROMFS_FS_H +#define __LINUX_ROMFS_FS_H + +/* The basic structures of the romfs filesystem */ + +#define ROMBSIZE BLOCK_SIZE +#define ROMBSBITS BLOCK_SIZE_BITS +#define ROMBMASK (ROMBSIZE-1) +#define ROMFS_MAGIC 0x7275 + +#define ROMFS_MAXFN 128 + +#define __mkw(h,l) (((h)&0x00ff)<< 8|((l)&0x00ff)) +#define __mkl(h,l) (((h)&0xffff)<<16|((l)&0xffff)) +#define __mk4(a,b,c,d) cpu_to_be32(__mkl(__mkw(a,b),__mkw(c,d))) +#define ROMSB_WORD0 __mk4('-','r','o','m') +#define ROMSB_WORD1 __mk4('1','f','s','-') + +/* On-disk "super block" */ + +struct romfs_super_block { + __be32 word0; + __be32 word1; + __be32 size; + __be32 checksum; + char name[0]; /* volume name */ +}; + +/* On disk inode */ + +struct romfs_inode { + __be32 next; /* low 4 bits see ROMFH_ */ + __be32 spec; + __be32 size; + __be32 checksum; + char name[0]; +}; + +#define ROMFH_TYPE 7 +#define ROMFH_HRD 0 +#define ROMFH_DIR 1 +#define ROMFH_REG 2 +#define ROMFH_SYM 3 +#define ROMFH_BLK 4 +#define ROMFH_CHR 5 +#define ROMFH_SCK 6 +#define ROMFH_FIF 7 +#define ROMFH_EXEC 8 + +/* Alignment */ + +#define ROMFH_SIZE 16 +#define 
ROMFH_PAD (ROMFH_SIZE-1) +#define ROMFH_MASK (~ROMFH_PAD) + +#endif diff --git a/usr/kinit/fstype/squashfs_fs.h b/usr/kinit/fstype/squashfs_fs.h new file mode 100644 index 0000000..c18365d --- /dev/null +++ b/usr/kinit/fstype/squashfs_fs.h @@ -0,0 +1,48 @@ +#ifndef __SQUASHFS_FS_H +#define __SQUASHFS_FS_H + +/* + * Squashfs + */ + +#define SQUASHFS_MAGIC 0x73717368 +#define SQUASHFS_MAGIC_SWAP 0x68737173 + +/* + * Squashfs + LZMA + */ + +#define SQUASHFS_MAGIC_LZMA 0x71736873 +#define SQUASHFS_MAGIC_LZMA_SWAP 0x73687371 + +/* definitions for structures on disk */ +struct squashfs_super_block { + unsigned int s_magic; + unsigned int inodes; + unsigned int bytes_used_2; + unsigned int uid_start_2; + unsigned int guid_start_2; + unsigned int inode_table_start_2; + unsigned int directory_table_start_2; + unsigned int s_major:16; + unsigned int s_minor:16; + unsigned int block_size_1:16; + unsigned int block_log:16; + unsigned int flags:8; + unsigned int no_uids:8; + unsigned int no_guids:8; + unsigned int mkfs_time /* time of filesystem creation */; + long long root_inode; + unsigned int block_size; + unsigned int fragments; + unsigned int fragment_table_start_2; + long long bytes_used; + long long uid_start; + long long guid_start; + long long inode_table_start; + long long directory_table_start; + long long fragment_table_start; + long long lookup_table_start; +} __attribute__ ((packed)); + +#endif /* __SQUASHFS_FS_H */ diff --git a/usr/kinit/fstype/swap_fs.h b/usr/kinit/fstype/swap_fs.h new file mode 100644 index 0000000..7b7fddb --- /dev/null +++ b/usr/kinit/fstype/swap_fs.h @@ -0,0 +1,25 @@ +#ifndef __LINUX_SWAP_FS_H +#define __LINUX_SWAP_FS_H + +/* The basic structures of the swap super block */ +#define SWAP_MAGIC_L 10 +#define SWAP_RESERVED_L (1024 - SWAP_MAGIC_L) +#define SWAP_MAGIC_1 "SWAP-SPACE" +#define SWAP_MAGIC_2 "SWAPSPACE2" + +/* Suspend signatures, located at same addr as swap magic */ +#define SUSP_MAGIC_L 9 +#define SUSP_MAGIC_1 "S1SUSPEND" +#define 
SUSP_MAGIC_2 "S2SUSPEND" +#define SUSP_MAGIC_U "ULSUSPEND" + +/* The superblock is the last block in the first page */ +#define SWAP_OFFSET() ((getpagesize() - 1024) >> 10) + +/* On-disk "super block" */ +struct swap_super_block { + char reserved[SWAP_RESERVED_L]; + char magic[SWAP_MAGIC_L]; +}; + +#endif diff --git a/usr/kinit/fstype/xfs_sb.h b/usr/kinit/fstype/xfs_sb.h new file mode 100644 index 0000000..fd54bc4 --- /dev/null +++ b/usr/kinit/fstype/xfs_sb.h @@ -0,0 +1,21 @@ +#ifndef __XFS_SB_H +#define __XFS_SB_H + +/* + * Super block + * Fits into a sector-sized buffer at address 0 of each allocation group. + * Only the first of these is ever updated except during growfs. + */ + +struct xfs_buf; +struct xfs_mount; + +#define XFS_SB_MAGIC 0x58465342 /* 'XFSB' */ + +typedef struct xfs_sb { + __u32 sb_magicnum; /* magic number == XFS_SB_MAGIC */ + __u32 sb_blocksize; /* logical block size, bytes */ + __u64 sb_dblocks; /* number of data blocks */ +} xfs_sb_t; + +#endif /* __XFS_SB_H */ diff --git a/usr/kinit/getarg.c b/usr/kinit/getarg.c new file mode 100644 index 0000000..fcce247 --- /dev/null +++ b/usr/kinit/getarg.c @@ -0,0 +1,57 @@ +#include <string.h> +#include "kinit.h" + +/* + * Routines that hunt for a specific argument. Please note that + * they actually search the array backwards. That is because on the + * kernel command lines, it's legal to override an earlier argument + * with a later argument. + */ + +/* + * Was this boolean argument passed? If so return the index in the + * argv array for it. For conflicting boolean options, use the + * one with the higher index. The only case when the return value + * can be equal, is when they're both zero; so equality can be used + * as the default option choice. 
+ * + * In other words, if two options "a" and "b" are opposites, and "a" + * is the default, this can be coded as: + * + * if (get_flag(argc,argv,"a") >= get_flag(argc,argv,"b")) + * do_a_stuff(); + * else + * do_b_stuff(); + * + * argv[0] is never examined. Unlike get_arg(), argv[i] is handed to + * strcmp() without a NULL check. + */ +int get_flag(int argc, char *argv[], const char *name) +{ + int i; + + for (i = argc-1; i > 0; i--) { + if (!strcmp(argv[i], name)) + return i; + } + return 0; +} + +/* + * Was this textual parameter (foo=option) passed? + * + * This returns the latest instance of such an option in the argv array. + * NAME is matched as a prefix, so it is expected to include the '='. + * The returned pointer addresses the text that follows NAME inside the + * matching argv entry; entries with nothing after NAME are skipped, as + * are NULL entries. Returns NULL when no entry matches. argv[0] is + * never examined. + */ +char *get_arg(int argc, char *argv[], const char *name) +{ + int len = strlen(name); + char *ret = NULL; + int i; + + for (i = argc-1; i > 0; i--) { + if (argv[i] && strncmp(argv[i], name, len) == 0 && + (argv[i][len] != '\0')) { + ret = argv[i] + len; + break; + } + } + + return ret; +} diff --git a/usr/kinit/getintfile.c b/usr/kinit/getintfile.c new file mode 100644 index 0000000..41ba475 --- /dev/null +++ b/usr/kinit/getintfile.c @@ -0,0 +1,30 @@ +/* + * Open a file and read it, assuming it contains a single long value. + * Return 0 if we read a valid value, otherwise -1. 
+ */ + +#include <stdio.h> +#include <stdlib.h> + +#include "kinit.h" + +int getintfile(const char *path, long *val) +{ + char buffer[64]; + char *ep; + FILE *f; + + f = fopen(path, "r"); + if (!f) + return -1; + + /* Read at most sizeof buffer - 1 bytes so the terminator always fits */ + ep = buffer + fread(buffer, 1, sizeof buffer - 1, f); + fclose(f); + *ep = '\0'; + + /* + * Base 0: decimal, 0x-prefixed hex and 0-prefixed octal are accepted. + * *val is written before validation, so it is overwritten even when + * -1 is returned below. An empty file parses as 0 and is reported + * as success. + */ + *val = strtol(buffer, &ep, 0); + /* Only end-of-string or a newline may follow the number */ + if (*ep && *ep != '\n') + return -1; + else + return 0; +} diff --git a/usr/kinit/initrd.c b/usr/kinit/initrd.c new file mode 100644 index 0000000..5833f2f --- /dev/null +++ b/usr/kinit/initrd.c @@ -0,0 +1,204 @@ +/* + * Handle initrd, thus putting the backwards into backwards compatible + */ + +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <sys/ioctl.h> +#include <sys/wait.h> +#include "do_mounts.h" +#include "kinit.h" +#include "xpio.h" + +#define BUF_SIZE 65536 /* Should be a power of 2 */ + +/* + * Copy the initrd to /dev/ram0, copy from the end to the beginning + * to avoid taking 2x the memory. 
+ */ +static int rd_copy_uncompressed(int ffd, int dfd) +{ + char buffer[BUF_SIZE]; + off_t bytes; + struct stat st; + + dprintf("kinit: uncompressed initrd\n"); + + if (ffd < 0 || fstat(ffd, &st) || !S_ISREG(st.st_mode) || + (bytes = st.st_size) == 0) + return -1; + + while (bytes) { + /* Last (possibly partial) BUF_SIZE-aligned chunk of what remains */ + ssize_t blocksize = ((bytes - 1) & (BUF_SIZE - 1)) + 1; + off_t offset = bytes - blocksize; + + dprintf("kinit: copying %zd bytes at offset %llu\n", + blocksize, (unsigned long long)offset); + + if (xpread(ffd, buffer, blocksize, offset) != blocksize || + xpwrite(dfd, buffer, blocksize, offset) != blocksize) + return -1; + + /* + * Free up memory (best effort, result ignored). + * NOTE(review): rd_copy_image() opens ffd O_RDONLY and + * ftruncate() needs a descriptor open for writing, so + * this presumably fails - confirm. + */ + ftruncate(ffd, offset); + bytes = offset; + } + return 0; +} + +/* + * Copy the initrd image at PATH into /dev/ram0, going through the + * decompressor when the image starts with the gzip magic bytes. + * Returns -1 if either file cannot be opened, otherwise the result of + * the copy/decompress routine; the caller treats nonzero as failure. + */ +static int rd_copy_image(const char *path) +{ + int ffd = open(path, O_RDONLY); + int rv = -1; + unsigned char gzip_magic[2]; + + if (ffd < 0) + goto barf; + + if (xpread(ffd, gzip_magic, 2, 0) == 2 && + gzip_magic[0] == 037 && gzip_magic[1] == 0213) { + FILE *wfd = fopen("/dev/ram0", "w"); + if (!wfd) + goto barf; + rv = load_ramdisk_compressed(path, wfd, 0); + fclose(wfd); + } else { + int dfd = open("/dev/ram0", O_WRONLY); + if (dfd < 0) + goto barf; + rv = rd_copy_uncompressed(ffd, dfd); + close(dfd); + } + +barf: + if (ffd >= 0) + close(ffd); + return rv; +} + +/* + * Run /linuxrc, for emulation of old-style initrd. + * Returns 0 on success or a negative errno value on failure. + */ +static int run_linuxrc(int argc, char *argv[], dev_t root_dev) +{ + int root_fd, old_fd; + pid_t pid; + long realroot = Root_RAM0; + const char *ramdisk_name = "/dev/ram0"; + FILE *fp; + + dprintf("kinit: mounting initrd\n"); + mkdir("/root", 0700); + if (!mount_block(ramdisk_name, "/root", NULL, MS_VERBOSE, NULL)) + return -errno; + + /* Write the current "real root device" out to procfs */ + dprintf("kinit: real_root_dev = %#x\n", root_dev); + fp = fopen("/proc/sys/kernel/real-root-dev", "w"); + if (fp) { + fprintf(fp, "%u", root_dev); + fclose(fp); + } else { + /* Warn and carry on rather than pass NULL to fprintf() */ + fprintf(stderr, "%s: cannot write real-root-dev: %s\n", + progname, strerror(errno)); + } + + mkdir("/old", 0700); + root_fd = open("/", O_RDONLY|O_DIRECTORY|O_CLOEXEC, 0); + old_fd = open("/old", O_RDONLY|O_DIRECTORY|O_CLOEXEC, 
0); + + if (root_fd < 0 || old_fd < 0) + return -errno; + + if (chdir("/root") || + mount(".", "/", NULL, MS_MOVE, NULL) || chroot(".")) + return -errno; + + pid = vfork(); + if (pid == 0) { + setsid(); + /* Looks like linuxrc doesn't get the init environment + or parameters. Weird, but so is the whole linuxrc bit. */ + execl("/linuxrc", "linuxrc", NULL); + _exit(255); + } else if (pid > 0) { + dprintf("kinit: Waiting for linuxrc to complete...\n"); + while (waitpid(pid, NULL, 0) != pid) + ; + dprintf("kinit: linuxrc done\n"); + } else { + return -errno; + } + + if (fchdir(old_fd) || + mount("/", ".", NULL, MS_MOVE, NULL) || + fchdir(root_fd) || chroot(".")) + return -errno; + + close(root_fd); + close(old_fd); + + getintfile("/proc/sys/kernel/real-root-dev", &realroot); + + /* If realroot is Root_RAM0, then the initrd did any necessary work */ + if (realroot == Root_RAM0) { + if (mount("/old", "/root", NULL, MS_MOVE, NULL)) + return -errno; + } else { + mount_root(argc, argv, (dev_t) realroot, NULL); + + /* If /root/initrd exists, move the initrd there, otherwise discard */ + if (!mount("/old", "/root/initrd", NULL, MS_MOVE, NULL)) { + /* We're good */ + } else { + int olddev = open(ramdisk_name, O_RDWR); + umount2("/old", MNT_DETACH); + if (olddev < 0 || + ioctl(olddev, BLKFLSBUF, 0) || + close(olddev)) { + fprintf(stderr, + "%s: Cannot flush initrd contents\n", + progname); + } + } + } + + rmdir("/old"); + return 0; +} + +int initrd_load(int argc, char *argv[], dev_t root_dev) +{ + if (access("/initrd.image", R_OK)) + return 0; /* No initrd */ + + dprintf("kinit: initrd found\n"); + + create_dev("/dev/ram0", Root_RAM0); + + if (rd_copy_image("/initrd.image") || unlink("/initrd.image")) { + fprintf(stderr, "%s: initrd installation failed (too big?)\n", + progname); + return 0; /* Failed to copy initrd */ + } + + dprintf("kinit: initrd copied\n"); + + if (root_dev == Root_MULTI) { + dprintf("kinit: skipping linuxrc: incompatible with multiple roots\n"); + /* 
Mounting initrd as ordinary root */ + return 0; + } + + if (root_dev != Root_RAM0) { + int err; + dprintf("kinit: running linuxrc\n"); + err = run_linuxrc(argc, argv, root_dev); + if (err) + fprintf(stderr, "%s: running linuxrc: %s\n", progname, + strerror(-err)); + return 1; /* initrd is root, or run_linuxrc took care of it */ + } else { + dprintf("kinit: permament (or pivoting) initrd, not running linuxrc\n"); + return 0; /* Mounting initrd as ordinary root */ + } +} diff --git a/usr/kinit/ipconfig/Kbuild b/usr/kinit/ipconfig/Kbuild new file mode 100644 index 0000000..686b03b --- /dev/null +++ b/usr/kinit/ipconfig/Kbuild @@ -0,0 +1,35 @@ +# +# Kbuild file for ipconfig +# + +static-y := static/ipconfig +shared-y := shared/ipconfig + +# common .o files +objs := main.o netdev.o packet.o +# dhcp +objs += dhcp_proto.o +# bootp +objs += bootp_proto.o + + +# TODO - do we want a stripped version +# TODO - do we want the static.g + shared.g directories? + + +# Create built-in.o with all object files (used by kinit) +lib-y := $(objs) + +# .o files used to built executables +static/ipconfig-y := $(objs) +shared/ipconfig-y := $(objs) + +# Cleaning +clean-dirs := static shared + +# install binary +ifdef KLIBCSHAREDFLAGS +install-y := $(shared-y) +else +install-y := $(static-y) +endif diff --git a/usr/kinit/ipconfig/README.ipconfig b/usr/kinit/ipconfig/README.ipconfig new file mode 100644 index 0000000..5ee87e5 --- /dev/null +++ b/usr/kinit/ipconfig/README.ipconfig @@ -0,0 +1,120 @@ +BOOTP/DHCP client for klibc +--------------------------- + +Usage: + +ipconfig [-c proto] [-d interface] [-i identifier] + [-n] [-p port] [-t timeout] [interface ...] + +-c proto Use PROTO as the configuration protocol for all + interfaces, unless overridden by specific interfaces. +-d interface Either the name of an interface, or a long spec. +-i identifier DHCP vendor class identifier. The default is + "Linux ipconfig". +-n Do nothing - just print the configuration that would + be performed. 
+-p port Send bootp/dhcp broadcasts from PORT, to PORT - 1. +-t timeout Give up on all unconfigured interfaces after TIMEOUT secs. + +You can configure multiple interfaces by passing multiple interface +specs on the command line, or by using the special interface name +"all". If you're autoconfiguring any interfaces, ipconfig will wait +until either all such interfaces have been configured, or the timeout +passes. + +PROTO can be one of the following, which selects the autoconfiguration +protocol to use: + +not specified use all protocols (the default) +dhcp use bootp and dhcp +bootp use bootp only +rarp use rarp (not currently supported) +none no autoconfiguration - either static config, or none at all + +An interface spec can be either short form, which is just the name of +an interface (eth0 or whatever), or long form. The long form consists +of two or more fields, separated by colons: + +<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>: + <dns0-ip>:<dns1-ip>:<ntp0-ip>:... + + <client-ip> IP address of the client. If empty, the address will + be determined by RARP/BOOTP/DHCP. Which protocol + is used depends on the <autoconf> parameter. If this + parameter is not empty, autoconf will be used. + + <server-ip> IP address of the NFS server. If RARP is used to + determine the client address and this parameter is NOT + empty, only replies from the specified server are + accepted. To use different RARP and NFS servers, + specify your RARP server here (or leave it blank), and + specify your NFS server in the `nfsroot' parameter + (see above). If this entry is blank, the address of the + server that answered the RARP/BOOTP/DHCP request + is used. + + <gw-ip> IP address of a gateway if the server is on a different + subnet. If this entry is empty, no gateway is used and the + server is assumed to be on the local network, unless a + value has been received by BOOTP/DHCP. + + <netmask> Netmask for local network interface. 
If this is empty, + the netmask is derived from the client IP address assuming + classful addressing, unless overridden in BOOTP/DHCP reply. + + <hostname> Name of the client. If empty, the client IP address is + used in ASCII notation, or the value received by + BOOTP/DHCP. + + <device> Name of network device to use. If this is empty, all + devices are used for RARP/BOOTP/DHCP requests, and the + first one we receive a reply on is configured. If you + have only one device, you can safely leave this blank. + + <autoconf> Method to use for autoconfiguration. If this is either + 'rarp', 'bootp', or 'dhcp' the specified protocol is + used. If the value is 'both', 'all' or empty, all + protocols are used. 'off', 'static' or 'none' means + no autoconfiguration. + + <dns0-ip> IP address of primary nameserver. + + Default: None if not using autoconfiguration; determined + automatically if using autoconfiguration. + + <dns1-ip> IP address of secondary nameserver. + See <dns0-ip>. + + <ntp0-ip> IP address of a Network Time Protocol (NTP) server. + Currently ignored. + + ... Additional fields will be ignored. + +IP addresses and netmasks must be either absent (defaulting to zero) +or presented in dotted-quad notation. + +An interface spec can be prefixed with either "ip=" or "nfsaddrs=", both +of which are ignored. These (along with the ugliness of the long +form) are present for compatibility with the in-kernel ipconfig code +from 2.4 and earlier kernels. + +Here are a few examples of valid ipconfig command lines. + +Enable the loopback interface: + ipconfig 127.0.0.1:::::lo:none + +Try to configure eth0 using bootp for up to 30 seconds: + ipconfig -t 30 -c bootp eth0 + +Configure eth0 and eth1 using dhcp or bootp, and eth2 statically: + ipconfig -c any eth0 eth1 192.168.1.1:::::eth2:none + +-- + +From Russell's original README, and still true: + +The code in main.c is yucky imho. Needs cleaning.
+ +-- +Russell King (2002/10/22) +Bryan O'Sullivan (2003/04/29) diff --git a/usr/kinit/ipconfig/bootp_packet.h b/usr/kinit/ipconfig/bootp_packet.h new file mode 100644 index 0000000..1d5bd0d --- /dev/null +++ b/usr/kinit/ipconfig/bootp_packet.h @@ -0,0 +1,44 @@ +#ifndef BOOTP_PACKET_H +#define BOOTP_PACKET_H + +#include <sys/uio.h> + +struct netdev; + +/* packet ops */ +#define BOOTP_REQUEST 1 +#define BOOTP_REPLY 2 + +/* your basic bootp packet */ +struct bootp_hdr { + uint8_t op; + uint8_t htype; + uint8_t hlen; + uint8_t hops; + uint32_t xid; + uint16_t secs; + uint16_t flags; + uint32_t ciaddr; + uint32_t yiaddr; + uint32_t siaddr; + uint32_t giaddr; + uint8_t chaddr[16]; + char server_name[64]; + char boot_file[128]; + /* 312 bytes of extensions */ +}; + +/* + * memory size of BOOTP Vendor Extensions/DHCP Options for receiving + * + * generic_ether_mtu:1500, min_sizeof(ip_hdr):20, sizeof(udp_hdr):8 + * + * #define BOOTP_EXTS_SIZE (1500 - 20 - 8 - sizeof(struct bootp_hdr)) + */ +/* larger size for backward compatibility of ipconfig */ +#define BOOTP_EXTS_SIZE 1500 + +/* minimum length of BOOTP/DHCP packet on sending */ +#define BOOTP_MIN_LEN 300 + +#endif /* BOOTP_PACKET_H */ diff --git a/usr/kinit/ipconfig/bootp_proto.c b/usr/kinit/ipconfig/bootp_proto.c new file mode 100644 index 0000000..f6f9dd4 --- /dev/null +++ b/usr/kinit/ipconfig/bootp_proto.c @@ -0,0 +1,565 @@ +/* + * BOOTP packet protocol handling. 
+ */ +#include <sys/types.h> +#include <sys/uio.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <netinet/in.h> + +#include "ipconfig.h" +#include "netdev.h" +#include "bootp_packet.h" +#include "bootp_proto.h" +#include "packet.h" + +static uint8_t bootp_options[312] = { + [ 0] = 99, 130, 83, 99,/* RFC1048 magic cookie */ + [ 4] = 1, 4, /* 4- 9 subnet mask */ + [ 10] = 3, 4, /* 10- 15 default gateway */ + [ 16] = 5, 8, /* 16- 25 nameserver */ + [ 26] = 12, 32, /* 26- 59 host name */ + [ 60] = 40, 32, /* 60- 95 nis domain name */ + [ 96] = 17, 40, /* 96-137 boot path */ + [138] = 57, 2, 1, 150, /* 138-141 extension buffer */ + [142] = 255, /* end of list */ +}; + +/* + * Send a plain bootp request packet with options + */ +int bootp_send_request(struct netdev *dev) +{ + struct bootp_hdr bootp; + struct iovec iov[] = { + /* [0] = ip + udp headers */ + [1] = {&bootp, sizeof(bootp)}, + [2] = {bootp_options, 312} + }; + + memset(&bootp, 0, sizeof(struct bootp_hdr)); + + bootp.op = BOOTP_REQUEST, bootp.htype = dev->hwtype; + bootp.hlen = dev->hwlen; + bootp.xid = dev->bootp.xid; + bootp.ciaddr = dev->ip_addr; + bootp.secs = htons(time(NULL) - dev->open_time); + memcpy(bootp.chaddr, dev->hwaddr, 16); + + dprintf("-> bootp xid 0x%08x secs 0x%08x ", + bootp.xid, ntohs(bootp.secs)); + + return packet_send(dev, iov, 2); +} + +/* + * DESCRIPTION + * bootp_ext119_decode() decodes Domain Search Option data. + * The decoded string is separated with ' '. + * For example, it is either "foo.bar.baz. bar.baz.", "foo.bar.", or "foo.". + * + * ARGUMENTS + * const uint8_t *ext + * *ext is a pointer to a DHCP Domain Search Option data. *ext does not + * include a tag(code) octet and a length octet in DHCP options. + * For example, if *ext is {3, 'f', 'o', 'o', 0}, this function returns + * a pointer to a "foo." string. + * + * int16_t ext_size + * ext_size is the memory size of *ext. 
For example, + * if *ext is {3, 'f', 'o', 'o', 0}, ext_size must be 5. + * + * uint8_t *tmp + * *tmp is a pointer to a temporary memory space for decoding. + * The memory size must be equal to or more than ext_size. + * 'memset(tmp, 0, sizeof(tmp));' is not required, but values in *tmp + * are changed in decoding process. + * + * RETURN VALUE + * if OK, a pointer to a decoded string malloc-ed + * else , NULL + * + * SEE ALSO RFC3397 + */ +static char *bootp_ext119_decode(const void *ext, int16_t ext_size, void *tmp) +{ + uint8_t *u8ext; + int_fast32_t i; + int_fast32_t decoded_size; + int_fast8_t currentdomain_is_singledot; + + /* only for validating *ext */ + uint8_t *is_pointee; + int_fast32_t is_pointee_size; + + /* only for structing a decoded string */ + char *decoded_str; + int_fast32_t dst_i; + + if (ext == NULL || ext_size <= 0 || tmp == NULL) + return NULL; + + u8ext = (uint8_t *)ext; + is_pointee = tmp; + memset(is_pointee, 0, (size_t)ext_size); + is_pointee_size = 0; + + /* + * validate the format of *ext and + * calculate the memory size for a decoded string + */ + i = 0; + decoded_size = 0; + currentdomain_is_singledot = 1; + while (1) { + if (i >= ext_size) + return NULL; + + if (u8ext[i] == 0) { + /* Zero-ending */ + if (currentdomain_is_singledot) + decoded_size++; /* for '.' */ + decoded_size++; /* for ' ' or '\0' */ + currentdomain_is_singledot = 1; + i++; + if (i == ext_size) + break; + is_pointee_size = i; + } else if (u8ext[i] < 0x40) { + /* Label(sub-domain string) */ + int j; + + /* loosely validate characters for domain names */ + if (i + u8ext[i] >= ext_size) + return NULL; + for (j = i + 1; j <= i + u8ext[i]; j++) + if (!(u8ext[j] == '-' || + ('0' <= u8ext[j] && u8ext[j] <= '9') || + ('A' <= u8ext[j] && u8ext[j] <= 'Z') || + ('a' <= u8ext[j] && u8ext[j] <= 'z'))) + return NULL; + + is_pointee[i] = 1; + decoded_size += u8ext[i] + 1; /* for Label + '.' 
*/ + currentdomain_is_singledot = 0; + i += u8ext[i] + 1; + } else if (u8ext[i] < 0xc0) + return NULL; + + else { + /* Compression-pointer (to a prior Label) */ + int_fast32_t p; + + if (i + 1 >= ext_size) + return NULL; + + p = ((0x3f & u8ext[i]) << 8) + u8ext[i + 1]; + if (!(p < is_pointee_size && is_pointee[p])) + return NULL; + + while (1) { + /* u8ext[p] was validated */ + if (u8ext[p] == 0) { + /* Zero-ending */ + decoded_size++; + break; + } else if (u8ext[p] < 0x40) { + /* Label(sub-domain string) */ + decoded_size += u8ext[p] + 1; + p += u8ext[p] + 1; + } else { + /* Compression-pointer */ + p = ((0x3f & u8ext[p]) << 8) + + u8ext[p + 1]; + } + } + + currentdomain_is_singledot = 1; + i += 2; + if (i == ext_size) + break; + is_pointee_size = i; + } + } + + + /* + * construct a decoded string + */ + decoded_str = malloc(decoded_size); + if (decoded_str == NULL) + return NULL; + + i = 0; + dst_i = 0; + currentdomain_is_singledot = 1; + while (1) { + if (u8ext[i] == 0) { + /* Zero-ending */ + if (currentdomain_is_singledot) { + if (dst_i != 0) + dst_i++; + decoded_str[dst_i] = '.'; + } + dst_i++; + decoded_str[dst_i] = ' '; + + currentdomain_is_singledot = 1; + i++; + if (i == ext_size) + break; + } else if (u8ext[i] < 0x40) { + /* Label(sub-domain string) */ + if (dst_i != 0) + dst_i++; + memcpy(&decoded_str[dst_i], &u8ext[i + 1], + (size_t)u8ext[i]); + dst_i += u8ext[i]; + decoded_str[dst_i] = '.'; + + currentdomain_is_singledot = 0; + i += u8ext[i] + 1; + } else { + /* Compression-pointer (to a prior Label) */ + int_fast32_t p; + + p = ((0x3f & u8ext[i]) << 8) + u8ext[i + 1]; + while (1) { + if (u8ext[p] == 0) { + /* Zero-ending */ + decoded_str[dst_i++] = '.'; + decoded_str[dst_i] = ' '; + break; + } else if (u8ext[p] < 0x40) { + /* Label(sub-domain string) */ + dst_i++; + memcpy(&decoded_str[dst_i], + &u8ext[p + 1], + (size_t)u8ext[p]); + dst_i += u8ext[p]; + decoded_str[dst_i] = '.'; + + p += u8ext[p] + 1; + } else { + /* Compression-pointer */ + p = 
((0x3f & u8ext[p]) << 8) + + u8ext[p + 1]; + } + } + + currentdomain_is_singledot = 1; + i += 2; + if (i == ext_size) + break; + } + } + decoded_str[dst_i] = '\0'; +#ifdef DEBUG + if (dst_i + 1 != decoded_size) { + dprintf("bug:%s():bottom: malloc(%ld), write(%ld)\n", + __func__, (long)decoded_size, (long)(dst_i + 1)); + exit(1); + } +#endif + return decoded_str; +} + +/* + * DESCRIPTION + * bootp_ext121_decode() decodes Classless Route Option data. + * + * ARGUMENTS + * const uint8_t *ext + * *ext is a pointer to a DHCP Classless Route Option data. + * For example, if *ext is {16, 192, 168, 192, 168, 42, 1}, + * this function returns a pointer to + * { + * subnet = 192.168.0.0; + * netmask_width = 16; + * gateway = 192.168.42.1; + * next = NULL; + * } + * + * int16_t ext_size + * ext_size is the memory size of *ext. For example, + * if *ext is {16, 192, 168, 192, 168, 42, 1}, ext_size must be 7. + * + * RETURN VALUE + * if OK, a pointer to a decoded struct route malloc-ed + * else , NULL + * + * SEE ALSO RFC3442 + */ +struct route *bootp_ext121_decode(const uint8_t *ext, int16_t ext_size) +{ + int16_t index = 0; + uint8_t netmask_width; + uint8_t significant_octets; + struct route *routes = NULL; + struct route *prev_route = NULL; + + while (index < ext_size) { + netmask_width = ext[index]; + index++; + if (netmask_width > 32) { + printf("IP-Config: Given Classless Route Option subnet mask width '%u' " + "exceeds IPv4 limit of 32. Ignoring remaining option.\n", + netmask_width); + return routes; + } + significant_octets = netmask_width / 8 + (netmask_width % 8 > 0); + if (ext_size - index < significant_octets + 4) { + printf("IP-Config: Given Classless Route Option remaining lengths (%u octets) " + "is shorter than the expected %u octets. 
Ignoring remaining options.\n", + ext_size - index, significant_octets + 4); + return routes; + } + + struct route *route = malloc(sizeof(struct route)); + if (route == NULL) + return routes; + + /* convert only significant octets from byte array into integer in network byte order */ + route->subnet = 0; + memcpy(&route->subnet, &ext[index], significant_octets); + index += significant_octets; + /* RFC3442 demands: After deriving a subnet number and subnet mask from + each destination descriptor, the DHCP client MUST zero any bits in + the subnet number where the corresponding bit in the mask is zero. */ + route->subnet &= netdev_genmask(netmask_width); + + /* convert octet array into network byte order */ + memcpy(&route->gateway, &ext[index], 4); + index += 4; + + route->netmask_width = netmask_width; + route->next = NULL; + + if (prev_route == NULL) { + routes = route; + } else { + prev_route->next = route; + } + prev_route = route; + } + return routes; +} + +/* + * Parse a bootp reply packet + */ +int bootp_parse(struct netdev *dev, struct bootp_hdr *hdr, + uint8_t *exts, int extlen) +{ + uint8_t ext119_buf[BOOTP_EXTS_SIZE]; + int16_t ext119_len = 0; + uint8_t ext121_buf[BOOTP_EXTS_SIZE]; + int16_t ext121_len = 0; + + dev->bootp.gateway = hdr->giaddr; + dev->ip_addr = hdr->yiaddr; + dev->ip_server = hdr->siaddr; + dev->ip_netmask = INADDR_ANY; + dev->ip_broadcast = INADDR_ANY; + dev->ip_gateway = hdr->giaddr; + dev->ip_nameserver[0] = INADDR_ANY; + dev->ip_nameserver[1] = INADDR_ANY; + dev->hostname[0] = '\0'; + dev->nisdomainname[0] = '\0'; + dev->bootpath[0] = '\0'; + memcpy(&dev->filename, &hdr->boot_file, FNLEN); + + if (extlen >= 4 && exts[0] == 99 && exts[1] == 130 && + exts[2] == 83 && exts[3] == 99) { + uint8_t *ext; + + for (ext = exts + 4; ext - exts < extlen;) { + int len; + uint8_t opt = *ext++; + + if (opt == 0) + continue; + else if (opt == 255) + break; + + if (ext - exts >= extlen) + break; + len = *ext++; + + if (ext - exts + len > extlen) + 
break; + switch (opt) { + case 1: /* subnet mask */ + if (len == 4) + memcpy(&dev->ip_netmask, ext, 4); + break; + case 3: /* default gateway */ + if (len >= 4) + memcpy(&dev->ip_gateway, ext, 4); + break; + case 6: /* DNS server */ + if (len >= 4) + memcpy(&dev->ip_nameserver, ext, + len >= 8 ? 8 : 4); + break; + case 12: /* host name */ + if (len > sizeof(dev->hostname) - 1) + len = sizeof(dev->hostname) - 1; + memcpy(&dev->hostname, ext, len); + dev->hostname[len] = '\0'; + break; + case 15: /* domain name */ + if (len > sizeof(dev->dnsdomainname) - 1) + len = sizeof(dev->dnsdomainname) - 1; + memcpy(&dev->dnsdomainname, ext, len); + dev->dnsdomainname[len] = '\0'; + break; + case 17: /* root path */ + if (len > sizeof(dev->bootpath) - 1) + len = sizeof(dev->bootpath) - 1; + memcpy(&dev->bootpath, ext, len); + dev->bootpath[len] = '\0'; + break; + case 26: /* interface MTU */ + if (len == 2) + dev->mtu = (ext[0] << 8) + ext[1]; + break; + case 28: /* broadcast addr */ + if (len == 4) + memcpy(&dev->ip_broadcast, ext, 4); + break; + case 40: /* NIS domain name */ + if (len > sizeof(dev->nisdomainname) - 1) + len = sizeof(dev->nisdomainname) - 1; + memcpy(&dev->nisdomainname, ext, len); + dev->nisdomainname[len] = '\0'; + break; + case 54: /* server identifier */ + if (len == 4 && !dev->ip_server) + memcpy(&dev->ip_server, ext, 4); + break; + case 119: /* Domain Search Option */ + if (ext119_len >= 0 && + ext119_len + len <= sizeof(ext119_buf)) { + memcpy(ext119_buf + ext119_len, + ext, len); + ext119_len += len; + } else + ext119_len = -1; + + break; + case 121: /* Classless Static Route Option (RFC3442) */ + if (ext121_len >= 0 && + ext121_len + len <= sizeof(ext121_buf)) { + memcpy(ext121_buf + ext121_len, + ext, len); + ext121_len += len; + } else + ext121_len = -1; + + break; + } + + ext += len; + } + } + if (ext119_len > 0) { + char *ret; + uint8_t ext119_tmp[BOOTP_EXTS_SIZE]; + + ret = bootp_ext119_decode(ext119_buf, ext119_len, ext119_tmp); + if (ret != 
NULL) { + if (dev->domainsearch != NULL) + free(dev->domainsearch); + dev->domainsearch = ret; + } + } + + if (ext121_len > 0) { + struct route *ret; + + ret = bootp_ext121_decode(ext121_buf, ext121_len); + if (ret != NULL) { + struct route *cur = dev->routes; + struct route *next; + while (cur != NULL) { + next = cur->next; + free(cur); + cur = next; + } + dev->routes = ret; + } + } + + /* + * Got packet. + */ + return 1; +} + +/* + * Receive a bootp reply and parse packet + * Returns: + *-1 = Error in packet_recv, try again later + * 0 = Unexpected packet, discarded + * 1 = Correctly received and parsed packet + */ +int bootp_recv_reply(struct netdev *dev) +{ + struct bootp_hdr bootp; + uint8_t bootp_options[BOOTP_EXTS_SIZE]; + struct iovec iov[] = { + /* [0] = ip + udp headers */ + [1] = {&bootp, sizeof(struct bootp_hdr)}, + [2] = {bootp_options, sizeof(bootp_options)} + }; + int ret; + + ret = packet_recv(dev, iov, 3); + if (ret <= 0) + return ret; + + if (ret < sizeof(struct bootp_hdr) || + bootp.op != BOOTP_REPLY || /* RFC951 7.5 */ + bootp.xid != dev->bootp.xid || + memcmp(bootp.chaddr, dev->hwaddr, 16)) + return 0; + + ret -= sizeof(struct bootp_hdr); + + return bootp_parse(dev, &bootp, bootp_options, ret); +} + +/* + * Initialise interface for bootp. + */ +int bootp_init_if(struct netdev *dev) +{ + short flags; + + /* + * Get the device flags + */ + if (netdev_getflags(dev, &flags)) + return -1; + + /* + * We can't do DHCP nor BOOTP if this device + * doesn't support broadcast. 
+ */ + if (dev->mtu < 364 || (flags & IFF_BROADCAST) == 0) { + dev->caps &= ~(CAP_BOOTP | CAP_DHCP); + return 0; + } + + /* + * Get a random XID + */ + dev->bootp.xid = (uint32_t) lrand48(); + dev->open_time = time(NULL); + + return 0; +} diff --git a/usr/kinit/ipconfig/bootp_proto.h b/usr/kinit/ipconfig/bootp_proto.h new file mode 100644 index 0000000..60873ce --- /dev/null +++ b/usr/kinit/ipconfig/bootp_proto.h @@ -0,0 +1,10 @@ +#ifndef IPCONFIG_BOOTP_PROTO_H +#define IPCONFIG_BOOTP_PROTO_H + +int bootp_send_request(struct netdev *dev); +int bootp_recv_reply(struct netdev *dev); +int bootp_parse(struct netdev *dev, struct bootp_hdr *hdr, uint8_t * exts, + int extlen); +int bootp_init_if(struct netdev *dev); + +#endif /* IPCONFIG_BOOTP_PROTO_H */ diff --git a/usr/kinit/ipconfig/dhcp_proto.c b/usr/kinit/ipconfig/dhcp_proto.c new file mode 100644 index 0000000..4e560b8 --- /dev/null +++ b/usr/kinit/ipconfig/dhcp_proto.c @@ -0,0 +1,301 @@ +/* + * DHCP RFC 2131 and 2132 + */ +#include <sys/types.h> +#include <sys/uio.h> +#include <netinet/in.h> +#include <stdio.h> +#include <string.h> +#include <time.h> + +#include "ipconfig.h" +#include "netdev.h" +#include "bootp_packet.h" +#include "bootp_proto.h" +#include "dhcp_proto.h" +#include "packet.h" + +static uint8_t dhcp_params[] = { + 1, /* subnet mask */ + 3, /* default gateway */ + 6, /* DNS server */ + 12, /* host name */ + 15, /* domain name */ + 17, /* root path */ + 26, /* interface mtu */ + 28, /* broadcast addr */ + 40, /* NIS domain name (why?) 
*/ + 119, /* Domain Search Option */ + 121, /* Classless Static Route Option (RFC3442) */ +}; + +static uint8_t dhcp_discover_hdr[] = { + 99, 130, 83, 99, /* bootp cookie */ + 53, 1, DHCPDISCOVER, /* dhcp message type */ + 55, sizeof(dhcp_params), /* parameter list */ +}; + +static uint8_t dhcp_request_hdr[] = { + 99, 130, 83, 99, /* boot cookie */ + 53, 1, DHCPREQUEST, /* dhcp message type */ +#define SERVER_IP_OFF 9 + 54, 4, 0, 0, 0, 0, /* server IP */ +#define REQ_IP_OFF 15 + 50, 4, 0, 0, 0, 0, /* requested IP address */ + 55, sizeof(dhcp_params), /* parameter list */ +}; + +static uint8_t dhcp_end[] = { + 255, +}; + +/* Both iovecs below have to have the same structure, since dhcp_send() + pokes at the internals */ +#define DHCP_IOV_LEN 8 + +static struct iovec dhcp_discover_iov[DHCP_IOV_LEN] = { + /* [0] = ip + udp header */ + /* [1] = bootp header */ + [2] = {dhcp_discover_hdr, sizeof(dhcp_discover_hdr)}, + [3] = {dhcp_params, sizeof(dhcp_params)}, + /* [4] = optional vendor class */ + /* [5] = optional hostname */ + /* [6] = {dhcp_end, sizeof(dhcp_end)} */ + /* [7] = optional padding */ +}; + +static struct iovec dhcp_request_iov[DHCP_IOV_LEN] = { + /* [0] = ip + udp header */ + /* [1] = bootp header */ + [2] = {dhcp_request_hdr, sizeof(dhcp_request_hdr)}, + [3] = {dhcp_params, sizeof(dhcp_params)}, + /* [4] = optional vendor class */ + /* [5] = optional hostname */ + /* [6] = {dhcp_end, sizeof(dhcp_end)} */ + /* [7] = optional padding */ +}; + +/* + * Parse a DHCP response packet + * Returns: + * 0 = Unexpected packet, not parsed + * 2 = DHCPOFFER (from dhcp_proto.h) + * 5 = DHCPACK + * 6 = DHCPNACK + */ +static int dhcp_parse(struct netdev *dev, struct bootp_hdr *hdr, + uint8_t *exts, int extlen) +{ + uint8_t type = 0; + uint32_t serverid = INADDR_NONE; + uint32_t leasetime = 0; + int ret = 0; + + if (extlen >= 4 && exts[0] == 99 && exts[1] == 130 && + exts[2] == 83 && exts[3] == 99) { + uint8_t *ext; + + for (ext = exts + 4; ext - exts < extlen;) { + int 
len; + uint8_t opt = *ext++; + + if (opt == 0) + continue; + else if (opt == 255) + break; + + if (ext - exts >= extlen) + break; + len = *ext++; + + if (ext - exts + len > extlen) + break; + switch (opt) { + case 51: /* IP Address Lease Time */ + if (len == 4) + leasetime = ntohl(*(uint32_t *)ext); + break; + case 53: /* DHCP Message Type */ + if (len == 1) + type = *ext; + break; + case 54: /* Server Identifier */ + if (len == 4) + memcpy(&serverid, ext, 4); + break; + } + ext += len; + } + } + + switch (type) { + case DHCPOFFER: + ret = bootp_parse(dev, hdr, exts, extlen) ? DHCPOFFER : 0; + if (ret == DHCPOFFER && serverid != INADDR_NONE) + dev->serverid = serverid; + dprintf("\n dhcp offer\n"); + break; + + case DHCPACK: + dev->dhcpleasetime = leasetime; + ret = bootp_parse(dev, hdr, exts, extlen) ? DHCPACK : 0; + dprintf("\n dhcp ack\n"); + break; + + case DHCPNAK: + ret = DHCPNAK; + dprintf("\n dhcp nak\n"); + break; + } + return ret; +} + +/* + * Receive and parse a DHCP packet + * Returns: + *-1 = Error in packet_recv, try again later + * 0 = Unexpected packet, discarded + * 2 = DHCPOFFER (from dhcp_proto.h) + * 5 = DHCPACK + * 6 = DHCPNACK + */ +static int dhcp_recv(struct netdev *dev) +{ + struct bootp_hdr bootp; + uint8_t dhcp_options[BOOTP_EXTS_SIZE]; + struct iovec iov[] = { + /* [0] = ip + udp header */ + [1] = {&bootp, sizeof(struct bootp_hdr)}, + [2] = {dhcp_options, sizeof(dhcp_options)} + }; + int ret; + + ret = packet_recv(dev, iov, 3); + if (ret <= 0) + return ret; + + dprintf("\n dhcp xid %08x ", dev->bootp.xid); + + if (ret < sizeof(struct bootp_hdr) || bootp.op != BOOTP_REPLY || + /* RFC951 7.5 */ bootp.xid != dev->bootp.xid || + memcmp(bootp.chaddr, dev->hwaddr, 16)) + return 0; + + ret -= sizeof(struct bootp_hdr); + + return dhcp_parse(dev, &bootp, dhcp_options, ret); +} + +static int dhcp_send(struct netdev *dev, struct iovec *vec) +{ + struct bootp_hdr bootp; + char dhcp_hostname[SYS_NMLN+2]; + uint8_t padding[BOOTP_MIN_LEN - 
sizeof(struct bootp_hdr)]; + int padding_len; + int i = 4; + int j; + + memset(&bootp, 0, sizeof(struct bootp_hdr)); + + bootp.op = BOOTP_REQUEST; + bootp.htype = dev->hwtype; + bootp.hlen = dev->hwlen; + bootp.xid = dev->bootp.xid; + bootp.ciaddr = INADDR_ANY; + /* yiaddr should always be set to 0 for the messages we're likely + * to send as a DHCP client: DHCPDISCOVER, DHCPREQUEST, DHCPDECLINE, + * DHCPINFORM, DHCPRELEASE + * cf. RFC2131 section 4.1.1, table 5. + */ + bootp.yiaddr = INADDR_ANY; + bootp.giaddr = INADDR_ANY; + bootp.flags = htons(0x8000); + bootp.secs = htons(time(NULL) - dev->open_time); + memcpy(bootp.chaddr, dev->hwaddr, 16); + + vec[1].iov_base = &bootp; + vec[1].iov_len = sizeof(struct bootp_hdr); + + dprintf("xid %08x secs %d ", bootp.xid, ntohs(bootp.secs)); + + if (vendor_class_identifier_len > 2) { + vec[i].iov_base = vendor_class_identifier; + vec[i].iov_len = vendor_class_identifier_len; + i++; + + dprintf("vendor_class_identifier \"%.*s\" ", + vendor_class_identifier_len-2, + vendor_class_identifier+2); + } + + if (dev->reqhostname[0] != '\0') { + int len = strlen(dev->reqhostname); + dhcp_hostname[0] = 12; + dhcp_hostname[1] = len; + memcpy(dhcp_hostname+2, dev->reqhostname, len); + + vec[i].iov_base = dhcp_hostname; + vec[i].iov_len = len+2; + i++; + + printf("hostname %.*s ", len, dhcp_hostname+2); + } + + vec[i].iov_base = dhcp_end; + vec[i].iov_len = sizeof(dhcp_end); + + /* Append padding if DHCP packet length is shorter than BOOTP_MIN_LEN */ + padding_len = sizeof(padding); + for (j = 2; j <= i; j++) + padding_len -= vec[j].iov_len; + if (padding_len > 0) { + memset(padding, 0, padding_len); + i++; + vec[i].iov_base = padding; + vec[i].iov_len = padding_len; + } + + return packet_send(dev, vec, i + 1); +} + +/* + * Send a DHCP discover packet + */ +int dhcp_send_discover(struct netdev *dev) +{ + dev->ip_addr = INADDR_ANY; + dev->ip_gateway = INADDR_ANY; + + dprintf("-> dhcp discover "); + + return dhcp_send(dev, 
dhcp_discover_iov); +} + +/* + * Receive a DHCP offer packet + */ +int dhcp_recv_offer(struct netdev *dev) +{ + return dhcp_recv(dev); +} + +/* + * Send a DHCP request packet + */ +int dhcp_send_request(struct netdev *dev) +{ + memcpy(&dhcp_request_hdr[SERVER_IP_OFF], &dev->serverid, 4); + memcpy(&dhcp_request_hdr[REQ_IP_OFF], &dev->ip_addr, 4); + + dprintf("-> dhcp request "); + + return dhcp_send(dev, dhcp_request_iov); +} + +/* + * Receive a DHCP ack packet + */ +int dhcp_recv_ack(struct netdev *dev) +{ + return dhcp_recv(dev); +} diff --git a/usr/kinit/ipconfig/dhcp_proto.h b/usr/kinit/ipconfig/dhcp_proto.h new file mode 100644 index 0000000..0fba92f --- /dev/null +++ b/usr/kinit/ipconfig/dhcp_proto.h @@ -0,0 +1,19 @@ +#ifndef IPCONFIG_DHCP_PROTO_H +#define IPCONFIG_DHCP_PROTO_H + +/* DHCP message types */ +#define DHCPDISCOVER 1 +#define DHCPOFFER 2 +#define DHCPREQUEST 3 +#define DHCPDECLINE 4 +#define DHCPACK 5 +#define DHCPNAK 6 +#define DHCPRELEASE 7 +#define DHCPINFORM 8 + +int dhcp_send_discover(struct netdev *dev); +int dhcp_recv_offer(struct netdev *dev); +int dhcp_send_request(struct netdev *dev); +int dhcp_recv_ack(struct netdev *dev); + +#endif /* IPCONFIG_DHCP_PROTO_H */ diff --git a/usr/kinit/ipconfig/ipconfig.h b/usr/kinit/ipconfig/ipconfig.h new file mode 100644 index 0000000..d1d7e42 --- /dev/null +++ b/usr/kinit/ipconfig/ipconfig.h @@ -0,0 +1,25 @@ +#ifndef IPCONFIG_IPCONFIG_H +#define IPCONFIG_IPCONFIG_H + +#include <stdint.h> +#include <sys/types.h> + +#define LOCAL_PORT 68 +#define REMOTE_PORT (LOCAL_PORT - 1) + +extern uint16_t cfg_local_port; +extern uint16_t cfg_remote_port; + +extern char vendor_class_identifier[]; +extern int vendor_class_identifier_len; + +int ipconfig_main(int argc, char *argv[]); +uint32_t ipconfig_server_address(void *next); + +#ifdef DEBUG +# define dprintf printf +#else +# define dprintf(...) 
((void)0) +#endif + +#endif /* IPCONFIG_IPCONFIG_H */ diff --git a/usr/kinit/ipconfig/main.c b/usr/kinit/ipconfig/main.c new file mode 100644 index 0000000..64c5398 --- /dev/null +++ b/usr/kinit/ipconfig/main.c @@ -0,0 +1,924 @@ +#include <poll.h> +#include <limits.h> +#include <setjmp.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include <arpa/inet.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/sysinfo.h> +#include <dirent.h> +#include <fcntl.h> +#include <unistd.h> /* for getopts */ + +#include <net/if_arp.h> + +#include "ipconfig.h" +#include "netdev.h" +#include "bootp_packet.h" +#include "bootp_proto.h" +#include "dhcp_proto.h" +#include "packet.h" + +static const char sysfs_class_net[] = "/sys/class/net"; +static const char *progname; +static jmp_buf abort_buf; +static char do_not_config; +static unsigned int default_caps = CAP_DHCP | CAP_BOOTP | CAP_RARP; +static int loop_timeout = -1; +static int configured; +static int bringup_first = 0; +static int n_devices = 0; + +/* DHCP vendor class identifier */ +char vendor_class_identifier[260]; +int vendor_class_identifier_len; + +struct state { + int state; + int restart_state; + time_t expire; + int retry_period; + + struct netdev *dev; + struct state *next; +}; + +/* #define PROTO_x : for uint8_t proto of struct netdev */ +struct protoinfo { + char *name; +} protoinfos[] = { +#define PROTO_NONE 0 + {"none"}, +#define PROTO_BOOTP 1 + {"bootp"}, +#define PROTO_DHCP 2 + {"dhcp"}, +#define PROTO_RARP 3 + {"rarp"} +}; + +static inline const char *my_inet_ntoa(uint32_t addr) +{ + struct in_addr a; + + a.s_addr = addr; + + return inet_ntoa(a); +} + +static void print_device_config(struct netdev *dev) +{ + int dns0_spaces; + int dns1_spaces; + printf("IP-Config: %s complete", dev->name); + if (dev->proto == PROTO_BOOTP || dev->proto == PROTO_DHCP) + printf(" (%s from %s)", protoinfos[dev->proto].name, + my_inet_ntoa(dev->serverid ? 
+ dev->serverid : dev->ip_server)); + + printf(":\n address: %-16s ", my_inet_ntoa(dev->ip_addr)); + printf("broadcast: %-16s ", my_inet_ntoa(dev->ip_broadcast)); + printf("netmask: %-16s\n", my_inet_ntoa(dev->ip_netmask)); + if (dev->routes != NULL) { + struct route *cur; + char *delim = ""; + printf(" routes :"); + for (cur = dev->routes; cur != NULL; cur = cur->next) { + printf("%s %s/%u", delim, my_inet_ntoa(cur->subnet), cur->netmask_width); + if (cur->gateway != 0) { + printf(" via %s", my_inet_ntoa(cur->gateway)); + } + delim = ","; + } + printf("\n"); + dns0_spaces = 3; + dns1_spaces = 5; + } else { + printf(" gateway: %-16s", my_inet_ntoa(dev->ip_gateway)); + dns0_spaces = 5; + dns1_spaces = 3; + } + printf(" dns0%*c: %-16s", dns0_spaces, ' ', my_inet_ntoa(dev->ip_nameserver[0])); + printf(" dns1%*c: %-16s\n", dns1_spaces, ' ', my_inet_ntoa(dev->ip_nameserver[1])); + if (dev->hostname[0]) + printf(" host : %-64s\n", dev->hostname); + if (dev->dnsdomainname[0]) + printf(" domain : %-64s\n", dev->dnsdomainname); + if (dev->nisdomainname[0]) + printf(" nisdomain: %-64s\n", dev->nisdomainname); + printf(" rootserver: %s ", my_inet_ntoa(dev->ip_server)); + printf("rootpath: %s\n", dev->bootpath); + printf(" filename : %s\n", dev->filename); +} + +static void configure_device(struct netdev *dev) +{ + if (do_not_config) + return; + + if (netdev_setmtu(dev)) + printf("IP-Config: failed to set MTU on %s to %u\n", + dev->name, dev->mtu); + + if (netdev_setaddress(dev)) + printf("IP-Config: failed to set addresses on %s\n", + dev->name); + if (netdev_setroutes(dev)) + printf("IP-Config: failed to set routes on %s\n", + dev->name); + if (dev->hostname[0] && + sethostname(dev->hostname, strlen(dev->hostname))) + printf("IP-Config: failed to set hostname '%s' from %s\n", + dev->hostname, dev->name); +} + +/* + * Escape shell varialbes in git style: + * Always start with a single quote ('), then leave all characters + * except ' and ! unchanged. 
+ */ +static void write_option(FILE *f, const char *name, const char *chr) +{ + + fprintf(f, "%s='", name); + while (*chr) { + switch (*chr) { + case '!': + case '\'': + fprintf(f, "'\\%c'", *chr); + break; + default: + fprintf(f, "%c", *chr); + break; + } + ++chr; + } + fprintf(f, "'\n"); +} + +static void dump_device_config(struct netdev *dev) +{ + char fn[40]; + FILE *f; + /* + * char UINT64_MAX[] = "18446744073709551615"; + * sizeof(UINT64_MAX)==21 + */ + char buf21[21]; + const char path[] = "/run/"; + + snprintf(fn, sizeof(fn), "%snet-%s.conf", path, dev->name); + f = fopen(fn, "w"); + if (f) { + write_option(f, "DEVICE", dev->name); + write_option(f, "PROTO", protoinfos[dev->proto].name); + write_option(f, "IPV4ADDR", + my_inet_ntoa(dev->ip_addr)); + write_option(f, "IPV4BROADCAST", + my_inet_ntoa(dev->ip_broadcast)); + write_option(f, "IPV4NETMASK", + my_inet_ntoa(dev->ip_netmask)); + if (dev->routes != NULL) { + /* Use 6 digits to encode the index */ + char key[23]; + char value[19]; + int i = 0; + struct route *cur; + for (cur = dev->routes; cur != NULL; cur = cur->next) { + snprintf(key, sizeof(key), "IPV4ROUTE%iSUBNET", i); + snprintf(value, sizeof(value), "%s/%u", my_inet_ntoa(cur->subnet), cur->netmask_width); + write_option(f, key, value); + snprintf(key, sizeof(key), "IPV4ROUTE%iGATEWAY", i); + write_option(f, key, my_inet_ntoa(cur->gateway)); + i++; + } + } else { + write_option(f, "IPV4GATEWAY", + my_inet_ntoa(dev->ip_gateway)); + } + write_option(f, "IPV4DNS0", + my_inet_ntoa(dev->ip_nameserver[0])); + write_option(f, "IPV4DNS1", + my_inet_ntoa(dev->ip_nameserver[1])); + write_option(f, "HOSTNAME", dev->hostname); + write_option(f, "DNSDOMAIN", dev->dnsdomainname); + write_option(f, "NISDOMAIN", dev->nisdomainname); + write_option(f, "ROOTSERVER", + my_inet_ntoa(dev->ip_server)); + write_option(f, "ROOTPATH", dev->bootpath); + write_option(f, "filename", dev->filename); + sprintf(buf21, "%ld", (long)dev->uptime); + write_option(f, "UPTIME", 
buf21); + sprintf(buf21, "%u", (unsigned int)dev->dhcpleasetime); + write_option(f, "DHCPLEASETIME", buf21); + write_option(f, "DOMAINSEARCH", dev->domainsearch == NULL ? + "" : dev->domainsearch); + fclose(f); + } +} + +static uint32_t inet_class_netmask(uint32_t ip) +{ + ip = ntohl(ip); + if (IN_CLASSA(ip)) + return htonl(IN_CLASSA_NET); + if (IN_CLASSB(ip)) + return htonl(IN_CLASSB_NET); + if (IN_CLASSC(ip)) + return htonl(IN_CLASSC_NET); + return INADDR_ANY; +} + +static void postprocess_device(struct netdev *dev) +{ + if (dev->ip_netmask == INADDR_ANY) { + dev->ip_netmask = inet_class_netmask(dev->ip_addr); + printf("IP-Config: %s guessed netmask %s\n", + dev->name, my_inet_ntoa(dev->ip_netmask)); + } + if (dev->ip_broadcast == INADDR_ANY) { + dev->ip_broadcast = + (dev->ip_addr & dev->ip_netmask) | ~dev->ip_netmask; + printf("IP-Config: %s guessed broadcast address %s\n", + dev->name, my_inet_ntoa(dev->ip_broadcast)); + } +} + +static void complete_device(struct netdev *dev) +{ + struct sysinfo info; + + if (!sysinfo(&info)) + dev->uptime = info.uptime; + postprocess_device(dev); + configure_device(dev); + dump_device_config(dev); + print_device_config(dev); + packet_close(dev); + + ++configured; + + dev->next = ifaces; + ifaces = dev; +} + +/* + * Returns: + * 0 = Not handled, try again later + * 1 = Handled + */ +static int process_receive_event(struct state *s, time_t now) +{ + int handled = 1; + + switch (s->state) { + case DEVST_ERROR: + return 0; /* Not handled */ + case DEVST_COMPLETE: + return 0; /* Not handled as already configured */ + + case DEVST_BOOTP: + s->restart_state = DEVST_BOOTP; + switch (bootp_recv_reply(s->dev)) { + case -1: + s->state = DEVST_ERROR; + break; + case 0: + handled = 0; + break; + case 1: + s->state = DEVST_COMPLETE; + s->dev->proto = PROTO_BOOTP; + dprintf("\n bootp reply\n"); + break; + } + break; + + case DEVST_DHCPDISC: + s->restart_state = DEVST_DHCPDISC; + switch (dhcp_recv_offer(s->dev)) { + case -1: + s->state = 
DEVST_ERROR; + break; + case 0: + handled = 0; + break; + case DHCPOFFER: /* Offer received */ + s->state = DEVST_DHCPREQ; + dhcp_send_request(s->dev); + break; + } + break; + + case DEVST_DHCPREQ: + s->restart_state = DEVST_DHCPDISC; + switch (dhcp_recv_ack(s->dev)) { + case -1: /* error */ + s->state = DEVST_ERROR; + break; + case 0: + handled = 0; + break; + case DHCPACK: /* ACK received */ + s->state = DEVST_COMPLETE; + s->dev->proto = PROTO_DHCP; + break; + case DHCPNAK: /* NAK received */ + s->state = DEVST_DHCPDISC; + break; + } + break; + + default: + dprintf("\n"); + handled = 0; + break; + } + + switch (s->state) { + case DEVST_COMPLETE: + complete_device(s->dev); + break; + + case DEVST_ERROR: + /* error occurred, try again in 10 seconds */ + s->expire = now + 10; + break; + } + + return handled; +} + +static void process_timeout_event(struct state *s, time_t now) +{ + int ret = 0; + + /* + * If we had an error, restore a sane state to + * restart from. + */ + if (s->state == DEVST_ERROR) + s->state = s->restart_state; + + /* + * Now send a packet depending on our state. 
+ */ + switch (s->state) { + case DEVST_BOOTP: + ret = bootp_send_request(s->dev); + s->restart_state = DEVST_BOOTP; + break; + + case DEVST_DHCPDISC: + ret = dhcp_send_discover(s->dev); + s->restart_state = DEVST_DHCPDISC; + break; + + case DEVST_DHCPREQ: + ret = dhcp_send_request(s->dev); + s->restart_state = DEVST_DHCPDISC; + break; + } + + if (ret == -1) { + s->state = DEVST_ERROR; + s->expire = now + 1; + } else { + s->expire = now + s->retry_period; + + s->retry_period *= 2; + if (s->retry_period > 60) + s->retry_period = 60; + } +} + +static void process_error_event(struct state *s, time_t now) +{ + s->state = DEVST_ERROR; + s->expire = now + 1; +} + +static struct state *slist; +struct netdev *ifaces; + +/* + * Returns: + * 0 = No dhcp/bootp packet was received + * 1 = A packet was received and handled + */ +static int do_pkt_recv(int nr, struct pollfd *fds, time_t now) +{ + int i, ret = 0; + struct state *s; + + for (i = 0, s = slist; s && nr; s = s->next) { + if (s->dev->pkt_fd != fds[i].fd) + continue; + if (fds[i].revents) { + if (fds[i].revents & POLLRDNORM) + ret |= process_receive_event(s, now); + else + process_error_event(s, now); + nr--; + } + i++; + } + return ret; +} + +/* + * Drive every device on slist until none is pending (or, with + * bringup_first set, until one has completed). + * Returns 0 on success, -1 if malloc fails or loop_timeout expires. + */ +static int loop(void) +{ + struct pollfd *fds; + struct state *s; + int i, nr = 0, rc = 0; + struct timeval now, prev; + time_t start; + + fds = malloc(sizeof(struct pollfd) * n_devices); + if (!fds) { + fprintf(stderr, "malloc failed\n"); + rc = -1; + goto bail; + } + + /* Zero the whole array, not just its first element. */ + memset(fds, 0, sizeof(*fds) * n_devices); + + gettimeofday(&now, NULL); + start = now.tv_sec; + while (1) { + int timeout = 60; + int pending = 0; + int done = 0; + int timeout_ms; + int x; + + for (i = 0, s = slist; s; s = s->next) { + dprintf("%s: state = %d\n", s->dev->name, s->state); + + if (s->state == DEVST_COMPLETE) { + done++; + continue; + } + + pending++; + + if (s->expire - now.tv_sec <= 0) { + dprintf("timeout\n"); + process_timeout_event(s, now.tv_sec); + } + + if (s->state != DEVST_ERROR) { + fds[i].fd =
s->dev->pkt_fd; + fds[i].events = POLLRDNORM; + i++; + } + + if (timeout > s->expire - now.tv_sec) + timeout = s->expire - now.tv_sec; + } + + if (pending == 0 || (bringup_first && done)) + break; + + timeout_ms = timeout * 1000; + + for (x = 0; x < 2; x++) { + int delta_ms; + + if (timeout_ms <= 0) + timeout_ms = 100; + + nr = poll(fds, i, timeout_ms); + prev = now; + gettimeofday(&now, NULL); + + if ((nr > 0) && do_pkt_recv(nr, fds, now.tv_sec)) + break; + + if (loop_timeout >= 0 && + now.tv_sec - start >= loop_timeout) { + printf("IP-Config: no response after %d " + "secs - giving up\n", loop_timeout); + rc = -1; + goto bail; + } + + delta_ms = (now.tv_sec - prev.tv_sec) * 1000; + delta_ms += (now.tv_usec - prev.tv_usec) / 1000; + + dprintf("Delta: %d ms\n", delta_ms); + + timeout_ms -= delta_ms; + } + } +bail: + if (fds) + free(fds); + return rc; +} + +static int add_one_dev(struct netdev *dev) +{ + struct state *state; + + state = malloc(sizeof(struct state)); + if (!state) + return -1; + + state->dev = dev; + state->expire = time(NULL); + state->retry_period = 1; + + /* + * Select the state that we start from. 
+ */ + if (dev->caps & CAP_DHCP && dev->ip_addr == INADDR_ANY) + state->restart_state = state->state = DEVST_DHCPDISC; + else if (dev->caps & CAP_DHCP) + state->restart_state = state->state = DEVST_DHCPREQ; + else if (dev->caps & CAP_BOOTP) + state->restart_state = state->state = DEVST_BOOTP; + else { + /* + * Neither DHCP nor BOOTP was requested (e.g. RARP only), so + * there is no starting state to pick: refuse the device rather + * than queue a state whose 'state' and 'restart_state' were + * never initialised. + */ + fprintf(stderr, "%s: %s: no usable autoconfiguration protocol\n", + progname, dev->name); + free(state); + return -1; + } + + state->next = slist; + slist = state; + + n_devices++; + + return 0; +} + +static void parse_addr(uint32_t *addr, const char *ip) +{ + struct in_addr in; + if (inet_aton(ip, &in) == 0) { + fprintf(stderr, "%s: can't parse IP address '%s'\n", + progname, ip); + longjmp(abort_buf, 1); + } + *addr = in.s_addr; +} + +static unsigned int parse_proto(const char *ip) +{ + unsigned int caps = 0; + + if (*ip == '\0' || strcmp(ip, "on") == 0 || strcmp(ip, "any") == 0) + caps = CAP_BOOTP | CAP_DHCP | CAP_RARP; + else if (strcmp(ip, "both") == 0) + caps = CAP_BOOTP | CAP_RARP; + else if (strcmp(ip, "dhcp") == 0) + caps = CAP_BOOTP | CAP_DHCP; + else if (strcmp(ip, "bootp") == 0) + caps = CAP_BOOTP; + else if (strcmp(ip, "rarp") == 0) + caps = CAP_RARP; + else if (strcmp(ip, "none") == 0 || strcmp(ip, "static") == 0 + || strcmp(ip, "off") == 0) + goto bail; + else { + fprintf(stderr, "%s: invalid protocol '%s'\n", progname, ip); + longjmp(abort_buf, 1); + } +bail: + return caps; +} + +static int add_all_devices(struct netdev *template); + +static int parse_device(struct netdev *dev, char *ip) +{ + char *cp; + int opt; + int is_ip = 0; + + dprintf("IP-Config: parse_device: \"%s\"\n", ip); + + if (strncmp(ip, "ip=", 3) == 0) { + ip += 3; + is_ip = 1; + } else if (strncmp(ip, "nfsaddrs=", 9) == 0) { + ip += 9; + is_ip = 1; /* Not sure about this...? */ + } + + if (!strchr(ip, ':')) { + /* Only one option, e.g.
"ip=dhcp", or an interface name */ + if (is_ip) { + dev->caps = parse_proto(ip); + bringup_first = 1; + } else { + dev->name = ip; + } + } else { + for (opt = 0; ip && *ip; ip = cp, opt++) { + if ((cp = strchr(ip, ':'))) { + *cp++ = '\0'; + } + if (*ip == '\0') + continue; + dprintf("IP-Config: opt #%d: '%s'\n", opt, ip); + switch (opt) { + case 0: + parse_addr(&dev->ip_addr, ip); + dev->caps = 0; + break; + case 1: + parse_addr(&dev->ip_server, ip); + break; + case 2: + parse_addr(&dev->ip_gateway, ip); + break; + case 3: + parse_addr(&dev->ip_netmask, ip); + break; + case 4: + strncpy(dev->hostname, ip, SYS_NMLN - 1); + dev->hostname[SYS_NMLN - 1] = '\0'; + memcpy(dev->reqhostname, dev->hostname, + SYS_NMLN); + break; + case 5: + dev->name = ip; + break; + case 6: + dev->caps = parse_proto(ip); + break; + case 7: + parse_addr(&dev->ip_nameserver[0], ip); + break; + case 8: + parse_addr(&dev->ip_nameserver[1], ip); + break; + case 9: + /* NTP server - ignore */ + break; + } + } + } + + if (dev->name == NULL || + dev->name[0] == '\0' || strcmp(dev->name, "all") == 0) { + add_all_devices(dev); + bringup_first = 1; + return 0; + } + return 1; +} + +static void bringup_device(struct netdev *dev) +{ + if (netdev_up(dev) == 0) { + if (dev->caps) + add_one_dev(dev); + else { + dev->proto = PROTO_NONE; + complete_device(dev); + } + } +} + +static void bringup_one_dev(struct netdev *template, struct netdev *dev) +{ + if (template->ip_addr != INADDR_NONE) + dev->ip_addr = template->ip_addr; + if (template->ip_server != INADDR_NONE) + dev->ip_server = template->ip_server; + if (template->ip_gateway != INADDR_NONE) + dev->ip_gateway = template->ip_gateway; + if (template->ip_netmask != INADDR_NONE) + dev->ip_netmask = template->ip_netmask; + if (template->ip_nameserver[0] != INADDR_NONE) + dev->ip_nameserver[0] = template->ip_nameserver[0]; + if (template->ip_nameserver[1] != INADDR_NONE) + dev->ip_nameserver[1] = template->ip_nameserver[1]; + if (template->hostname[0] != 
'\0') + strcpy(dev->hostname, template->hostname); + if (template->reqhostname[0] != '\0') + strcpy(dev->reqhostname, template->reqhostname); + dev->caps &= template->caps; + + bringup_device(dev); +} + +static struct netdev *add_device(char *info) +{ + struct netdev *dev; + int i; + + dev = malloc(sizeof(struct netdev)); + if (dev == NULL) { + fprintf(stderr, "%s: out of memory\n", progname); + longjmp(abort_buf, 1); + } + + memset(dev, 0, sizeof(struct netdev)); + dev->caps = default_caps; + + if (parse_device(dev, info) == 0) + goto bail; + + if (netdev_init_if(dev) == -1) + goto bail; + + if (bootp_init_if(dev) == -1) + goto bail; + + if (packet_open(dev) == -1) + goto bail; + + printf("IP-Config: %s hardware address", dev->name); + for (i = 0; i < dev->hwlen; i++) + printf("%c%02x", i == 0 ? ' ' : ':', dev->hwaddr[i]); + printf(" mtu %d%s%s\n", dev->mtu, + dev->caps & CAP_DHCP ? " DHCP" : + dev->caps & CAP_BOOTP ? " BOOTP" : "", + dev->caps & CAP_RARP ? " RARP" : ""); + return dev; +bail: + free(dev); + return NULL; +} + +static int add_all_devices(struct netdev *template) +{ + DIR *d; + struct dirent *de; + struct netdev *dev; + char t[PATH_MAX], p[255]; + int i, fd; + unsigned long flags; + + d = opendir(sysfs_class_net); + if (!d) + return 0; + + while ((de = readdir(d)) != NULL) { + /* This excludes devices beginning with dots or "dummy", + as well as . or .. */ + if (de->d_name[0] == '.' || !strcmp(de->d_name, "..")) + continue; + i = snprintf(t, PATH_MAX - 1, "%s/%s/flags", sysfs_class_net, + de->d_name); + if (i < 0 || i >= PATH_MAX - 1) + continue; + t[i] = '\0'; + fd = open(t, O_RDONLY); + if (fd < 0) { + perror(t); + continue; + } + i = read(fd, &p, sizeof(p) - 1); + close(fd); + if (i < 0) { + perror(t); + continue; + } + p[i] = '\0'; + flags = strtoul(p, NULL, 0); + /* Heuristic for if this is a reasonable boot interface. + This is the same + logic the in-kernel ipconfig uses... 
*/ + if (!(flags & IFF_LOOPBACK) && + (flags & (IFF_BROADCAST | IFF_POINTOPOINT))) { + dprintf("Trying to bring up %s\n", de->d_name); + + dev = add_device(de->d_name); + if (!dev) + continue; + bringup_one_dev(template, dev); + } + } + closedir(d); + return 1; +} + +static int check_autoconfig(void) +{ + int ndev = 0, nauto = 0; + struct state *s; + + for (s = slist; s; s = s->next) { + ndev++; + if (s->dev->caps) + nauto++; + } + + if (ndev == 0) { + if (configured == 0) { + fprintf(stderr, "%s: no devices to configure\n", + progname); + longjmp(abort_buf, 1); + } + } + + return nauto; +} + +static void set_vendor_identifier(const char *id) +{ + int len = strlen(id); + if (len >= 255) { + fprintf(stderr, + "%s: invalid vendor class identifier: " + "%s\n", progname, id); + longjmp(abort_buf, 1); + } + memcpy(vendor_class_identifier+2, id, len); + vendor_class_identifier[0] = 60; + vendor_class_identifier[1] = len; + vendor_class_identifier_len = len+2; +} + +int main(int argc, char *argv[]) + __attribute__ ((weak, alias("ipconfig_main"))); + +int ipconfig_main(int argc, char *argv[]) +{ + struct netdev *dev; + int c, port; + int err = 0; + + /* If progname is set we're invoked from another program */ + if (!progname) { + struct timeval now; + progname = argv[0]; + gettimeofday(&now, NULL); + srand48(now.tv_usec ^ (now.tv_sec << 24)); + } + + if ((err = setjmp(abort_buf))) + return err; + + /* Default vendor identifier */ + set_vendor_identifier("Linux ipconfig"); + + do { + c = getopt(argc, argv, "c:d:i:onp:t:"); + if (c == EOF) + break; + + switch (c) { + case 'c': + default_caps = parse_proto(optarg); + break; + case 'p': + port = atoi(optarg); + if (port <= 0 || port > USHRT_MAX) { + fprintf(stderr, + "%s: invalid port number %d\n", + progname, port); + longjmp(abort_buf, 1); + } + cfg_local_port = port; + cfg_remote_port = cfg_local_port - 1; + break; + case 't': + loop_timeout = atoi(optarg); + if (loop_timeout < 0) { + fprintf(stderr, + "%s: invalid timeout 
%d\n", + progname, loop_timeout); + longjmp(abort_buf, 1); + } + break; + case 'i': + set_vendor_identifier(optarg); + break; + case 'o': + bringup_first = 1; + break; + case 'n': + do_not_config = 1; + break; + case 'd': + dev = add_device(optarg); + if (dev) + bringup_device(dev); + break; + case '?': + fprintf(stderr, "%s: invalid option -%c\n", + progname, optopt); + longjmp(abort_buf, 1); + } + } while (1); + + for (c = optind; c < argc; c++) { + dev = add_device(argv[c]); + if (dev) + bringup_device(dev); + } + + if (check_autoconfig()) { + if (cfg_local_port != LOCAL_PORT) { + printf("IP-Config: binding source port to %d, " + "dest to %d\n", + cfg_local_port, cfg_remote_port); + } + err = loop(); + } + + return err; +} diff --git a/usr/kinit/ipconfig/netdev.c b/usr/kinit/ipconfig/netdev.c new file mode 100644 index 0000000..de87f96 --- /dev/null +++ b/usr/kinit/ipconfig/netdev.c @@ -0,0 +1,279 @@ +/* + * ioctl-based device configuration + */ +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <net/if.h> +#include <net/if_arp.h> +#include <netinet/in.h> +#include <linux/route.h> +#include <linux/sockios.h> + +#include "netdev.h" + +static int cfd = -1; + +static void copy_name(struct netdev *dev, struct ifreq *ifr) +{ + strncpy(ifr->ifr_name, dev->name, sizeof(ifr->ifr_name)); + ifr->ifr_name[sizeof(ifr->ifr_name) - 1] = '\0'; +} + +int netdev_getflags(struct netdev *dev, short *flags) +{ + struct ifreq ifr; + + copy_name(dev, &ifr); + + if (ioctl(cfd, SIOCGIFFLAGS, &ifr) == -1) { + perror("SIOCGIFFLAGS"); + return -1; + } + + *flags = ifr.ifr_flags; + return 0; +} + +static int netdev_sif_addr(struct ifreq *ifr, int cmd, uint32_t addr) +{ + struct sockaddr_in sin; + + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = addr; + + memcpy(&ifr->ifr_addr, &sin, sizeof sin); + + return ioctl(cfd, cmd, ifr); +} + +int 
netdev_setaddress(struct netdev *dev) +{ + struct ifreq ifr; + + copy_name(dev, &ifr); + + if (dev->ip_addr != INADDR_ANY && + netdev_sif_addr(&ifr, SIOCSIFADDR, dev->ip_addr) == -1) { + perror("SIOCSIFADDR"); + return -1; + } + + if (dev->ip_broadcast != INADDR_ANY && + netdev_sif_addr(&ifr, SIOCSIFBRDADDR, dev->ip_broadcast) == -1) { + perror("SIOCSIFBRDADDR"); + return -1; + } + + if (dev->ip_netmask != INADDR_ANY && + netdev_sif_addr(&ifr, SIOCSIFNETMASK, dev->ip_netmask) == -1) { + perror("SIOCSIFNETMASK"); + return -1; + } + + return 0; +} + +static void set_s_addr(struct sockaddr *saddr, uint32_t ipaddr) +{ + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = ipaddr, + }; + memcpy(saddr, &sin, sizeof sin); +} + +int netdev_setroutes(struct netdev *dev) +{ + struct rtentry r; + + /* RFC3442 demands: + If the DHCP server returns both a Classless Static Routes option and + a Router option, the DHCP client MUST ignore the Router option. */ + if (dev->routes != NULL) { + struct route *cur; + for (cur = dev->routes; cur != NULL; cur = cur->next) { + memset(&r, 0, sizeof(r)); + + r.rt_dev = dev->name; + set_s_addr(&r.rt_dst, cur->subnet); + set_s_addr(&r.rt_gateway, cur->gateway); + set_s_addr(&r.rt_genmask, netdev_genmask(cur->netmask_width)); + r.rt_flags = RTF_UP; + if (cur->gateway != 0) { + r.rt_flags |= RTF_GATEWAY; + } + + if (ioctl(cfd, SIOCADDRT, &r) == -1 && errno != EEXIST) { + perror("SIOCADDRT"); + return -1; + } + } + } else if (dev->ip_gateway != INADDR_ANY) { + memset(&r, 0, sizeof(r)); + + set_s_addr(&r.rt_dst, INADDR_ANY); + set_s_addr(&r.rt_gateway, dev->ip_gateway); + set_s_addr(&r.rt_genmask, INADDR_ANY); + r.rt_flags = RTF_UP | RTF_GATEWAY; + + if (ioctl(cfd, SIOCADDRT, &r) == -1 && errno != EEXIST) { + perror("SIOCADDRT"); + return -1; + } + } + return 0; +} + +int netdev_setmtu(struct netdev *dev) +{ + struct ifreq ifr; + + copy_name(dev, &ifr); + ifr.ifr_mtu = dev->mtu; + + return ioctl(cfd, SIOCSIFMTU, &ifr); +} + 
+static int netdev_gif_addr(struct ifreq *ifr, int cmd, uint32_t * ptr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr; + + if (ioctl(cfd, cmd, ifr) == -1) + return -1; + + *ptr = sin->sin_addr.s_addr; + + return 0; +} + +int netdev_up(struct netdev *dev) +{ + struct ifreq ifr; + + copy_name(dev, &ifr); + + if (ioctl(cfd, SIOCGIFFLAGS, &ifr) == -1) { + perror("SIOCGIFFLAGS"); + return -1; + } + + ifr.ifr_flags |= IFF_UP; + + if (ioctl(cfd, SIOCSIFFLAGS, &ifr) == -1) { + perror("SIOCSIFFLAGS"); + return -1; + } + return 0; +} + +int netdev_down(struct netdev *dev) +{ + struct ifreq ifr; + + copy_name(dev, &ifr); + + if (ioctl(cfd, SIOCGIFFLAGS, &ifr) == -1) { + perror("SIOCGIFFLAGS"); + return -1; + } + + ifr.ifr_flags &= ~IFF_UP; + + if (ioctl(cfd, SIOCSIFFLAGS, &ifr) == -1) { + perror("SIOCSIFFLAGS"); + return -1; + } + return 0; +} + +int netdev_init_if(struct netdev *dev) +{ + struct ifreq ifr; + + if (cfd == -1) + cfd = socket(AF_INET, SOCK_DGRAM, 0); + if (cfd == -1) { + fprintf(stderr, "ipconfig: %s: socket(AF_INET): %s\n", + dev->name, strerror(errno)); + return -1; + } + + copy_name(dev, &ifr); + + if (ioctl(cfd, SIOCGIFINDEX, &ifr) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFINDEX: %s\n", + dev->name, strerror(errno)); + return -1; + } + + dev->ifindex = ifr.ifr_ifindex; + + if (ioctl(cfd, SIOCGIFMTU, &ifr) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFMTU: %s\n", + dev->name, strerror(errno)); + return -1; + } + + dev->mtu = ifr.ifr_mtu; + + if (ioctl(cfd, SIOCGIFHWADDR, &ifr) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFHWADDR: %s\n", + dev->name, strerror(errno)); + return -1; + } + + dev->hwtype = ifr.ifr_hwaddr.sa_family; + dev->hwlen = 0; + + switch (dev->hwtype) { + case ARPHRD_ETHER: + dev->hwlen = 6; + break; + case ARPHRD_EUI64: + dev->hwlen = 8; + break; + case ARPHRD_LOOPBACK: + dev->hwlen = 0; + break; + default: + return -1; + } + + memcpy(dev->hwaddr, ifr.ifr_hwaddr.sa_data, dev->hwlen); + memset(dev->hwbrd, 
0xff, dev->hwlen); + + /* + * Try to get the current interface information. + */ + if (dev->ip_addr == INADDR_NONE && + netdev_gif_addr(&ifr, SIOCGIFADDR, &dev->ip_addr) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFADDR: %s\n", + dev->name, strerror(errno)); + dev->ip_addr = 0; + dev->ip_broadcast = 0; + dev->ip_netmask = 0; + return 0; + } + + if (dev->ip_broadcast == INADDR_NONE && + netdev_gif_addr(&ifr, SIOCGIFBRDADDR, &dev->ip_broadcast) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFBRDADDR: %s\n", + dev->name, strerror(errno)); + dev->ip_broadcast = 0; + } + + if (dev->ip_netmask == INADDR_NONE && + netdev_gif_addr(&ifr, SIOCGIFNETMASK, &dev->ip_netmask) == -1) { + fprintf(stderr, "ipconfig: %s: SIOCGIFNETMASK: %s\n", + dev->name, strerror(errno)); + dev->ip_netmask = 0; + } + + return 0; +} diff --git a/usr/kinit/ipconfig/netdev.h b/usr/kinit/ipconfig/netdev.h new file mode 100644 index 0000000..dbc80cd --- /dev/null +++ b/usr/kinit/ipconfig/netdev.h @@ -0,0 +1,107 @@ +#ifndef IPCONFIG_NETDEV_H +#define IPCONFIG_NETDEV_H + +#include <arpa/inet.h> +#include <sys/utsname.h> +#include <net/if.h> + +#define BPLEN 256 +#define FNLEN 128 /* from DHCP RFC 2131 */ + +struct route { + uint32_t subnet; /* subnet */ + uint32_t netmask_width; /* subnet mask width */ + uint32_t gateway; /* gateway */ + struct route *next; +}; + +struct netdev { + char *name; /* Device name */ + unsigned int ifindex; /* interface index */ + unsigned int hwtype; /* ARPHRD_xxx */ + unsigned int hwlen; /* HW address length */ + uint8_t hwaddr[16]; /* HW address */ + uint8_t hwbrd[16]; /* Broadcast HW address */ + unsigned int mtu; /* Device mtu */ + unsigned int caps; /* Capabilities */ + time_t open_time; + + struct { /* BOOTP/DHCP info */ + int fd; + uint32_t xid; + uint32_t gateway; /* BOOTP/DHCP gateway */ + } bootp; + + struct { /* RARP information */ + int fd; + } rarp; + + uint8_t proto; /* a protocol used (e.g. 
PROTO_DHCP) */ + uint32_t ip_addr; /* my address */ + uint32_t ip_broadcast; /* broadcast address */ + uint32_t ip_server; /* server address */ + uint32_t ip_netmask; /* my subnet mask */ + uint32_t ip_gateway; /* my gateway */ + uint32_t ip_nameserver[2]; /* two nameservers */ + uint32_t serverid; /* dhcp serverid */ + uint32_t dhcpleasetime; /* duration in seconds */ + char reqhostname[SYS_NMLN]; /* requested hostname */ + char hostname[SYS_NMLN]; /* hostname */ + char dnsdomainname[SYS_NMLN]; /* dns domain name */ + char nisdomainname[SYS_NMLN]; /* nis domain name */ + char bootpath[BPLEN]; /* boot path */ + char filename[FNLEN]; /* filename */ + char *domainsearch; /* decoded, NULL or malloc-ed */ + struct route *routes; /* decoded, NULL or malloc-ed list */ + long uptime; /* when complete configuration */ + int pkt_fd; /* packet socket for this interface */ + struct netdev *next; /* next configured i/f */ +}; + +extern struct netdev *ifaces; + +/* + * Device capabilities + */ +#define CAP_BOOTP (1<<0) +#define CAP_DHCP (1<<1) +#define CAP_RARP (1<<2) + +/* + * Device states + */ +#define DEVST_UP 0 +#define DEVST_BOOTP 1 +#define DEVST_DHCPDISC 2 +#define DEVST_DHCPREQ 3 +#define DEVST_COMPLETE 4 +#define DEVST_ERROR 5 + +int netdev_getflags(struct netdev *dev, short *flags); +int netdev_setaddress(struct netdev *dev); +int netdev_setroutes(struct netdev *dev); +int netdev_up(struct netdev *dev); +int netdev_down(struct netdev *dev); +int netdev_init_if(struct netdev *dev); +int netdev_setmtu(struct netdev *dev); + +static inline int netdev_running(struct netdev *dev) +{ + short flags; + int ret = netdev_getflags(dev, &flags); + + return ret ? 0 : !!(flags & IFF_RUNNING); +} + +static inline uint32_t netdev_genmask(uint32_t netmask_width) +{ + /* Map netmask width to network mask in network byte order. 
+ Example: 24 -> "255.255.255.0" -> htonl(0xFFFFFF00). + Widths of 32 or more yield the all-ones mask: for a width above 32 + the shift count (32 - width) would wrap and be undefined. */ + if (netmask_width == 0) { + return 0; + } else if (netmask_width >= 32) { + return htonl(0xFFFFFFFFu); + } else { + return htonl(~((1u << (32 - netmask_width)) - 1)); + } +} + +#endif /* IPCONFIG_NETDEV_H */ diff --git a/usr/kinit/ipconfig/packet.c b/usr/kinit/ipconfig/packet.c new file mode 100644 index 0000000..2e1487d --- /dev/null +++ b/usr/kinit/ipconfig/packet.c @@ -0,0 +1,278 @@ +#include <errno.h>/*XXX*/ +/* + * Packet socket handling glue. + */ +#include <sys/types.h> +#include <sys/socket.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <net/if_packet.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netpacket/packet.h> +#include <asm/byteorder.h> +#include <arpa/inet.h> +#include <netinet/ip.h> +#include <netinet/udp.h> + +#include "ipconfig.h" +#include "netdev.h" +#include "packet.h" + +uint16_t cfg_local_port = LOCAL_PORT; +uint16_t cfg_remote_port = REMOTE_PORT; + +int packet_open(struct netdev *dev) +{ + struct sockaddr_ll sll; + int fd, rv, one = 1; + + /* + * Get a PACKET socket for IP traffic.
+ */ + fd = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IP)); + if (fd == -1) { + perror("socket"); + return -1; + } + + /* + * We want to broadcast + */ + if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &one, + sizeof(one)) == -1) { + perror("SO_BROADCAST"); + close(fd); + return -1; + } + + memset(&sll, 0, sizeof(sll)); + sll.sll_family = AF_PACKET; + sll.sll_ifindex = dev->ifindex; + + rv = bind(fd, (struct sockaddr *)&sll, sizeof(sll)); + if (-1 == rv) { + perror("bind"); + close(fd); + return -1; + } + + dev->pkt_fd = fd; + return fd; +} + +void packet_close(struct netdev *dev) +{ + close(dev->pkt_fd); + dev->pkt_fd = -1; +} + +static unsigned int ip_checksum(uint16_t *hdr, int len) +{ + unsigned int chksum = 0; + + while (len) { + chksum += *hdr++; + chksum += *hdr++; + len--; + } + chksum = (chksum & 0xffff) + (chksum >> 16); + chksum = (chksum & 0xffff) + (chksum >> 16); + return (~chksum) & 0xffff; +} + +struct header { + struct iphdr ip; + struct udphdr udp; +} __attribute__ ((packed, aligned(4))); + +static struct header ipudp_hdrs = { + .ip = { + .ihl = 5, + .version = IPVERSION, + .frag_off = __constant_htons(IP_DF), + .ttl = 64, + .protocol = IPPROTO_UDP, + .saddr = INADDR_ANY, + .daddr = INADDR_BROADCAST, + }, + .udp = { + .source = __constant_htons(LOCAL_PORT), + .dest = __constant_htons(REMOTE_PORT), + .len = 0, + .check = 0, + }, +}; + +#ifdef DEBUG /* Only used with dprintf() */ +static char *ntoa(uint32_t addr) +{ + struct in_addr in = { addr }; + return inet_ntoa(in); +} +#endif /* DEBUG */ + +/* + * Send a packet. The options are listed in iov[1...iov_len-1]. + * iov[0] is reserved for the bootp packet header. 
+ */ +int packet_send(struct netdev *dev, struct iovec *iov, int iov_len) +{ + struct sockaddr_ll sll; + struct msghdr msg; + int i, len = 0; + + memset(&sll, 0, sizeof(sll)); + msg.msg_name = &sll; + msg.msg_namelen = sizeof(sll); + msg.msg_iov = iov; + msg.msg_iovlen = iov_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + if (cfg_local_port != LOCAL_PORT) { + ipudp_hdrs.udp.source = htons(cfg_local_port); + ipudp_hdrs.udp.dest = htons(cfg_remote_port); + } + + dprintf("\n udp src %d dst %d", ntohs(ipudp_hdrs.udp.source), + ntohs(ipudp_hdrs.udp.dest)); + + dprintf("\n ip src %s ", ntoa(ipudp_hdrs.ip.saddr)); + dprintf("dst %s ", ntoa(ipudp_hdrs.ip.daddr)); + + /* + * Glue in the ip+udp header iovec + */ + iov[0].iov_base = &ipudp_hdrs; + iov[0].iov_len = sizeof(struct header); + + for (i = 0; i < iov_len; i++) + len += iov[i].iov_len; + + sll.sll_family = AF_PACKET; + sll.sll_protocol = htons(ETH_P_IP); + sll.sll_ifindex = dev->ifindex; + sll.sll_hatype = dev->hwtype; + sll.sll_pkttype = PACKET_BROADCAST; + sll.sll_halen = dev->hwlen; + memcpy(sll.sll_addr, dev->hwbrd, dev->hwlen); + + ipudp_hdrs.ip.tot_len = htons(len); + ipudp_hdrs.ip.check = 0; + ipudp_hdrs.ip.check = ip_checksum((uint16_t *) &ipudp_hdrs.ip, + ipudp_hdrs.ip.ihl); + + ipudp_hdrs.udp.len = htons(len - sizeof(struct iphdr)); + + dprintf("\n bytes %d\n", len); + + return sendmsg(dev->pkt_fd, &msg, 0); +} + +void packet_discard(struct netdev *dev) +{ + struct iphdr iph; + struct sockaddr_ll sll; + socklen_t sllen = sizeof(sll); + + sll.sll_ifindex = dev->ifindex; + + recvfrom(dev->pkt_fd, &iph, sizeof(iph), 0, + (struct sockaddr *)&sll, &sllen); +} + +/* + * Receive a bootp packet. The options are listed in iov[1...iov_len]. + * iov[0] must point to the bootp packet header. 
+ * Returns: + * -1 = Error, try again later + * 0 = Discarded packet (non-DHCP/BOOTP traffic) + * >0 = Size of packet + */ +int packet_recv(struct netdev *dev, struct iovec *iov, int iov_len) +{ + struct iphdr *ip, iph; + struct udphdr *udp; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = iov, + .msg_iovlen = iov_len, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = 0 + }; + int ret, iphl; + struct sockaddr_ll sll; + socklen_t sllen = sizeof(sll); + + sll.sll_ifindex = dev->ifindex; + msg.msg_name = &sll; + msg.msg_namelen = sllen; + + ret = recvfrom(dev->pkt_fd, &iph, sizeof(struct iphdr), + MSG_PEEK, (struct sockaddr *)&sll, &sllen); + if (ret == -1) + return -1; + + /* A frame too short to hold an IP header leaves iph partly unset. */ + if (ret < (int)sizeof(struct iphdr)) + goto discard_pkt; + + if (iph.ihl < 5 || iph.version != IPVERSION) + goto discard_pkt; + + iphl = iph.ihl * 4; + + ip = malloc(iphl + sizeof(struct udphdr)); + if (!ip) + goto discard_pkt; + + udp = (struct udphdr *)((char *)ip + iphl); + + iov[0].iov_base = ip; + iov[0].iov_len = iphl + sizeof(struct udphdr); + + ret = recvmsg(dev->pkt_fd, &msg, 0); + if (ret == -1) + goto free_pkt; + + dprintf("<- bytes %d ", ret); + + /* + * Only look at the IP and UDP headers if the datagram actually + * filled them; otherwise the checks below would read bytes of the + * freshly allocated buffer that were never written. + */ + if (ret < (int)(iphl + sizeof(struct udphdr))) + goto free_pkt; + + if (ip_checksum((uint16_t *) ip, ip->ihl) != 0) + goto free_pkt; + + dprintf("\n ip src %s ", ntoa(ip->saddr)); + dprintf("dst %s ", ntoa(ip->daddr)); + + if (ntohs(ip->tot_len) > ret || ip->protocol != IPPROTO_UDP) + goto free_pkt; + + ret -= 4 * ip->ihl; + + dprintf("\n udp src %d dst %d ", ntohs(udp->source), + ntohs(udp->dest)); + + if (udp->source != htons(cfg_remote_port) || + udp->dest != htons(cfg_local_port)) + goto free_pkt; + + if (ntohs(udp->len) > ret) + goto free_pkt; + + ret -= sizeof(struct udphdr); + + free(ip); + + return ret; + +free_pkt: + dprintf("freed\n"); + free(ip); + return 0; + +discard_pkt: + dprintf("discarded\n"); + packet_discard(dev); + return 0; +} diff --git a/usr/kinit/ipconfig/packet.h b/usr/kinit/ipconfig/packet.h new file mode 100644 index 0000000..4367efe --- /dev/null +++ b/usr/kinit/ipconfig/packet.h @@ -0,0 +1,12 @@
+#ifndef IPCONFIG_PACKET_H +#define IPCONFIG_PACKET_H + +struct iovec; + +int packet_open(struct netdev *dev); +void packet_close(struct netdev *dev); +int packet_send(struct netdev *dev, struct iovec *iov, int iov_len); +void packet_discard(struct netdev *dev); +int packet_recv(struct netdev *dev, struct iovec *iov, int iov_len); + +#endif /* IPCONFIG_PACKET_H */ diff --git a/usr/kinit/kinit.c b/usr/kinit/kinit.c new file mode 100644 index 0000000..28d2953 --- /dev/null +++ b/usr/kinit/kinit.c @@ -0,0 +1,331 @@ +#include <sys/mount.h> +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <alloca.h> +#include <limits.h> +#include <ctype.h> +#include <termios.h> + +#include "kinit.h" +#include "ipconfig.h" +#include "run-init.h" +#include "resume.h" + +const char *progname = "kinit"; +int mnt_procfs; +int mnt_sysfs; + +#ifdef DEBUG +void dump_args(int argc, char *argv[]) +{ + int i; + + printf(" argc == %d\n", argc); + + for (i = 0; i < argc; i++) + printf(" argv[%d]: \"%s\"\n", i, argv[i]); + + if (argv[argc] != NULL) + printf(" argv[%d]: \"%s\" (SHOULD BE NULL)\n", + argc, argv[argc]); +} +#endif /* DEBUG */ + + +/* + * Pass the "ip=" and "nfsaddrs=" arguments found in argv to + * ipconfig_main(). Returns its result, or 0 when argv holds no such + * argument and there is nothing to configure. + */ +static int do_ipconfig(int argc, char *argv[]) +{ + int i, a = 0; + char **args = alloca((argc + 3) * sizeof(char *)); + + if (!args) + return -1; + + args[a++] = (char *)"IP-Config"; + args[a++] = (char *)"-i"; + args[a++] = (char *)"Linux kinit"; + + dprintf("Running ipconfig\n"); + + for (i = 1; i < argc; i++) { + if (strncmp(argv[i], "ip=", 3) == 0 || + strncmp(argv[i], "nfsaddrs=", 9) == 0) { + args[a++] = argv[i]; + } + } + + /* Three entries are always present; run only if more were added. */ + if (a > 3) { + args[a] = NULL; + dump_args(a, args); + return ipconfig_main(a, args); + } + + return 0; +} + +static int split_cmdline(int cmdcmax, char *cmdv[], char *argv0, + char *cmdlines[], char *args[]) +{ + int was_space; + char c, *p; + int vmax = cmdcmax; + int v = 1; + int space; + + if (cmdv) + cmdv[0] = argv0; + + /* First, add the
parsable command lines */ + + while (*cmdlines) { + p = *cmdlines++; + was_space = 1; + while (v < vmax) { + c = *p; + /* isspace() is only defined for unsigned char values and EOF */ + space = isspace((unsigned char)c); + if ((space || !c) && !was_space) { + if (cmdv) + *p = '\0'; + v++; + } else if (was_space) { + if (cmdv) + cmdv[v] = p; + } + + if (!c) + break; + + was_space = space; + p++; + } + } + + /* Second, add the explicit command line arguments */ + + while (*args && v < vmax) { + if (cmdv) + cmdv[v] = *args; + v++; + args++; + } + + if (cmdv) + cmdv[v] = NULL; + + return v; +} + +static int mount_sys_fs(const char *check, const char *fsname, + const char *fstype) +{ + struct stat st; + + if (stat(check, &st) == 0) + return 0; + + mkdir(fsname, 0555); + + if (mount("none", fsname, fstype, 0, NULL) == -1) { + fprintf(stderr, "%s: could not mount %s as %s\n", + progname, fsname, fstype); + return -1; + } + + return 1; +} + +static void check_path(const char *path) +{ + struct stat st; + + if (stat(path, &st) == -1) { + if (errno != ENOENT) { + perror("stat"); + exit(1); + } + if (mkdir(path, 0755) == -1) { + perror("mkdir"); + exit(1); + } + } else if (!S_ISDIR(st.st_mode)) { + fprintf(stderr, "%s: '%s' not a directory\n", progname, path); + exit(1); + } +} + +static const char *find_init(const char *root, const char *user) +{ + const char *init_paths[] = { + "/sbin/init", "/bin/init", "/etc/init", "/bin/sh", NULL + }; + const char **p; + const char *path; + + if (chdir(root)) { + perror("chdir"); + exit(1); + } + + if (user) + dprintf("Checking for init: %s\n", user); + + if (user && user[0] == '/' && !access(user+1, X_OK)) { + path = user; + } else { + for (p = init_paths; *p; p++) { + dprintf("Checking for init: %s\n", *p); + if (!access(*p+1, X_OK)) + break; + } + path = *p; + } + chdir("/"); + return path; +} + +/* This is the argc and argv we pass to init */ +const char *init_path; +int init_argc; +char **init_argv; + +extern ssize_t readfile(const char *, char **); + +int main(int argc, char *argv[]) +{ + char **cmdv, **args; +
char *cmdlines[3]; + int i; + const char *errmsg; + int ret = 0; + int cmdc; + int fd; + struct timeval now; + + gettimeofday(&now, NULL); + srand48(now.tv_usec ^ (now.tv_sec << 24)); + + /* Default parameters for anything init-like we execute */ + init_argc = argc; + init_argv = alloca((argc+1)*sizeof(char *)); + memcpy(init_argv, argv, (argc+1)*sizeof(char *)); + + if ((fd = open("/dev/console", O_RDWR)) != -1) { + dup2(fd, STDIN_FILENO); + dup2(fd, STDOUT_FILENO); + dup2(fd, STDERR_FILENO); + + if (fd > STDERR_FILENO) + close(fd); + } + + mnt_procfs = mount_sys_fs("/proc/cmdline", "/proc", "proc") >= 0; + if (!mnt_procfs) { + ret = 1; + goto bail; + } + + mnt_sysfs = mount_sys_fs("/sys/bus", "/sys", "sysfs") >= 0; + if (!mnt_sysfs) { + ret = 1; + goto bail; + } + + /* Construct the effective kernel command line. The + effective kernel command line consists of /arch.cmd, if + it exists, /proc/cmdline, plus any arguments after an -- + argument on the proper command line, in that order. */ + + ret = readfile("/arch.cmd", &cmdlines[0]); + if (ret < 0) + cmdlines[0] = ""; + + ret = readfile("/proc/cmdline", &cmdlines[1]); + if (ret < 0) { + fprintf(stderr, "%s: cannot read /proc/cmdline\n", progname); + ret = 1; + goto bail; + } + + cmdlines[2] = NULL; + + /* Find an -- argument, and if so append to the command line */ + for (i = 1; i < argc; i++) { + if (!strcmp(argv[i], "--")) { + i++; + break; + } + } + args = &argv[i]; /* Points either to first argument past -- or + to the final NULL */ + + /* Count the number of arguments */ + cmdc = split_cmdline(INT_MAX, NULL, argv[0], cmdlines, args); + + /* Actually generate the cmdline array */ + cmdv = (char **)alloca((cmdc+1)*sizeof(char *)); + if (split_cmdline(cmdc, cmdv, argv[0], cmdlines, args) != cmdc) { + ret = 1; + goto bail; + } + + /* Debugging... 
*/ + dump_args(cmdc, cmdv); + + /* Resume from suspend-to-disk, if appropriate */ + /* If successful, does not return */ + do_resume(cmdc, cmdv); + + /* Initialize networking, if applicable */ + do_ipconfig(cmdc, cmdv); + + check_path("/root"); + do_mounts(cmdc, cmdv); + + if (mnt_procfs) { + umount2("/proc", 0); + mnt_procfs = 0; + } + + if (mnt_sysfs) { + umount2("/sys", 0); + mnt_sysfs = 0; + } + + init_path = find_init("/root", get_arg(cmdc, cmdv, "init=")); + if (!init_path) { + fprintf(stderr, "%s: init not found!\n", progname); + ret = 2; + goto bail; + } + + init_argv[0] = strrchr(init_path, '/') + 1; + + errmsg = run_init("/root", "/dev/console", + get_arg(cmdc, cmdv, "drop_capabilities="), false, + false, init_path, init_argv); + + /* If run_init returned, something went bad */ + fprintf(stderr, "%s: %s: %s\n", progname, errmsg, strerror(errno)); + ret = 2; + goto bail; + +bail: + if (mnt_procfs) + umount2("/proc", 0); + + if (mnt_sysfs) + umount2("/sys", 0); + + /* + * If we get here, something bad probably happened, and the kernel + * will most likely panic. Drain console output so the user can + * figure out what happened. 
+ */ + tcdrain(2); + tcdrain(1); + + return ret; +} diff --git a/usr/kinit/kinit.h b/usr/kinit/kinit.h new file mode 100644 index 0000000..ee006f4 --- /dev/null +++ b/usr/kinit/kinit.h @@ -0,0 +1,70 @@ +/* + * kinit/kinit.h + */ + +#ifndef KINIT_H +#define KINIT_H + +#include <stddef.h> +#include <stdio.h> +#include <sys/types.h> + +int do_mounts(int argc, char *argv[]); +int mount_nfs_root(int argc, char *argv[], int flags); +int ramdisk_load(int argc, char *argv[]); +void md_run(int argc, char *argv[]); +const char *bdevname(dev_t dev); + +extern int mnt_procfs; +extern int mnt_sysfs; + +extern int init_argc; +extern char **init_argv; +extern const char *progname; + +char *get_arg(int argc, char *argv[], const char *name); +int get_flag(int argc, char *argv[], const char *name); + +int getintfile(const char *path, long *val); + +ssize_t readfile(const char *path, char **pptr); +ssize_t freadfile(FILE *f, char **pptr); + +/* + * min()/max() macros that also do + * strict type-checking.. See the + * "unnecessary" pointer comparison. + * From the Linux kernel. + */ +#define min(x, y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#define max(x, y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) + + +#ifdef DEBUG +# define dprintf printf +#else +# define dprintf(...) 
((void)0) +#endif + +#ifdef DEBUG +void dump_args(int argc, char *argv[]); +#else +static inline void dump_args(int argc, char *argv[]) +{ + (void)argc; + (void)argv; +} +#endif + +int drop_capabilities(const char *caps); + +#endif /* KINIT_H */ diff --git a/usr/kinit/name_to_dev.c b/usr/kinit/name_to_dev.c new file mode 100644 index 0000000..c57b7ce --- /dev/null +++ b/usr/kinit/name_to_dev.c @@ -0,0 +1,276 @@ +#include <ctype.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <alloca.h> +#include <inttypes.h> + +#include "do_mounts.h" +#include "kinit.h" + +#define BUF_SZ 65536 + +/* Find dev_t for e.g. "hda,NULL" or "hdb,2" */ +static dev_t try_name(char *name, int part) +{ + char path[BUF_SZ]; + char buf[BUF_SZ]; + int range; + unsigned int major_num, minor_num; + dev_t res; + char *s; + int len; + int fd; + + /* read device number from /sys/block/.../dev */ + snprintf(path, sizeof(path), "/sys/block/%s/dev", name); + fd = open(path, 0, 0); + if (fd < 0) + goto fail; + len = read(fd, buf, BUF_SZ); + close(fd); + + if (len <= 0 || len == BUF_SZ || buf[len - 1] != '\n') + goto fail; + buf[len - 1] = '\0'; + major_num = strtoul(buf, &s, 10); + if (*s != ':') + goto fail; + minor_num = strtoul(s + 1, &s, 10); + if (*s) + goto fail; + res = makedev(major_num, minor_num); + + /* if it's there and we are not looking for a partition - that's it */ + if (!part) + return res; + + /* otherwise read range from .../range */ + snprintf(path, sizeof(path), "/sys/block/%s/range", name); + fd = open(path, 0, 0); + if (fd < 0) + goto fail; + len = read(fd, buf, 32); + close(fd); + if (len <= 0 || len == 32 || buf[len - 1] != '\n') + goto fail; + buf[len - 1] = '\0'; + range = strtoul(buf, &s, 10); + if (*s) + goto fail; + + /* if partition is within range - we got it */ + if (part < range) { + dprintf("kinit: try_name %s,%d = %s\n", name, part, + 
bdevname(res + part)); + return res + part; + } + +fail: + return (dev_t) 0; +} + +/* + * Find dev_t for a block device based on the provided GPT partlabel. + * The partlabel to block device mapping is found by scanning all + * the entries in /sys/dev/block/, opening the uevent file and picking + * the device where the PARTNAME= entry matches partlabel. + */ +static dev_t partlabel_to_dev_t(const char *plabel) +{ + char path[BUF_SZ]; + DIR *dir; + FILE *fp; + struct dirent *dent; + char *ret; + char line[BUF_SZ]; + int match_label, major, minor; + + dir = opendir("/sys/dev/block"); + if (!dir) { + dprintf(stderr, "%s: error %i (%s) opening /sys/dev/block\n", + __func__, errno, strerror(errno)); + goto fail; + } + + while ((dent = readdir(dir)) != NULL) { + if (!strncmp(dent->d_name, ".", 1)) + continue; + snprintf(path, sizeof(path), "/sys/dev/block/%s/uevent", + dent->d_name); + + fp = fopen(path, "r"); + if (fp == NULL) { + dprintf(stderr, "kinit %s: error %i (%s) opening %s", + __func__, errno, strerror(errno), path); + continue; + } + + major = 0; + minor = 0; + match_label = 0; + while (!feof(fp)) { + ret = fgets(line, sizeof(line), fp); + if (ret == NULL) + continue; + if (!strncmp(line, "MAJOR=", 6)) + major = atoi(line+6); + if (!strncmp(line, "MINOR=", 6)) + minor = atoi(line+6); + if (!strncmp(line, "PARTNAME=", 9)) { + line[strcspn(line, "\n")] = 0; + if (!strncmp(line + 9, plabel, sizeof(line)-9)) + match_label = 1; + } + if (match_label && major && minor) { + fclose(fp); + closedir(dir); + return makedev(major, minor); + } + } + fclose(fp); + } + closedir(dir); + +fail: + return (dev_t) 0; +} + +/* + * Convert a name into device number. 
We accept the following variants: + * + * 1) device number in hexadecimal represents itself + * 2) device number in major:minor decimal represents itself + * 3) /dev/nfs represents Root_NFS + * 4) /dev/<disk_name> represents the device number of disk + * 5) /dev/<disk_name><decimal> represents the device number + * of partition - device number of disk plus the partition number + * 6) /dev/<disk_name>p<decimal> - same as the above, that form is + * used when disk name of partitioned disk ends on a digit. + * 7) an actual block device node in the initramfs filesystem + * 8) PARTLABEL=<name> with name being the GPT partition label. + * + * If name doesn't fall into any of the categories above, we return 0. + * Sysfs is used to check if something is a disk name - it has + * all known disks under /sys/block. If the disk name + * contains slashes, the name of the sysfs node has them replaced with + * '!' characters. try_name() does the actual checks, assuming that sysfs + * is mounted on rootfs /sys. + */ + +static inline dev_t name_to_dev_t_real(const char *name) +{ + char *p; + dev_t res = 0; + char *s; + int part; + struct stat st; + int len; + const char *devname; + char *cptr, *e1, *e2; + int major_num, minor_num; + + /* Are we a multi root line? 
*/ + if (strchr(name, ',')) + return Root_MULTI; + + if (!strncmp(name, "PARTLABEL=", 10)) + return partlabel_to_dev_t(name + 10); + + if (name[0] == '/') { + devname = name; + } else { + char *dname = alloca(strlen(name) + 6); + sprintf(dname, "/dev/%s", name); + devname = dname; + } + + if (!stat(devname, &st) && S_ISBLK(st.st_mode)) + return st.st_rdev; + + if (strncmp(name, "/dev/", 5)) { + cptr = strchr(devname+5, ':'); + if (cptr && cptr[1] != '\0') { + /* Colon-separated decimal device number */ + *cptr = '\0'; + major_num = strtoul(devname+5, &e1, 10); + minor_num = strtoul(cptr+1, &e2, 10); + if (!*e1 && !*e2) + return makedev(major_num, minor_num); + *cptr = ':'; + } else { + /* Hexadecimal device number */ + res = (dev_t) strtoul(name, &p, 16); + if (!*p) + return res; + } + } else { + name += 5; + } + + if (!strcmp(name, "nfs")) + return Root_NFS; + + if (!strcmp(name, "ram")) /* /dev/ram - historic alias for /dev/ram0 */ + return Root_RAM0; + + if (!strncmp(name, "mtd", 3)) + return Root_MTD; + + len = strlen(name); + s = alloca(len + 1); + memcpy(s, name, len + 1); + + for (p = s; *p; p++) + if (*p == '/') + *p = '!'; + res = try_name(s, 0); + if (res) + return res; + + while (p > s && isdigit(p[-1])) + p--; + if (p == s || !*p || *p == '0') + goto fail; + part = strtoul(p, NULL, 10); + *p = '\0'; + res = try_name(s, part); + if (res) + return res; + + if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') + goto fail; + p[-1] = '\0'; + res = try_name(s, part); + return res; + +fail: + return (dev_t) 0; +} + +dev_t name_to_dev_t(const char *name) +{ + dev_t dev = name_to_dev_t_real(name); + + dprintf("kinit: name_to_dev_t(%s) = %s\n", name, bdevname(dev)); + return dev; +} + +#ifdef TEST_NAMETODEV /* Standalone test */ + +int main(int argc, char *argv[]) +{ + int i; + + for (i = 1; i < argc; i++) + name_to_dev_t(argv[i]); + + return 0; +} + +#endif diff --git a/usr/kinit/nfsmount/Kbuild b/usr/kinit/nfsmount/Kbuild new file mode 100644 index 
0000000..5f34950 --- /dev/null +++ b/usr/kinit/nfsmount/Kbuild @@ -0,0 +1,31 @@ +# +# kbuild file for nfsmount +# + +static-y := static/nfsmount +#FIXME - build is broken static-y := dummypmap +shared-y := shared/nfsmount + +objs := main.o mount.o portmap.o dummypmap.o sunrpc.o + +# Create built-in.o with all .o files (used by kinit) +lib-y := $(objs) + +# .o files used for executables +static/nfsmount-y := $(objs) +shared/nfsmount-y := $(objs) + +# dummypmap uses a single .o file (rename src file?) +dummypmap-y := dummypmap_test.o + +# TODO - do we want a stripped version +# TODO - do we want the static.g + shared.g directories? + +clean-dirs := static shared + +# Install binary +ifdef KLIBCSHAREDFLAGS +install-y := $(shared-y) +else +install-y := $(static-y) +endif diff --git a/usr/kinit/nfsmount/README.locking b/usr/kinit/nfsmount/README.locking new file mode 100644 index 0000000..bf2e8e7 --- /dev/null +++ b/usr/kinit/nfsmount/README.locking @@ -0,0 +1,26 @@ +I have implemented portmap spoofing in klibc nfsmount (released as +klibc-0.144). This is basically a vestigial portmap daemon which gets +launched before the mount() call and then just records any +transactions it gets to a file and sends back an affirmative reply. + +There are two ways to use it (this belongs in a README file, but it's +too late at night right now): + +a) Set a fixed portnumber in /proc/sys/nfs/nlm_tcpport and +/proc/sys/nfs/nlm_udpport before calling nfsmount; once the portmapper +starts, feed that fixed portnumber to pmap_set(8). In this case the +pmap_file can be /dev/null. + +b) Allow the kernel to bind to any port and use the file produced by +nfsmount to feed to pmap_set (it should be directly compatible); this +means the file needs to be transferred to a place where the "real +root" can find it before run-init. + +In either case, it is imperative that the real portmapper is launched +before any program actually tries to create locks! 
+ +To use it: + + # We need the loopback device to be up before we do this! + ipconfig 127.0.0.1:::::lo:none + nfsmount -p pmap_file -o lock server:/pathname /realpath diff --git a/usr/kinit/nfsmount/dummypmap.c b/usr/kinit/nfsmount/dummypmap.c new file mode 100644 index 0000000..07210c5 --- /dev/null +++ b/usr/kinit/nfsmount/dummypmap.c @@ -0,0 +1,281 @@ +/* + * Enough portmapper functionality that mount doesn't hang trying + * to start lockd. Enables nfsroot with locking functionality. + * + * Note: the kernel will only speak to the local portmapper + * using RPC over UDP. + */ + +#include <sys/types.h> +#include <netinet/in.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <sys/socket.h> + +#include "dummypmap.h" +#include "sunrpc.h" + +extern const char *progname; + +struct portmap_args { + uint32_t program; + uint32_t version; + uint32_t proto; + uint32_t port; +}; + +struct portmap_call { + struct rpc_call rpc; + struct portmap_args args; +}; + +struct portmap_reply { + struct rpc_reply rpc; + uint32_t port; +}; + +static int bind_portmap(void) +{ + int sock = socket(PF_INET, SOCK_DGRAM, 0); + struct sockaddr_in sin; + + if (sock < 0) + return -1; + + memset(&sin, 0, sizeof sin); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(0x7f000001); /* 127.0.0.1 */ + sin.sin_port = htons(RPC_PMAP_PORT); + if (bind(sock, (struct sockaddr *)&sin, sizeof sin) < 0) { + int err = errno; + close(sock); + errno = err; + return -1; + } + + return sock; +} + +static const char *protoname(uint32_t proto) +{ + switch (ntohl(proto)) { + case IPPROTO_TCP: + return "tcp"; + case IPPROTO_UDP: + return "udp"; + default: + return NULL; + } +} + +static void *get_auth(struct rpc_auth *auth) +{ + switch (ntohl(auth->flavor)) { + case AUTH_NULL: + /* Fallthrough */ + case AUTH_UNIX: + return (char *)&auth->body + ntohl(auth->len); + default: + return NULL; + } +} + +static int check_unix_cred(struct rpc_auth *cred) 
+{ + uint32_t len; + int quad_len; + uint32_t node_name_len; + int quad_name_len; + uint32_t *base; + uint32_t *pos; + int ret = -1; + + len = ntohl(cred->len); + quad_len = (len + 3) >> 2; + if (quad_len < 6) + /* Malformed creds */ + goto out; + + base = pos = cred->body; + + /* Skip timestamp */ + pos++; + + /* Skip node name: only localhost can succeed. */ + node_name_len = ntohl(*pos++); + quad_name_len = (node_name_len + 3) >> 2; + if (pos + quad_name_len + 3 > base + quad_len) + /* Malformed creds */ + goto out; + pos += quad_name_len; + + /* uid must be 0 */ + if (*pos++ != 0) + goto out; + + /* gid must be 0 */ + if (*pos++ != 0) + goto out; + + /* Skip remaining gids */ + ret = 0; + +out: + return ret; +} + +static int check_cred(struct rpc_auth *cred) +{ + switch (ntohl(cred->flavor)) { + case AUTH_NULL: + return 0; + case AUTH_UNIX: + return check_unix_cred(cred); + default: + return -1; + } +} + +static int check_vrf(struct rpc_auth *vrf) +{ + return (vrf->flavor == htonl(AUTH_NULL)) ? 
0 : -1; +} + +#define MAX_UDP_PACKET 65536 + +static int dummy_portmap(int sock, FILE *portmap_file) +{ + enum { PAYLOAD_SIZE = MAX_UDP_PACKET + offsetof(struct rpc_header, udp) }; + struct sockaddr_in sin; + int pktlen, addrlen; + union { + struct rpc_call rpc; + /* Max UDP packet size + unused TCP fragment size */ + char payload[PAYLOAD_SIZE]; + } pkt; + struct rpc_call *rpc = &pkt.rpc; + struct rpc_auth *cred; + struct rpc_auth *vrf; + struct portmap_args *args; + struct portmap_reply rply; + + for (;;) { + addrlen = sizeof sin; + pktlen = recvfrom(sock, &rpc->hdr.udp, MAX_UDP_PACKET, + 0, (struct sockaddr *)&sin, &addrlen); + + if (pktlen < 0) { + if (errno == EINTR) + continue; + + return -1; + } + + /* +4 to skip the TCP fragment header */ + if (pktlen + 4 < sizeof(struct portmap_call)) + continue; /* Bad packet */ + + if (rpc->hdr.udp.msg_type != htonl(RPC_CALL)) + continue; /* Bad packet */ + + memset(&rply, 0, sizeof rply); + + rply.rpc.hdr.udp.xid = rpc->hdr.udp.xid; + rply.rpc.hdr.udp.msg_type = htonl(RPC_REPLY); + + cred = (struct rpc_auth *) &rpc->cred_flavor; + if (rpc->rpc_vers != htonl(2)) { + rply.rpc.reply_state = htonl(REPLY_DENIED); + /* state <- RPC_MISMATCH == 0 */ + } else if (rpc->program != htonl(PORTMAP_PROGRAM)) { + rply.rpc.reply_state = htonl(PROG_UNAVAIL); + } else if (rpc->prog_vers != htonl(2)) { + rply.rpc.reply_state = htonl(PROG_MISMATCH); + } else if (!(vrf = get_auth(cred)) || + (char *)vrf > ((char *)&rpc->hdr.udp + pktlen - 8 - + sizeof(*args)) || + !(args = get_auth(vrf)) || + (char *)args > ((char *)&rpc->hdr.udp + pktlen - + sizeof(*args)) || + check_cred(cred) || check_vrf(vrf)) { + /* Can't deal with credentials data; the kernel + won't send them */ + rply.rpc.reply_state = htonl(SYSTEM_ERR); + } else { + switch (ntohl(rpc->proc)) { + case PMAP_PROC_NULL: + break; + case PMAP_PROC_SET: + if (args->proto == htonl(IPPROTO_TCP) || + args->proto == htonl(IPPROTO_UDP)) { + if (portmap_file) + fprintf(portmap_file, + "%u %u %s 
%u\n", + ntohl(args->program), + ntohl(args->version), + protoname(args->proto), + ntohl(args->port)); + rply.port = htonl(1); /* TRUE = success */ + } + break; + case PMAP_PROC_UNSET: + rply.port = htonl(1); /* TRUE = success */ + break; + case PMAP_PROC_GETPORT: + break; + case PMAP_PROC_DUMP: + break; + default: + rply.rpc.reply_state = htonl(PROC_UNAVAIL); + break; + } + } + + sendto(sock, &rply.rpc.hdr.udp, sizeof rply - 4, 0, + (struct sockaddr *)&sin, addrlen); + } +} + +pid_t start_dummy_portmap(const char *file) +{ + FILE *portmap_filep; + int sock; + pid_t spoof_portmap; + + portmap_filep = fopen(file, "w"); + if (!portmap_filep) { + fprintf(stderr, "%s: cannot write portmap file: %s\n", + progname, file); + return -1; + } + + sock = bind_portmap(); + if (sock == -1) { + if (errno == EINVAL || errno == EADDRINUSE) + return 0; /* Assume not needed */ + else { + fclose(portmap_filep); + fprintf(stderr, "%s: portmap spoofing failed\n", + progname); + return -1; + } + } + + spoof_portmap = fork(); + if (spoof_portmap == -1) { + fclose(portmap_filep); + fprintf(stderr, "%s: cannot fork\n", progname); + return -1; + } else if (spoof_portmap == 0) { + /* Child process */ + dummy_portmap(sock, portmap_filep); + _exit(255); /* Error */ + } else { + /* Parent process */ + close(sock); + return spoof_portmap; + } +} diff --git a/usr/kinit/nfsmount/dummypmap.h b/usr/kinit/nfsmount/dummypmap.h new file mode 100644 index 0000000..37650bf --- /dev/null +++ b/usr/kinit/nfsmount/dummypmap.h @@ -0,0 +1,11 @@ +/* + * Functions for the portmap spoofer + */ + +#ifndef NFSMOUNT_DUMMYPORTMAP_H +#define NFSMOUNT_DUMMYPORTMAP_H + +#include <unistd.h> +pid_t start_dummy_portmap(const char *file); + +#endif /* NFSMOUNT_DUMMYPORTMAP_H */ diff --git a/usr/kinit/nfsmount/dummypmap_test.c b/usr/kinit/nfsmount/dummypmap_test.c new file mode 100644 index 0000000..d81a141 --- /dev/null +++ b/usr/kinit/nfsmount/dummypmap_test.c @@ -0,0 +1,2 @@ +#define TEST +#include "dummypmap.c" diff 
--git a/usr/kinit/nfsmount/main.c b/usr/kinit/nfsmount/main.c new file mode 100644 index 0000000..66969f4 --- /dev/null +++ b/usr/kinit/nfsmount/main.c @@ -0,0 +1,288 @@ +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <arpa/inet.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <setjmp.h> +#include <sys/wait.h> +#include <unistd.h> +#include <klibc/sysconfig.h> /* For _KLIBC_NO_MMU */ + +#include <linux/nfs_mount.h> + +#include "nfsmount.h" +#include "sunrpc.h" +#include "dummypmap.h" + +const char *progname; +static jmp_buf abort_buf; + +static struct nfs_mount_data mount_data = { + .version = NFS_MOUNT_VERSION, + .flags = NFS_MOUNT_NONLM | NFS_MOUNT_VER3 | NFS_MOUNT_TCP, + .rsize = 0, /* Server's choice */ + .wsize = 0, /* Server's choice */ + .timeo = 0, /* Kernel client's default */ + .retrans = 3, + .acregmin = 3, + .acregmax = 60, + .acdirmin = 30, + .acdirmax = 60, + .namlen = NAME_MAX, +}; + +int nfs_port; +int nfs_version; + +static struct int_opts { + char *name; + int *val; +} int_opts[] = { + {"port", &nfs_port}, + {"nfsvers", &nfs_version}, + {"vers", &nfs_version}, + {"rsize", &mount_data.rsize}, + {"wsize", &mount_data.wsize}, + {"timeo", &mount_data.timeo}, + {"retrans", &mount_data.retrans}, + {"acregmin", &mount_data.acregmin}, + {"acregmax", &mount_data.acregmax}, + {"acdirmin", &mount_data.acdirmin}, + {"acdirmax", &mount_data.acdirmax}, + {NULL, NULL} +}; + +static struct bool_opts { + char *name; + int and_mask; + int or_mask; +} bool_opts[] = { + {"soft", ~NFS_MOUNT_SOFT, NFS_MOUNT_SOFT}, + {"hard", ~NFS_MOUNT_SOFT, 0}, + {"intr", ~NFS_MOUNT_INTR, NFS_MOUNT_INTR}, + {"nointr", ~NFS_MOUNT_INTR, 0}, + {"posix", ~NFS_MOUNT_POSIX, NFS_MOUNT_POSIX}, + {"noposix", ~NFS_MOUNT_POSIX, 0}, + {"cto", ~NFS_MOUNT_NOCTO, 0}, + {"nocto", ~NFS_MOUNT_NOCTO, NFS_MOUNT_NOCTO}, + {"ac", ~NFS_MOUNT_NOAC, 0}, + {"noac", ~NFS_MOUNT_NOAC, NFS_MOUNT_NOAC}, + {"lock", 
~NFS_MOUNT_NONLM, 0}, + {"nolock", ~NFS_MOUNT_NONLM, NFS_MOUNT_NONLM}, + {"acl", ~NFS_MOUNT_NOACL, 0}, + {"noacl", ~NFS_MOUNT_NOACL, NFS_MOUNT_NOACL}, + {"v2", ~NFS_MOUNT_VER3, 0}, + {"v3", ~NFS_MOUNT_VER3, NFS_MOUNT_VER3}, + {"udp", ~NFS_MOUNT_TCP, 0}, + {"tcp", ~NFS_MOUNT_TCP, NFS_MOUNT_TCP}, + {"broken_suid", ~NFS_MOUNT_BROKEN_SUID, NFS_MOUNT_BROKEN_SUID}, + {"ro", ~NFS_MOUNT_KLIBC_RONLY, NFS_MOUNT_KLIBC_RONLY}, + {"rw", ~NFS_MOUNT_KLIBC_RONLY, 0}, + {NULL, 0, 0} +}; + +static int parse_int(const char *val, const char *ctx) +{ + char *end; + int ret; + + ret = (int)strtoul(val, &end, 0); + if (*val == '\0' || *end != '\0') { + fprintf(stderr, "%s: invalid value for %s\n", val, ctx); + longjmp(abort_buf, 1); + } + return ret; +} + +static void parse_opts(char *opts) +{ + char *cp, *val; + + while ((cp = strsep(&opts, ",")) != NULL) { + if (*cp == '\0') + continue; + val = strchr(cp, '='); + if (val != NULL) { + struct int_opts *opts = int_opts; + *val++ = '\0'; + while (opts->name && strcmp(opts->name, cp) != 0) + opts++; + if (opts->name) + *(opts->val) = parse_int(val, opts->name); + else { + fprintf(stderr, "%s: bad option '%s'\n", + progname, cp); + longjmp(abort_buf, 1); + } + } else { + struct bool_opts *opts = bool_opts; + while (opts->name && strcmp(opts->name, cp) != 0) + opts++; + if (opts->name) { + mount_data.flags &= opts->and_mask; + mount_data.flags |= opts->or_mask; + } else { + fprintf(stderr, "%s: bad option '%s'\n", + progname, cp); + longjmp(abort_buf, 1); + } + } + } + /* If new-style options "nfsvers=" or "vers=" are passed, override + old "v2" and "v3" options */ + if (nfs_version != 0) { + switch (nfs_version) { + case 2: + mount_data.flags &= ~NFS_MOUNT_VER3; + break; + case 3: + mount_data.flags |= NFS_MOUNT_VER3; + break; + default: + fprintf(stderr, "%s: bad NFS version '%d'\n", + progname, nfs_version); + longjmp(abort_buf, 1); + } + } +} + +static uint32_t parse_addr(const char *ip) +{ + struct in_addr in; + if (inet_aton(ip, &in) == 
0) { + fprintf(stderr, "%s: can't parse IP address '%s'\n", + progname, ip); + longjmp(abort_buf, 1); + } + return in.s_addr; +} + +static void check_path(const char *path) +{ + struct stat st; + + if (stat(path, &st) == -1) { + perror("stat"); + longjmp(abort_buf, 1); + } else if (!S_ISDIR(st.st_mode)) { + fprintf(stderr, "%s: '%s' not a directory\n", progname, path); + longjmp(abort_buf, 1); + } +} + +int main(int argc, char *argv[]) + __attribute__ ((weak, alias("nfsmount_main"))); + +int nfsmount_main(int argc, char *argv[]) +{ + uint32_t server; + char *rem_name; + char *rem_path; + char *hostname; + char *path; + int c; + const char *portmap_file; + pid_t spoof_portmap; + int err, ret; + + if ((err = setjmp(abort_buf))) + return err; + + /* Set these here to avoid longjmp warning */ + portmap_file = NULL; + spoof_portmap = 0; + server = 0; + + /* If progname is set we're invoked from another program */ + if (!progname) { + struct timeval now; + progname = argv[0]; + gettimeofday(&now, NULL); + srand48(now.tv_usec ^ (now.tv_sec << 24)); + } + + while ((c = getopt(argc, argv, "o:p:")) != EOF) { + switch (c) { + case 'o': + parse_opts(optarg); + break; + case 'p': + portmap_file = optarg; + break; + case '?': + fprintf(stderr, "%s: invalid option -%c\n", + progname, optopt); + return 1; + } + } + + if (optind == argc) { + fprintf(stderr, "%s: need a path\n", progname); + return 1; + } + + hostname = rem_path = argv[optind]; + + rem_name = strdup(rem_path); + if (rem_name == NULL) { + perror("strdup"); + return 1; + } + + rem_path = strchr(rem_path, ':'); + if (rem_path == NULL) { + fprintf(stderr, "%s: need a server\n", progname); + free(rem_name); + return 1; + } + + *rem_path++ = '\0'; + + if (*rem_path != '/') { + fprintf(stderr, "%s: need a path\n", progname); + free(rem_name); + return 1; + } + + server = parse_addr(hostname); + + if (optind <= argc - 2) + path = argv[optind + 1]; + else + path = "/nfs_root"; + + check_path(path); + +#if !_KLIBC_NO_MMU + /* 
Note: uClinux can't fork(), so the spoof portmapper is not + available on uClinux. */ + if (portmap_file) + spoof_portmap = start_dummy_portmap(portmap_file); + + if (spoof_portmap == -1) { + free(rem_name); + return 1; + } +#endif + + ret = 0; + if (nfs_mount(rem_name, hostname, server, rem_path, path, + &mount_data) != 0) + ret = 1; + + /* If we set up the spoofer, tear it down now */ + if (spoof_portmap) { + kill(spoof_portmap, SIGTERM); + while (waitpid(spoof_portmap, NULL, 0) == -1 + && errno == EINTR) + ; + } + + free(rem_name); + + return ret; +} diff --git a/usr/kinit/nfsmount/mount.c b/usr/kinit/nfsmount/mount.c new file mode 100644 index 0000000..ae48354 --- /dev/null +++ b/usr/kinit/nfsmount/mount.c @@ -0,0 +1,347 @@ +#include <sys/mount.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <linux/nfs.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "nfsmount.h" +#include "sunrpc.h" + +static uint32_t mount_port; + +struct mount_call { + struct rpc_call rpc; + uint32_t path_len; + char path[0]; +}; + +/* + * The following structure is the NFS v3 on-the-wire file handle, + * as defined in rfc1813. + * This differs from the structure used by the kernel, + * defined in <linux/nfh3.h>: rfc has a long in network order, + * kernel has a short in native order. + * Both kernel and rfc use the name nfs_fh; kernel name is + * visible to user apps in some versions of libc. + * Use different name to avoid clashes. 
+ */ +#define NFS_MAXFHSIZE_WIRE 64 +struct nfs_fh_wire { + uint32_t size; + char data[NFS_MAXFHSIZE_WIRE]; +} __attribute__ ((packed, aligned(4))); + +struct mount_reply { + struct rpc_reply reply; + uint32_t status; + struct nfs_fh_wire fh; +} __attribute__ ((packed, aligned(4))); + +#define MNT_REPLY_MINSIZE (sizeof(struct rpc_reply) + sizeof(uint32_t)) + +static int get_ports(uint32_t server, const struct nfs_mount_data *data) +{ + uint32_t nfs_ver, mount_ver; + uint32_t proto; + + if (data->flags & NFS_MOUNT_VER3) { + nfs_ver = NFS3_VERSION; + mount_ver = NFS_MNT3_VERSION; + } else { + nfs_ver = NFS2_VERSION; + mount_ver = NFS_MNT_VERSION; + } + + proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP; + + if (nfs_port == 0) { + nfs_port = portmap(server, NFS_PROGRAM, nfs_ver, proto); + if (nfs_port == 0) { + if (proto == IPPROTO_TCP) { + struct in_addr addr = { server }; + fprintf(stderr, "NFS over TCP not " + "available from %s\n", inet_ntoa(addr)); + return -1; + } + nfs_port = NFS_PORT; + } + } + + if (mount_port == 0) { + mount_port = portmap(server, NFS_MNT_PROGRAM, mount_ver, proto); + if (mount_port == 0) + mount_port = MOUNT_PORT; + } + return 0; +} + +static inline int pad_len(int len) +{ + return (len + 3) & ~3; +} + +static inline void dump_params(uint32_t server, + const char *path, + const struct nfs_mount_data *data) +{ + (void)server; + (void)path; + (void)data; + +#ifdef DEBUG + struct in_addr addr = { server }; + + printf("NFS params:\n"); + printf(" server = %s, path = \"%s\", ", inet_ntoa(addr), path); + printf("version = %d, proto = %s\n", + data->flags & NFS_MOUNT_VER3 ? 3 : 2, + (data->flags & NFS_MOUNT_TCP) ? 
"tcp" : "udp"); + printf(" mount_port = %d, nfs_port = %d, flags = %08x\n", + mount_port, nfs_port, data->flags); + printf(" rsize = %d, wsize = %d, timeo = %d, retrans = %d\n", + data->rsize, data->wsize, data->timeo, data->retrans); + printf(" acreg (min, max) = (%d, %d), acdir (min, max) = (%d, %d)\n", + data->acregmin, data->acregmax, data->acdirmin, data->acdirmax); + printf(" soft = %d, intr = %d, posix = %d, nocto = %d, noac = %d\n", + (data->flags & NFS_MOUNT_SOFT) != 0, + (data->flags & NFS_MOUNT_INTR) != 0, + (data->flags & NFS_MOUNT_POSIX) != 0, + (data->flags & NFS_MOUNT_NOCTO) != 0, + (data->flags & NFS_MOUNT_NOAC) != 0); +#endif +} + +static inline void dump_fh(const char *data, int len) +{ + (void)data; + (void)len; + +#ifdef DEBUG + int i = 0; + int max = len - (len % 8); + + printf("Root file handle: %d bytes\n", NFS2_FHSIZE); + + while (i < max) { + int j; + + printf(" %4d: ", i); + for (j = 0; j < 4; j++) { + printf("%02x %02x %02x %02x ", + data[i] & 0xff, data[i + 1] & 0xff, + data[i + 2] & 0xff, data[i + 3] & 0xff); + } + i += j; + printf("\n"); + } +#endif +} + +static struct mount_reply mnt_reply; + +static int mount_call(uint32_t proc, uint32_t version, + const char *path, struct client *clnt) +{ + struct mount_call *mnt_call = NULL; + size_t path_len, call_len; + struct rpc rpc; + int ret = 0; + + path_len = strlen(path); + call_len = sizeof(*mnt_call) + pad_len(path_len); + + mnt_call = malloc(call_len); + if (mnt_call == NULL) { + perror("malloc"); + goto bail; + } + + memset(mnt_call, 0, sizeof(*mnt_call)); + + mnt_call->rpc.program = htonl(NFS_MNT_PROGRAM); + mnt_call->rpc.prog_vers = htonl(version); + mnt_call->rpc.proc = htonl(proc); + mnt_call->path_len = htonl(path_len); + memcpy(mnt_call->path, path, path_len); + + rpc.call = (struct rpc_call *)mnt_call; + rpc.call_len = call_len; + rpc.reply = (struct rpc_reply *)&mnt_reply; + rpc.reply_len = sizeof(mnt_reply); + + if (rpc_call(clnt, &rpc) < 0) + goto bail; + + if (proc != 
MNTPROC_MNT)
+		goto done;
+
+	if (rpc.reply_len < MNT_REPLY_MINSIZE) {
+		fprintf(stderr, "incomplete reply: %zu < %zu\n",
+			rpc.reply_len, MNT_REPLY_MINSIZE);
+		goto bail;
+	}
+
+	if (mnt_reply.status != 0) {
+		fprintf(stderr, "mount call failed - server replied: %s.\n",
+			strerror(ntohl(mnt_reply.status)));
+		goto bail;
+	}
+
+	goto done;
+
+bail:
+	ret = -1;
+
+done:
+	if (mnt_call)
+		free(mnt_call);
+
+	return ret;
+}
+
+/*
+ * Send an NFSv2 MNT request for @path over @clnt.  On success (return
+ * value 0) the file handle from the reply is stored in both data->root
+ * and data->old_root.  Returns whatever mount_call() returned.
+ */
+static int mount_v2(const char *path,
+		    struct nfs_mount_data *data, struct client *clnt)
+{
+	int ret = mount_call(MNTPROC_MNT, NFS_MNT_VERSION, path, clnt);
+
+	if (ret == 0) {
+		dump_fh((const char *)&mnt_reply.fh, NFS2_FHSIZE);
+
+		data->root.size = NFS_FHSIZE;
+		memcpy(data->root.data, &mnt_reply.fh, NFS_FHSIZE);
+		memcpy(data->old_root.data, &mnt_reply.fh, NFS_FHSIZE);
+	}
+
+	return ret;
+}
+
+/* Send an NFSv2 UMNT request for @path; returns mount_call()'s result. */
+static inline int umount_v2(const char *path, struct client *clnt)
+{
+	return mount_call(MNTPROC_UMNT, NFS_MNT_VERSION, path, clnt);
+}
+
+/*
+ * Send an NFSv3 MNT request for @path over @clnt.  On success the
+ * variable-length file handle from the reply is stored in data->root,
+ * data->old_root is cleared and NFS_MOUNT_VER3 is set in data->flags.
+ *
+ * Returns 0 on success, or -1 if the call failed or the server
+ * announced a file handle that does not fit into data->root.data.
+ */
+static int mount_v3(const char *path,
+		    struct nfs_mount_data *data, struct client *clnt)
+{
+	int ret = mount_call(MNTPROC_MNT, NFS_MNT3_VERSION, path, clnt);
+
+	if (ret == 0) {
+		size_t fhsize = ntohl(mnt_reply.fh.size);
+
+		/*
+		 * The length is taken straight from the server's reply;
+		 * refuse anything that would overflow the destination
+		 * buffer instead of trusting it.
+		 */
+		if (fhsize > sizeof(data->root.data)) {
+			fprintf(stderr, "mount: server returned an oversized "
+				"file handle (%zu bytes)\n", fhsize);
+			return -1;
+		}
+
+		dump_fh((const char *)&mnt_reply.fh.data, fhsize);
+
+		memset(data->old_root.data, 0, NFS_FHSIZE);
+		memset(&data->root, 0, sizeof(data->root));
+		data->root.size = fhsize;
+		memcpy(&data->root.data, mnt_reply.fh.data, fhsize);
+		data->flags |= NFS_MOUNT_VER3;
+	}
+
+	return ret;
+}
+
+/* Send an NFSv3 UMNT request for @path; returns mount_call()'s result. */
+static inline int umount_v3(const char *path, struct client *clnt)
+{
+	return mount_call(MNTPROC_UMNT, NFS_MNT3_VERSION, path, clnt);
+}
+
+/*
+ * Mount an NFS export.
+ *
+ * @pathname: source string handed to mount(2)
+ * @hostname: server name, copied (truncated if necessary) into
+ *            data->hostname
+ * @server:   server IPv4 address; stored unchanged into sin_addr.s_addr
+ * @rem_path: export path sent to the mount daemon
+ * @path:     local mount point handed to mount(2)
+ * @data:     mount data; flags select TCP/UDP and NFSv2/v3, and the
+ *            structure is filled in before being passed to the kernel
+ *
+ * Returns 0 on success and -1 on failure.  If the export was already
+ * mounted on the server side when a later step fails, a UMNT request is
+ * sent before returning.
+ */
+int nfs_mount(const char *pathname, const char *hostname,
+	      uint32_t server, const char *rem_path, const char *path,
+	      struct nfs_mount_data *data)
+{
+	struct client *clnt = NULL;
+	struct sockaddr_in addr;
+	char mounted = 0;
+	int sock = -1;
+	int ret = 0;
+	int mountflags;
+
+	if (get_ports(server, data) != 0)
+		goto bail;
+
+	dump_params(server, rem_path, data);
+
+	if (data->flags & NFS_MOUNT_TCP)
+		clnt = tcp_client(server, mount_port, CLI_RESVPORT);
+	else
+		clnt = udp_client(server, mount_port, CLI_RESVPORT);
+
+	if (clnt == NULL)
+		goto bail;
+
+	if (data->flags & NFS_MOUNT_VER3)
+		ret = mount_v3(rem_path, data, clnt);
+	else
+		ret = mount_v2(rem_path, data, clnt);
+
+	if (ret == -1)
+		goto bail;
+	mounted = 1;
+
+	if (data->flags & NFS_MOUNT_TCP)
+		sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
+	else
+		sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+
+	if (sock == -1) {
+		perror("socket");
+		goto bail;
+	}
+
+	if (bindresvport(sock, 0) == -1) {
+		perror("bindresvport");
+		goto bail;
+	}
+
+	/* Zero the whole address so no stack garbage is copied to the kernel */
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = server;
+	addr.sin_port = htons(nfs_port);
+	memcpy(&data->addr, &addr, sizeof(data->addr));
+
+	/* strncpy() does not terminate on truncation, so do it by hand */
+	strncpy(data->hostname, hostname, sizeof(data->hostname));
+	data->hostname[sizeof(data->hostname) - 1] = '\0';
+
+	data->fd = sock;
+
+	/* The klibc-private read-only bit is translated, then masked away */
+	mountflags = (data->flags & NFS_MOUNT_KLIBC_RONLY) ? MS_RDONLY : 0;
+	data->flags = data->flags & NFS_MOUNT_FLAGMASK;
+	ret = mount(pathname, path, "nfs", mountflags, data);
+
+	if (ret == -1) {
+		if (errno == ENODEV) {
+			fprintf(stderr, "mount: the kernel lacks NFS v%d "
+				"support\n",
+				(data->flags & NFS_MOUNT_VER3) ? 3 : 2);
+		} else {
+			perror("mount");
+		}
+		goto bail;
+	}
+
+	dprintf("Mounted %s on %s\n", pathname, path);
+
+	goto done;
+
+bail:
+	if (mounted) {
+		if (data->flags & NFS_MOUNT_VER3)
+			umount_v3(rem_path, clnt);
+		else
+			umount_v2(rem_path, clnt);
+	}
+
+	ret = -1;
+
+done:
+	if (clnt)
+		client_free(clnt);
+
+	if (sock != -1)
+		close(sock);
+
+	return ret;
+}
diff --git a/usr/kinit/nfsmount/nfsmount.h b/usr/kinit/nfsmount/nfsmount.h
new file mode 100644
index 0000000..7b28ded
--- /dev/null
+++ b/usr/kinit/nfsmount/nfsmount.h
@@ -0,0 +1,34 @@
+#ifndef NFSMOUNT_NFSMOUNT_H
+#define NFSMOUNT_NFSMOUNT_H
+
+#include <linux/nfs_mount.h>
+
+extern int nfs_port;
+
+extern int nfsmount_main(int argc, char *argv[]);
+int nfs_mount(const char *rem_name, const char *hostname,
+	      uint32_t server, const char *rem_path,
+	      const char *path, struct nfs_mount_data *data);
+
+enum nfs_proto {
+	v2 = 2,
+	v3,
+};
+
+/* masked with NFS_MOUNT_FLAGMASK before mount() call */
+#define NFS_MOUNT_KLIBC_RONLY 0x00010000U
+
+#ifdef DEBUG
+# define dprintf printf
+#else
+# define dprintf(...)
((void)0)
+#endif
+
+#ifndef MNTPROC_MNT
+#define MNTPROC_MNT 1
+#endif
+#ifndef MNTPROC_UMNT
+#define MNTPROC_UMNT 3
+#endif
+
+#endif /* NFSMOUNT_NFSMOUNT_H */
diff --git a/usr/kinit/nfsmount/portmap.c b/usr/kinit/nfsmount/portmap.c
new file mode 100644
index 0000000..0a3e2d0
--- /dev/null
+++ b/usr/kinit/nfsmount/portmap.c
@@ -0,0 +1,73 @@
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <asm/byteorder.h>	/* __constant_hton* */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "nfsmount.h"
+#include "sunrpc.h"
+
+/* GETPORT request: the generic RPC call header plus the query fields */
+struct portmap_call {
+	struct rpc_call rpc;
+	uint32_t program;
+	uint32_t version;
+	uint32_t proto;
+	uint32_t port;
+};
+
+/* GETPORT reply: the generic RPC reply header plus the port number */
+struct portmap_reply {
+	struct rpc_reply rpc;
+	uint32_t port;
+};
+
+/* Request template; the query fields are filled in by portmap() */
+static struct portmap_call call = {
+	.rpc = {
+		.program = __constant_htonl(RPC_PMAP_PROGRAM),
+		.prog_vers = __constant_htonl(RPC_PMAP_VERSION),
+		.proc = __constant_htonl(PMAP_PROC_GETPORT),
+	}
+};
+
+/*
+ * Ask the portmapper on @server which port serves @program/@version
+ * over @proto.  A TCP connection to the portmapper is tried first and
+ * UDP is used if that fails.
+ *
+ * Returns the port converted with ntohl(), or 0 if no client could be
+ * created, the call failed, or the reply was too short.
+ */
+uint32_t portmap(uint32_t server, uint32_t program, uint32_t version, uint32_t proto)
+{
+	struct portmap_reply reply;
+	struct client *clnt;
+	struct rpc rpc;
+	uint32_t port = 0;
+
+	clnt = tcp_client(server, RPC_PMAP_PORT, 0);
+	if (clnt == NULL) {
+		clnt = udp_client(server, RPC_PMAP_PORT, 0);
+		if (clnt == NULL)
+			goto bail;
+	}
+
+	call.program = htonl(program);
+	call.version = htonl(version);
+	call.proto = htonl(proto);
+
+	rpc.call = (struct rpc_call *)&call;
+	rpc.call_len = sizeof(call);
+	rpc.reply = (struct rpc_reply *)&reply;
+	rpc.reply_len = sizeof(reply);
+
+	if (rpc_call(clnt, &rpc) < 0)
+		goto bail;
+
+	if (rpc.reply_len < sizeof(reply)) {
+		fprintf(stderr, "incomplete reply: %zu < %zu\n",
+			rpc.reply_len, sizeof(reply));
+		goto bail;
+	}
+
+	port = ntohl(reply.port);
+
+bail:
+	dprintf("Port for %d/%d[%s]: %d\n", program, version,
+		proto == IPPROTO_TCP ? "tcp" : "udp", port);
+
+	if (clnt)
+		client_free(clnt);
+
+	return port;
+}
diff --git a/usr/kinit/nfsmount/sunrpc.c b/usr/kinit/nfsmount/sunrpc.c
new file mode 100644
index 0000000..0a7fcf5
--- /dev/null
+++ b/usr/kinit/nfsmount/sunrpc.c
@@ -0,0 +1,252 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "nfsmount.h"
+#include "sunrpc.h"
+
+/*
+ * The magic offset is needed here because RPC over TCP includes a
+ * field that RPC over UDP doesn't. Luvverly.
+ *
+ * Reads one reply into rpc->reply starting @off bytes in, updates
+ * rpc->reply_len to the number of valid bytes, and validates the
+ * fragment flag (only when @off is 0), the xid, the message type and
+ * the reply state.  Returns 0 on success, -1 on any failure.
+ */
+static int rpc_do_reply(struct client *clnt, struct rpc *rpc, size_t off)
+{
+	int ret;
+
+	if ((ret = read(clnt->sock,
+			((char *)rpc->reply) + off,
+			rpc->reply_len - off)) == -1) {
+		perror("read");
+		goto bail;
+	} else if (ret < sizeof(struct rpc_reply) - off) {
+		fprintf(stderr, "short read: %d < %zu\n", ret,
+			sizeof(struct rpc_reply) - off);
+		goto bail;
+	}
+	rpc->reply_len = ret + off;
+
+	if ((!off && !(ntohl(rpc->reply->hdr.frag_hdr) & LAST_FRAG)) ||
+	    rpc->reply->hdr.udp.xid != rpc->call->hdr.udp.xid ||
+	    rpc->reply->hdr.udp.msg_type != htonl(RPC_REPLY)) {
+		fprintf(stderr, "bad reply\n");
+		goto bail;
+	}
+
+	if (ntohl(rpc->reply->state) != REPLY_OK) {
+		fprintf(stderr, "rpc failed: %d\n", ntohl(rpc->reply->state));
+		goto bail;
+	}
+
+	ret = 0;
+	goto done;
+
+bail:
+	ret = -1;
+done:
+	return ret;
+}
+
+/*
+ * Fill in the fields of the call header that are common to every
+ * request: fragment header, a random xid, message type and RPC version.
+ */
+static void rpc_header(struct client *clnt, struct rpc *rpc)
+{
+	(void)clnt;
+
+	rpc->call->hdr.frag_hdr = htonl(LAST_FRAG | (rpc->call_len - 4));
+	rpc->call->hdr.udp.xid = lrand48();
+	rpc->call->hdr.udp.msg_type = htonl(RPC_CALL);
+	rpc->call->rpc_vers = htonl(2);
+}
+
+/*
+ * Perform one call over a connected TCP socket: write the whole request,
+ * then read and validate a single reply.  Returns 0 or -1.
+ */
+static int rpc_call_tcp(struct client *clnt, struct rpc *rpc)
+{
+	int ret;
+
+	rpc_header(clnt, rpc);
+
+	if ((ret = write(clnt->sock, rpc->call, rpc->call_len)) == -1) {
+		perror("write");
+		goto bail;
+	} else if (ret < rpc->call_len) {
+		fprintf(stderr, "short write: %d < %zu\n", ret, rpc->call_len);
+		goto bail;
+	}
+
+	ret = rpc_do_reply(clnt, rpc, 0);
+	goto done;
+
+ bail:
+	ret = -1;
+
+ done:
+	return ret;
+}
+
+/*
+ * Perform one call over a connected UDP socket.  The leading part of the
+ * header that only exists for TCP is skipped on both send and receive
+ * (and rpc->call_len is reduced accordingly).  The request is re-sent
+ * after each poll() timeout; sends and failed replies share the single
+ * counter i, which is capped at MAX_TRIES.  Returns 0 or -1.
+ */
+static int rpc_call_udp(struct client *clnt, struct rpc *rpc)
+{
+#define NR_FDS 1
+#define TIMEOUT_MS 3000
+#define MAX_TRIES 100
+#define UDP_HDR_OFF (sizeof(struct rpc_header) - sizeof(struct rpc_udp_header))
+	struct pollfd fds[NR_FDS];
+	int ret = -1;
+	int i;
+
+	rpc_header(clnt, rpc);
+
+	fds[0].fd = clnt->sock;
+	fds[0].events = POLLRDNORM;
+
+	rpc->call_len -= UDP_HDR_OFF;
+
+	for (i = 0; i < MAX_TRIES; i++) {
+		/* Randomise the timeout a little on every (re)transmission */
+		int timeout_ms = TIMEOUT_MS + (lrand48() % (TIMEOUT_MS / 2));
+		if ((ret = write(clnt->sock,
+				 ((char *)rpc->call) + UDP_HDR_OFF,
+				 rpc->call_len)) == -1) {
+			perror("write");
+			goto bail;
+		} else if (ret < rpc->call_len) {
+			fprintf(stderr, "short write: %d < %zu\n", ret,
+				rpc->call_len);
+			goto bail;
+		}
+		for (; i < MAX_TRIES; i++) {
+			if ((ret = poll(fds, NR_FDS, timeout_ms)) == -1) {
+				perror("poll");
+				goto bail;
+			}
+			if (ret == 0) {
+				dprintf("Timeout #%d\n", i + 1);
+				break;
+			}
+			if ((ret = rpc_do_reply(clnt, rpc, UDP_HDR_OFF)) == 0) {
+				goto done;
+			} else {
+				dprintf("Failed on try #%d - retrying\n",
+					i + 1);
+			}
+		}
+	}
+
+ bail:
+	ret = -1;
+
+ done:
+	return ret;
+}
+
+/*
+ * Create a client connected to @server:@port over TCP.  @server is
+ * stored unchanged into sin_addr.s_addr; @port is converted with
+ * htons().  With CLI_RESVPORT in @flags the socket is first bound with
+ * bindresvport().
+ *
+ * Returns a malloc'd client to be released with client_free(), or NULL
+ * on failure.  On failure the socket, if one was created, is closed.
+ */
+struct client *tcp_client(uint32_t server, uint16_t port, uint32_t flags)
+{
+	struct client *clnt = malloc(sizeof(*clnt));
+	struct sockaddr_in addr;
+	int sock = -1;
+
+	if (clnt == NULL) {
+		perror("malloc");
+		goto bail;
+	}
+
+	memset(clnt, 0, sizeof(*clnt));
+
+	if ((sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
+		perror("socket");
+		goto bail;
+	}
+
+	if ((flags & CLI_RESVPORT) && bindresvport(sock, 0) == -1) {
+		perror("bindresvport");
+		goto bail;
+	}
+
+	clnt->sock = sock;
+	clnt->call_stub = rpc_call_tcp;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(port);
+	addr.sin_addr.s_addr = server;
+
+	if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+		perror("connect");
+		goto bail;
+	}
+
+	goto done;
+ bail:
+	/* Do not leak the descriptor when setup fails half-way */
+	if (sock != -1)
+		close(sock);
+	if (clnt) {
+		free(clnt);
+		clnt = NULL;
+	}
+ done:
+	return clnt;
+}
+
+/*
+ * Create a client connected to @server:@port over UDP.  Same contract
+ * as tcp_client(): returns a malloc'd client or NULL, and closes the
+ * socket on failure.
+ */
+struct client *udp_client(uint32_t server, uint16_t port, uint32_t flags)
+{
+	struct client *clnt = malloc(sizeof(*clnt));
+	struct sockaddr_in addr;
+	int sock = -1;
+
+	if (clnt == NULL) {
+		perror("malloc");
+		goto bail;
+	}
+
+	memset(clnt, 0, sizeof(*clnt));
+
+	if ((sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
+		perror("socket");
+		goto bail;
+	}
+
+	if ((flags & CLI_RESVPORT) && bindresvport(sock, 0) == -1) {
+		perror("bindresvport");
+		goto bail;
+	} else {
+		struct sockaddr_in me;
+
+		me.sin_family = AF_INET;
+		me.sin_port = 0;
+		me.sin_addr.s_addr = INADDR_ANY;
+
+		/* The constant 0 short-circuits this: bind() is never called */
+		if (0 && bind(sock, (struct sockaddr *)&me, sizeof(me)) == -1) {
+			perror("bind");
+			goto bail;
+		}
+	}
+
+	clnt->sock = sock;
+	clnt->call_stub = rpc_call_udp;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(port);
+	addr.sin_addr.s_addr = server;
+
+	if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+		perror("connect");
+		goto bail;
+	}
+
+	goto done;
+ bail:
+	/* Do not leak the descriptor when setup fails half-way */
+	if (sock != -1)
+		close(sock);
+	if (clnt) {
+		free(clnt);
+		clnt = NULL;
+	}
+ done:
+	return clnt;
+}
+
+/* Close the client's socket (unless it is -1) and free the client. */
+void client_free(struct client *c)
+{
+	if (c->sock != -1)
+		close(c->sock);
+	free(c);
+}
+
+/* Dispatch @rpc through the transport-specific stub of @client. */
+int rpc_call(struct client *client, struct rpc *rpc)
+{
+	return client->call_stub(client, rpc);
+}
diff --git a/usr/kinit/nfsmount/sunrpc.h b/usr/kinit/nfsmount/sunrpc.h
new file mode 100644
index 0000000..1bcfeea
--- /dev/null
+++ b/usr/kinit/nfsmount/sunrpc.h
@@ -0,0 +1,110 @@
+/*
+ * open-coded SunRPC structures
+ */
+#ifndef NFSMOUNT_SUNRPC_H
+#define NFSMOUNT_SUNRPC_H
+
+#include <sys/types.h>
+#include <inttypes.h>
+
+#define SUNRPC_PORT 111
+#define MOUNT_PORT 627
+
+#define RPC_CALL 0
+#define RPC_REPLY 1
+
+#define PORTMAP_PROGRAM 100000
+#define NLM_PROGRAM 100021
+
+#define RPC_PMAP_PROGRAM 100000
+#define RPC_PMAP_VERSION 2
+#define RPC_PMAP_PORT 111
+
+#define PMAP_PROC_NULL 0
+#define PMAP_PROC_SET 1
+#define PMAP_PROC_UNSET 2
+#define
PMAP_PROC_GETPORT 3
+#define PMAP_PROC_DUMP 4
+
+#define LAST_FRAG 0x80000000
+
+#define REPLY_OK 0
+#define REPLY_DENIED 1
+
+#define SUCCESS 0
+#define PROG_UNAVAIL 1
+#define PROG_MISMATCH 2
+#define PROC_UNAVAIL 3
+#define GARBAGE_ARGS 4
+#define SYSTEM_ERR 5
+
+enum {
+	AUTH_NULL,
+	AUTH_UNIX,
+};
+
+struct rpc_auth {
+	uint32_t flavor;
+	uint32_t len;
+	uint32_t body[];
+};
+
+struct rpc_udp_header {
+	uint32_t xid;
+	uint32_t msg_type;
+};
+
+struct rpc_header {
+	uint32_t frag_hdr;
+	struct rpc_udp_header udp;
+};
+
+struct rpc_call {
+	struct rpc_header hdr;
+	uint32_t rpc_vers;
+
+	uint32_t program;
+	uint32_t prog_vers;
+	uint32_t proc;
+	uint32_t cred_flavor;
+
+	uint32_t cred_len;
+	uint32_t vrf_flavor;
+	uint32_t vrf_len;
+};
+
+struct rpc_reply {
+	struct rpc_header hdr;
+	uint32_t reply_state;
+	uint32_t vrf_flavor;
+	uint32_t vrf_len;
+	uint32_t state;
+};
+
+struct rpc {
+	struct rpc_call *call;
+	size_t call_len;
+	struct rpc_reply *reply;
+	size_t reply_len;
+};
+
+struct client;
+
+typedef int (*call_stub) (struct client *, struct rpc *);
+
+struct client {
+	int sock;
+	call_stub call_stub;
+};
+
+#define CLI_RESVPORT 00000001
+
+struct client *tcp_client(uint32_t server, uint16_t port, uint32_t flags);
+struct client *udp_client(uint32_t server, uint16_t port, uint32_t flags);
+void client_free(struct client *client);
+
+int rpc_call(struct client *client, struct rpc *rpc);
+
+uint32_t portmap(uint32_t server, uint32_t program, uint32_t version, uint32_t proto);
+
+#endif /* NFSMOUNT_SUNRPC_H */
diff --git a/usr/kinit/nfsroot.c b/usr/kinit/nfsroot.c
new file mode 100644
index 0000000..3b80773
--- /dev/null
+++ b/usr/kinit/nfsroot.c
@@ -0,0 +1,111 @@
+#include <arpa/inet.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "kinit.h"
+#include "netdev.h"
+#include "nfsmount.h"
+
+/*
+ * Replace the first "%s" in @path (a buffer of @len bytes) with the
+ * dotted-quad form of @client, in place.  Exits if a substitution is
+ * needed but @client is INADDR_NONE.  Returns @path.
+ *
+ * The path comes from the command line or the boot server, so it must
+ * never be used as a printf format: the address is spliced in by hand
+ * and every other '%' sequence is kept literally.  The result is
+ * truncated to fit @len bytes.
+ */
+static char *sub_client(__u32 client, char *path, size_t len)
+{
+	struct in_addr addr = { client };
+	char buf[len];
+	char *marker = strstr(path, "%s");
+
+	if (marker != NULL) {
+		if (client == INADDR_NONE) {
+			fprintf(stderr, "Root-NFS: no client address\n");
+			exit(1);
+		}
+
+		/* prefix + address + whatever followed the "%s" */
+		snprintf(buf, len, "%.*s%s%s", (int)(marker - path), path,
+			 inet_ntoa(addr), marker + 2);
+		strcpy(path, buf);
+	}
+
+	return path;
+}
+
+#define NFS_ARGC 6
+#define MOUNT_POINT "/root"
+
+/*
+ * Build an argument vector for nfsmount_main() from the "nfsroot="
+ * command-line option (or the boot server's bootpath, or a built-in
+ * default) and mount the result on MOUNT_POINT.  @flags is currently
+ * ignored.  Exits on unusable configuration (no server address, no
+ * client address, or a root path that does not fit the buffer).
+ */
+int mount_nfs_root(int argc, char *argv[], int flags)
+{
+	(void)flags;		/* FIXME - don't ignore this */
+
+	struct in_addr addr = { INADDR_NONE };
+	__u32 client = INADDR_NONE;
+	const int len = 1024;
+	struct netdev *dev;
+	char *mtpt = MOUNT_POINT;
+	char *path = NULL;
+	char *dev_bootpath = NULL;
+	char root[len];
+	char *x, *opts;
+	int ret = 0;
+	int a = 1;
+	int n;
+	char *nfs_argv[NFS_ARGC + 1] = { "NFS-Mount" };
+
+	/* Use the first interface that knows a server; remember a client IP */
+	for (dev = ifaces; dev; dev = dev->next) {
+		if (dev->ip_server != INADDR_NONE &&
+		    dev->ip_server != INADDR_ANY) {
+			addr.s_addr = dev->ip_server;
+			client = dev->ip_addr;
+			dev_bootpath = dev->bootpath;
+			break;
+		}
+		if (dev->ip_addr != INADDR_NONE && dev->ip_addr != INADDR_ANY)
+			client = dev->ip_addr;
+	}
+
+	/*
+	 * if the "nfsroot" option is set then it overrides
+	 * bootpath supplied by the boot server.
+	 */
+	if ((path = get_arg(argc, argv, "nfsroot=")) == NULL) {
+		if ((path = dev_bootpath) == NULL || path[0] == '\0')
+			/* no path - set a default */
+			path = (char *)"/tftpboot/%s";
+	} else if (dev_bootpath && dev_bootpath[0] != '\0')
+		fprintf(stderr,
+			"nfsroot=%s overrides boot server bootpath %s\n",
+			path, dev_bootpath);
+
+	/* Everything after the first comma is passed on as mount options */
+	if ((opts = strchr(path, ',')) != NULL) {
+		*opts++ = '\0';
+		nfs_argv[a++] = (char *)"-o";
+		nfs_argv[a++] = opts;
+	}
+
+	if ((x = strchr(path, ':')) == NULL) {
+		if (addr.s_addr == INADDR_NONE) {
+			fprintf(stderr, "Root-NFS: no server defined\n");
+			exit(1);
+		}
+
+		n = snprintf(root, len, "%s:%s", inet_ntoa(addr), path);
+	} else {
+		n = snprintf(root, len, "%s", path);
+	}
+
+	/* Refuse to overflow root[] or to mount a silently truncated path */
+	if (n < 0 || n >= len) {
+		fprintf(stderr, "Root-NFS: root path too long\n");
+		exit(1);
+	}
+
+	nfs_argv[a++] = sub_client(client, root, len);
+
+	dprintf("NFS-Root: mounting %s on %s with options \"%s\"\n",
+		nfs_argv[a-1], mtpt, opts ?
opts : "");
+
+	nfs_argv[a++] = mtpt;
+	nfs_argv[a] = NULL;
+	assert(a <= NFS_ARGC);
+
+	dump_args(a, nfs_argv);
+
+	if ((ret = nfsmount_main(a, nfs_argv)) != 0) {
+		ret = -1;
+		goto done;
+	}
+
+done:
+	return ret;
+}
diff --git a/usr/kinit/ramdisk_load.c b/usr/kinit/ramdisk_load.c
new file mode 100644
index 0000000..e3e15d8
--- /dev/null
+++ b/usr/kinit/ramdisk_load.c
@@ -0,0 +1,281 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <linux/fs.h>
+#include <linux/cdrom.h>
+#include <linux/fd.h>
+
+#include "kinit.h"
+#include "do_mounts.h"
+#include "fstype.h"
+#include "zlib.h"
+
+#define BUF_SZ 65536
+
+/*
+ * Block until the user presses Enter.  Also returns when standard input
+ * reports EOF or an error, so a closed console cannot make this spin
+ * forever.
+ */
+static void wait_for_key(void)
+{
+	int c;
+
+	/* Wait until the user presses Enter */
+	do {
+		c = getchar();
+	} while (c != '\n' && c != EOF);
+}
+
+/*
+ * Try to eject (or reset) the medium behind @rfd, close @rfd, prompt for
+ * disk number @disk and reopen @devpath read-only.
+ * Returns the new descriptor, or open()'s error result.
+ */
+static int change_disk(const char *devpath, int rfd, int disk)
+{
+	/* Try to eject and/or quiesce the device */
+	sync();
+	if (ioctl(rfd, FDEJECT, 0)) {
+		if (errno == ENOTTY) {
+			/* Not a floppy */
+			ioctl(rfd, CDROMEJECT, 0);
+		} else {
+			/* Non-ejectable floppy */
+			ioctl(rfd, FDRESET, (void *)FD_RESET_IF_NEEDED);
+		}
+	}
+	close(rfd);
+
+	fprintf(stderr,
+		"\nPlease insert disk %d for ramdisk and press Enter...", disk);
+	wait_for_key();
+
+	return open(devpath, O_RDONLY);
+}
+
+#ifdef CONFIG_KLIBC_ZLIB
+/* Also used in initrd.c */
+/*
+ * Inflate the compressed image found on @devpath, starting at byte
+ * offset @ramdisk_start, and write the output to @wfd.  When the end of
+ * the medium is reached before the stream ends, the user is asked for
+ * the next disk.  Returns 0 on success and -1 on failure; the device
+ * descriptor opened here is closed on every path.
+ */
+int load_ramdisk_compressed(const char *devpath, FILE * wfd,
+			    off_t ramdisk_start)
+{
+	int rfd = -1;
+	unsigned long long ramdisk_size, ramdisk_left;
+	int disk = 1;
+	ssize_t bytes;
+	int rv;
+	unsigned char in_buf[BUF_SZ], out_buf[BUF_SZ];
+	z_stream zs;
+
+	zs.zalloc = Z_NULL;	/* Use malloc() */
+	zs.zfree = Z_NULL;	/* Use free() */
+	zs.next_in = Z_NULL;	/* No data read yet */
+	zs.avail_in = 0;
+	zs.next_out = out_buf;
+	zs.avail_out = BUF_SZ;
+
+	if (inflateInit2(&zs, 32 + 15) != Z_OK)
+		goto err1;
+
+	rfd = open(devpath, O_RDONLY);
+	if (rfd < 0)
+		goto err2;
+
+	/* Set to the size of the medium, or "infinite" */
+	if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size))
+		ramdisk_size = ~0ULL;
+
+	do {
+		/* Purge the output preferentially over reading new
+		   input, so we don't end up overrunning the input by
+		   accident and demanding a new disk which doesn't
+		   exist... */
+		if (zs.avail_out == 0) {
+			_fwrite(out_buf, BUF_SZ, wfd);
+			zs.next_out = out_buf;
+			zs.avail_out = BUF_SZ;
+		} else if (zs.avail_in == 0) {
+			if (ramdisk_start >= ramdisk_size) {
+				/* change_disk() closes the old descriptor */
+				rfd = change_disk(devpath, rfd, ++disk);
+				if (rfd < 0)
+					goto err2;
+
+				if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size))
+					ramdisk_size = ~0ULL;
+				ramdisk_start = 0;
+				dprintf("New size = %llu\n", ramdisk_size);
+			}
+			do {
+				ramdisk_left = ramdisk_size - ramdisk_start;
+				bytes = min(ramdisk_left,
+					    (unsigned long long)BUF_SZ);
+				bytes = pread(rfd, in_buf, bytes,
+					      ramdisk_start);
+			} while (bytes == -1 && errno == EINTR);
+			if (bytes <= 0)
+				goto err2;
+			ramdisk_start += bytes;
+			zs.next_in = in_buf;
+			zs.avail_in = bytes;
+
+			/* Print dots if we're reading from a real block device */
+			if (ramdisk_size != ~0ULL)
+				putc('.', stderr);
+		}
+		rv = inflate(&zs, Z_SYNC_FLUSH);
+	} while (rv == Z_OK || rv == Z_BUF_ERROR);
+
+	dprintf("kinit: inflate returned %d\n", rv);
+
+	if (rv != Z_STREAM_END)
+		goto err2;
+
+	/* Write the last */
+	_fwrite(out_buf, BUF_SZ - zs.avail_out, wfd);
+	dprintf("kinit: writing %d bytes\n", BUF_SZ - zs.avail_out);
+
+	inflateEnd(&zs);
+	close(rfd);
+	return 0;
+
+err2:
+	/* rfd is -1 here if open() or change_disk() failed */
+	if (rfd >= 0)
+		close(rfd);
+	inflateEnd(&zs);
+err1:
+	return -1;
+}
+#else
+/* Stub used when zlib support is not configured; always fails. */
+int load_ramdisk_compressed(const char *devpath, FILE * wfd,
+			    off_t ramdisk_start)
+{
+	fprintf(stderr, "Compressed ramdisk not supported\n");
+	return -1;
+}
+#endif
+
+/*
+ * Copy @fssize bytes from @devpath, starting at byte offset
+ * @ramdisk_start, to @wfd, prompting for further disks when the end of
+ * the medium is reached.  Returns 0 when everything was copied, 1 when
+ * the input ended early, and -1 when the device could not be opened.
+ * The device descriptor opened here is closed before returning.
+ */
+static int
+load_ramdisk_raw(const char *devpath, FILE * wfd, off_t ramdisk_start,
+		 unsigned long long fssize)
+{
+	unsigned long long ramdisk_size, ramdisk_left;
+	int disk = 1;
+	ssize_t bytes;
+	unsigned char buf[BUF_SZ];
+	int rfd;
+
+	rfd = open(devpath, O_RDONLY);
+	if (rfd < 0)
+		return -1;
+
+	/* Set to the size of the medium, or "infinite" */
+	if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size))
+		ramdisk_size = ~0ULL;
+
+	dprintf("start: %llu  size: %llu  fssize: %llu\n",
+		ramdisk_start, ramdisk_size, fssize);
+
+	while (fssize) {
+
+		if (ramdisk_start >= ramdisk_size) {
+			/* change_disk() closes the old descriptor */
+			rfd = change_disk(devpath, rfd, ++disk);
+			if (rfd < 0)
+				return -1;
+
+			if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size))
+				ramdisk_size = ~0ULL;
+			ramdisk_start = 0;
+		}
+
+		do {
+			ramdisk_left =
+			    min(ramdisk_size - ramdisk_start, fssize);
+			bytes = min(ramdisk_left, (unsigned long long)BUF_SZ);
+			bytes = pread(rfd, buf, bytes, ramdisk_start);
+		} while (bytes == -1 && errno == EINTR);
+		if (bytes <= 0)
+			break;
+		_fwrite(buf, bytes, wfd);
+
+		ramdisk_start += bytes;
+		fssize -= bytes;
+
+		/* Print dots if we're reading from a real block device */
+		if (ramdisk_size != ~0ULL)
+			putc('.', stderr);
+	}
+
+	close(rfd);
+
+	return !!fssize;
+}
+
+/*
+ * Load a ramdisk image into /dev/ram0 as directed by the
+ * "prompt_ramdisk=", "ramdisk_blocksize=", "ramdisk_start=" and
+ * "ramdisk_device=" arguments (default device "/dev/fd0", default block
+ * size 512).  Returns 1 on success and 0 on failure.
+ */
+int ramdisk_load(int argc, char *argv[])
+{
+	const char *arg_prompt_ramdisk = get_arg(argc, argv, "prompt_ramdisk=");
+	const char *arg_ramdisk_blocksize =
+	    get_arg(argc, argv, "ramdisk_blocksize=");
+	const char *arg_ramdisk_start = get_arg(argc, argv, "ramdisk_start=");
+	const char *arg_ramdisk_device = get_arg(argc, argv, "ramdisk_device=");
+
+	int prompt_ramdisk = arg_prompt_ramdisk ? atoi(arg_prompt_ramdisk) : 0;
+	int ramdisk_blocksize =
+	    arg_ramdisk_blocksize ? atoi(arg_ramdisk_blocksize) : 512;
+	off_t ramdisk_start =
+	    arg_ramdisk_start
+	    ? strtoumax(arg_ramdisk_start, NULL, 10) * ramdisk_blocksize : 0;
+	const char *ramdisk_device =
+	    arg_ramdisk_device ? arg_ramdisk_device : "/dev/fd0";
+
+	dev_t ramdisk_dev;
+	int rfd;
+	FILE *wfd;
+	const char *fstype;
+	unsigned long long fssize;
+	int is_gzip = 0;
+	int err;
+
+	if (prompt_ramdisk) {
+		fprintf(stderr,
+			"Please insert disk for ramdisk and press Enter...");
+		wait_for_key();
+	}
+
+	ramdisk_dev = name_to_dev_t(ramdisk_device);
+	if (!ramdisk_dev) {
+		fprintf(stderr,
+			"Failure loading ramdisk: unknown device: %s\n",
+			ramdisk_device);
+		return 0;
+	}
+
+	create_dev("/dev/rddev", ramdisk_dev);
+	create_dev("/dev/ram0", Root_RAM0);
+	rfd = open("/dev/rddev", O_RDONLY);
+	wfd = fopen("/dev/ram0", "w");
+
+	if (rfd < 0 || !wfd) {
+		/* Report first, while errno is still the one that failed */
+		perror("Could not open ramdisk device");
+		if (rfd >= 0)
+			close(rfd);
+		if (wfd)
+			fclose(wfd);
+		return 0;
+	}
+
+	/* Check filesystem type */
+	if (identify_fs(rfd, &fstype, &fssize, ramdisk_start) ||
+	    (fssize == 0 && !(is_gzip = !strcmp(fstype, "gzip")))) {
+		fprintf(stderr,
+			"Failure loading ramdisk: unknown filesystem type\n");
+		close(rfd);
+		fclose(wfd);
+		return 0;
+	}
+
+	dprintf("kinit: ramdisk is %s, size %llu\n", fstype, fssize);
+
+	fprintf(stderr, "Loading ramdisk (%s) ...", is_gzip ? "gzip" : "raw");
+
+	/* The loaders below reopen the device themselves */
+	close(rfd);
+
+	if (is_gzip)
+		err = load_ramdisk_compressed("/dev/rddev", wfd, ramdisk_start);
+	else
+		err = load_ramdisk_raw("/dev/rddev", wfd,
+				       ramdisk_start, fssize);
+
+	fclose(wfd);
+
+	putc('\n', stderr);
+
+	if (err) {
+		perror("Failure loading ramdisk");
+		return 0;
+	}
+
+	return 1;
+}
diff --git a/usr/kinit/readfile.c b/usr/kinit/readfile.c
new file mode 100644
index 0000000..7a16b4a
--- /dev/null
+++ b/usr/kinit/readfile.c
@@ -0,0 +1,86 @@
+/*
+ * Read the entire contents of a file into malloc'd storage. This
+ * is mostly useful for things like /proc files where we can't just
+ * fstat() to get the length and then mmap().
+ *
+ * Returns the number of bytes read, or -1 on error.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+#include "kinit.h"
+
+/*
+ * Read everything that remains on stream @f into a freshly malloc'd,
+ * NUL-terminated buffer and store its address in *@pp.
+ *
+ * Returns the number of bytes read (not counting the terminator), or -1
+ * on a read or allocation failure, in which case the buffer is freed and
+ * *@pp is left untouched.  On success errno is restored to the value it
+ * had on entry; the caller owns the buffer and must free() it.
+ */
+ssize_t freadfile(FILE *f, char **pp)
+{
+	size_t bs;		/* Decent starting point... */
+	size_t bf;		/* Bytes free */
+	size_t bu = 0;		/* Bytes used */
+	char *buffer, *nb;
+	size_t rv;
+	int old_errno = errno;
+
+	bs = BUFSIZ;		/* A guess as good as any */
+	bf = bs;
+	buffer = malloc(bs);
+
+	if (!buffer)
+		return -1;
+
+	for (;;) {
+		/* Cleared so that a zero-length read can be told from an error */
+		errno = 0;
+
+		while (bf && (rv = _fread(buffer + bu, bf, f))) {
+			bu += rv;
+			bf -= rv;
+		}
+
+		if (errno && errno != EINTR && errno != EAGAIN) {
+			/* error */
+			free(buffer);
+			return -1;
+		}
+
+		if (bf) {
+			/* Hit EOF, no error */
+
+			/* Try to free superfluous memory */
+			if ((nb = realloc(buffer, bu + 1)))
+				buffer = nb;
+
+			/* Null-terminate result for good measure */
+			/* (bf != 0 means bu < bs, so this index is in bounds) */
+			buffer[bu] = '\0';
+
+			*pp = buffer;
+			errno = old_errno;
+			return bu;
+		}
+
+		/* Double the size of the buffer */
+		bf += bs;
+		bs += bs;
+		if (!(nb = realloc(buffer, bs))) {
+			/* out of memory error */
+			free(buffer);
+			return -1;
+		}
+		buffer = nb;
+	}
+}
+
+/*
+ * Open @filename for reading and return freadfile()'s result for it,
+ * or -1 if the file cannot be opened.
+ */
+ssize_t readfile(const char *filename, char **pp)
+{
+	FILE *f = fopen(filename, "r");
+	ssize_t rv;
+
+	if (!f)
+		return -1;
+
+	rv = freadfile(f, pp);
+
+	fclose(f);
+
+	return rv;
+}
diff --git a/usr/kinit/resume/Kbuild b/usr/kinit/resume/Kbuild
new file mode 100644
index 0000000..c804a85
--- /dev/null
+++ b/usr/kinit/resume/Kbuild
@@ -0,0 +1,34 @@
+#
+# Kbuild file for resume
+#
+
+static-y := static/resume
+shared-y := shared/resume
+
+# common .o files
+objs := resume.o resumelib.o
+
+# TODO - do we want a stripped version
+# TODO - do we want the static.g + shared.g directories?
+
+# Create lib.a with all object files (used by kinit)
+lib-y := $(objs)
+
+# Additional include paths files
+KLIBCCFLAGS += -I$(srctree)/$(src)/..
+
+# .o files used to build executables
+static/resume-y := $(objs)
+static/resume-lib := ../lib.a
+shared/resume-y := $(objs)
+shared/resume-lib := ../lib.a
+
+# Cleaning
+clean-dirs := static shared
+
+# install binary
+ifdef KLIBCSHAREDFLAGS
+install-y := $(shared-y)
+else
+install-y := $(static-y)
+endif
diff --git a/usr/kinit/resume/resume.c b/usr/kinit/resume/resume.c
new file mode 100644
index 0000000..2138078
--- /dev/null
+++ b/usr/kinit/resume/resume.c
@@ -0,0 +1,25 @@
+/*
+ * Handle resume from suspend-to-disk
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "resume.h"
+
+char *progname;
+
+/*
+ * Print the usage message to stderr and exit with status 1.
+ * NOTE(review): no return type is spelled out here; presumably the
+ * __noreturn macro supplies it - confirm against its definition.
+ */
+static __noreturn usage(void)
+{
+	fprintf(stderr, "Usage: %s /dev/<resumedevice> [offset]\n", progname);
+	exit(1);
+}
+
+/*
+ * Stand-alone entry point: takes the resume device and an optional
+ * offset (parsed with base auto-detection; 0 when omitted) and returns
+ * resume()'s result.
+ */
+int main(int argc, char *argv[])
+{
+	progname = argv[0];
+	if (argc < 2 || argc > 3)
+		usage();
+
+	return resume(argv[1], (argc > 2) ? strtoull(argv[2], NULL, 0) : 0ULL);
+}
diff --git a/usr/kinit/resume/resume.h b/usr/kinit/resume/resume.h
new file mode 100644
index 0000000..5fb929f
--- /dev/null
+++ b/usr/kinit/resume/resume.h
@@ -0,0 +1,7 @@
+#ifndef RESUME_H
+#define RESUME_H
+
+int do_resume(int argc, char *argv[]);
+int resume(const char *resume_file, unsigned long long resume_offset);
+
+#endif /* RESUME_H */
diff --git a/usr/kinit/resume/resumelib.c b/usr/kinit/resume/resumelib.c
new file mode 100644
index 0000000..03e596a
--- /dev/null
+++ b/usr/kinit/resume/resumelib.c
@@ -0,0 +1,106 @@
+/*
+ * Handle resume from suspend-to-disk
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+
+#include "kinit.h"
+#include "do_mounts.h"
+#include "resume.h"
+
+#ifndef CONFIG_PM_STD_PARTITION
+# define CONFIG_PM_STD_PARTITION ""
+#endif
+
+/*
+ * Decide from the command line whether to attempt a resume.
+ *
+ * The device comes from "resume=" (falling back to
+ * CONFIG_PM_STD_PARTITION) and the offset from "resume_offset=".
+ * Returns 0 without doing anything when no device is configured or the
+ * "noresume" flag is present; otherwise returns resume()'s result.
+ */
+int do_resume(int argc, char *argv[])
+{
+	const char *resume_file = CONFIG_PM_STD_PARTITION;
+	const char *resume_arg;
+	unsigned long long resume_offset;
+
+	resume_arg = get_arg(argc, argv, "resume=");
+	resume_file = resume_arg ? resume_arg : resume_file;
+	/* No resume device specified */
+	if (!resume_file[0])
+		return 0;
+
+	resume_arg = get_arg(argc, argv, "resume_offset=");
+	resume_offset = resume_arg ? strtoull(resume_arg, NULL, 0) : 0ULL;
+
+	/* Fix: we either should consider reverting the device back to
+	   ordinary swap, or (better) put that code into swapon */
+	/* Noresume requested */
+	if (get_flag(argc, argv, "noresume"))
+		return 0;
+	return resume(resume_file, resume_offset);
+}
+
+/*
+ * Ask the kernel to resume from @resume_file at @resume_offset by
+ * writing the offset to /sys/power/resume_offset and then the device's
+ * "major:minor" to /sys/power/resume.
+ *
+ * Every path through this function that returns at all returns -1:
+ * even after both writes succeed, control falls through to the failure
+ * label.
+ */
+int resume(const char *resume_file, unsigned long long resume_offset)
+{
+	dev_t resume_device;
+	int attr_fd = -1;
+	char attr_value[64];
+	int len;
+
+	resume_device = name_to_dev_t(resume_file);
+
+	if (major(resume_device) == 0) {
+		fprintf(stderr, "Invalid resume device: %s\n", resume_file);
+		goto failure;
+	}
+
+	if ((attr_fd = open("/sys/power/resume_offset", O_WRONLY)) < 0)
+		goto fail_offset;
+
+	len = snprintf(attr_value, sizeof attr_value,
+		       "%llu",
+		       resume_offset);
+
+	/* This should never happen */
+	if (len >= sizeof attr_value)
+		goto fail_offset;
+
+	if (write(attr_fd, attr_value, len) != len)
+		goto fail_offset;
+
+	close(attr_fd);
+
+	/* attr_fd is overwritten here, so a failed open cannot double-close */
+	if ((attr_fd = open("/sys/power/resume", O_WRONLY)) < 0)
+		goto fail_r;
+
+	len = snprintf(attr_value, sizeof attr_value,
+		       "%u:%u",
+		       major(resume_device), minor(resume_device));
+
+	/* This should never happen */
+	if (len >= sizeof attr_value)
+		goto fail_r;
+
+	dprintf("kinit: trying to resume from %s\n", resume_file);
+
+	if (write(attr_fd, attr_value, len) != len)
+		goto fail_r;
+
+	/* Okay, what are we still doing alive... */
+failure:
+	if (attr_fd >= 0)
+		close(attr_fd);
+	dprintf("kinit: No resume image, doing normal boot...\n");
+	return -1;
+
+fail_offset:
+	fprintf(stderr, "Cannot write /sys/power/resume_offset "
+		"(no software suspend kernel support, or old kernel version?)\n");
+	goto failure;
+
+fail_r:
+	fprintf(stderr, "Cannot write /sys/power/resume "
+		"(no software suspend kernel support?)\n");
+	goto failure;
+}
diff --git a/usr/kinit/run-init/Kbuild b/usr/kinit/run-init/Kbuild
new file mode 100644
index 0000000..eeff906
--- /dev/null
+++ b/usr/kinit/run-init/Kbuild
@@ -0,0 +1,38 @@
+#
+# Kbuild file for run-init
+#
+
+static-y := static/run-init
+shared-y := shared/run-init
+
+# common .o files
+objs := run-init.o runinitlib.o
+
+# TODO - do we want a stripped version
+# TODO - do we want the static.g + shared.g directories?
+
+# Create built-in.o with all object files (used by kinit)
+lib-y := $(objs)
+
+# force run-init to not have an executable stack (to keep READ_IMPLIES_EXEC
+# personality(2) flag from getting set and passed to init).
+EXTRA_KLIBCLDFLAGS += -z noexecstack
+
+# Additional include paths files
+KLIBCCFLAGS += -I$(srctree)/$(src)/..
+
+# .o files used to build executables
+static/run-init-y := $(objs)
+static/run-init-lib := ../lib.a
+shared/run-init-y := $(objs)
+shared/run-init-lib := ../lib.a
+
+# Cleaning
+clean-dirs := static shared
+
+# install binary
+ifdef KLIBCSHAREDFLAGS
+install-y := $(shared-y)
+else
+install-y := $(static-y)
+endif
diff --git a/usr/kinit/run-init/run-init.c b/usr/kinit/run-init/run-init.c
new file mode 100644
index 0000000..6a4ad3e
--- /dev/null
+++ b/usr/kinit/run-init/run-init.c
@@ -0,0 +1,114 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2004-2006 H.
Peter Anvin - All Rights Reserved + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall + * be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * ----------------------------------------------------------------------- */ + +/* + * Usage: exec run-init [-d caps] [-c /dev/console] [-n] [-p] /real-root /sbin/init "$@" + * + * This program should be called as the last thing in a shell script + * acting as /init in an initramfs; it does the following: + * + * 1. Delete all files in the initramfs; + * 2. Remounts /real-root onto the root filesystem; + * 3. Drops comma-separated list of capabilities; + * 4. Chroots; + * 5. Opens /dev/console; + * 6. Spawns the specified init program (with arguments.) + * + * With the -p option, it skips step 1 in order to allow the initramfs to + * be persisted into the running system. + * + * With the -n option, it skips steps 1, 2 and 6 and can be used to check + * whether the given root and init are likely to work. 
+ */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include "run-init.h"
+
+/* argv[0], kept for the usage and error messages */
+static const char *program;
+
+/* Print the usage message to stderr and exit with status 1. */
+static void __attribute__ ((noreturn)) usage(void)
+{
+	fprintf(stderr,
+		"Usage: exec %s [-d caps] [-c consoledev] [-n] [-p] /real-root /sbin/init [args]\n",
+		program);
+	exit(1);
+}
+
+/*
+ * Parse the options (-c console, -d capabilities to drop, -n dry run,
+ * -p persist initramfs), require at least a real-root and an init
+ * argument, and hand everything to run_init().
+ *
+ * Returns 1 after printing the message when run_init() returns an error
+ * string, and 0 when it returns NULL.
+ */
+int main(int argc, char *argv[])
+{
+	/* Command-line options and defaults */
+	const char *console = "/dev/console";
+	const char *realroot;
+	const char *init;
+	const char *error;
+	const char *drop_caps = NULL;
+	bool dry_run = false;
+	bool persist_initramfs = false;
+	char **initargs;
+
+	/* Variables... */
+	int o;
+
+	/* Parse the command line */
+	program = argv[0];
+
+	while ((o = getopt(argc, argv, "c:d:pn")) != -1) {
+		if (o == 'c') {
+			console = optarg;
+		} else if (o == 'd') {
+			drop_caps = optarg;
+		} else if (o == 'n') {
+			dry_run = true;
+		} else if (o == 'p') {
+			persist_initramfs = true;
+		} else {
+			usage();
+		}
+	}
+
+	if (argc - optind < 2)
+		usage();
+
+	realroot = argv[optind];
+	init = argv[optind + 1];
+	/* initargs[0] is the init path itself, followed by its arguments */
+	initargs = argv + optind + 1;
+
+	error = run_init(realroot, console, drop_caps, dry_run, persist_initramfs, init, initargs);
+
+	if (error) {
+		fprintf(stderr, "%s: %s: %s\n", program, error, strerror(errno));
+		return 1;
+	} else {
+		/* Must have been a dry run */
+		return 0;
+	}
+}
diff --git a/usr/kinit/run-init/run-init.h b/usr/kinit/run-init/run-init.h
new file mode 100644
index 0000000..5240ce7
--- /dev/null
+++ b/usr/kinit/run-init/run-init.h
@@ -0,0 +1,38 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2004-2006 H.
Peter Anvin - All Rights Reserved + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall + * be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * ----------------------------------------------------------------------- */ + +#ifndef RUN_INIT_H +#define RUN_INIT_H + +#include <stdbool.h> +/* Switch to the new root and exec init. Returns NULL only after a successful dry run; any other return value is a string naming the step (or the init path) that failed. */ +const char *run_init(const char *realroot, const char *console, + const char *drop_caps, bool dry_run, + bool persist_initramfs, const char *init, + char **initargs); + +#endif diff --git a/usr/kinit/run-init/runinitlib.c b/usr/kinit/run-init/runinitlib.c new file mode 100644 index 0000000..1c2e56a --- /dev/null +++ b/usr/kinit/run-init/runinitlib.c @@ -0,0 +1,232 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2004-2006 H.
Peter Anvin - All Rights Reserved + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall + * be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * ----------------------------------------------------------------------- */ + +/* + * run_init(realroot, consoledev, drop_caps, dry_run, persist_initramfs, init, initargs) + * + * This function should be called as the last thing in kinit, + * from initramfs; it does the following: + * + * - Delete all files in the initramfs; + * - Remounts /real-root onto the root filesystem; + * - Chroots; + * - Drops comma-separated list of capabilities; + * - Opens /dev/console; + * - Spawns the specified init program (with arguments.) + * + * On failure, returns a human-readable error message.
+ */ + +#include <assert.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/vfs.h> +#include "run-init.h" +#include "capabilities.h" + +/* Make it possible to compile on glibc by including constants that the + always-behind shipped glibc headers may not include. Classic example + on why the lack of ABI headers screw us up. */ +#ifndef TMPFS_MAGIC +# define TMPFS_MAGIC 0x01021994 +#endif +#ifndef RAMFS_MAGIC +# define RAMFS_MAGIC 0x858458f6 +#endif +#ifndef MS_MOVE +# define MS_MOVE 8192 +#endif + +static int nuke(const char *what); +/* Join dir and name into one path and nuke() it, unless the entry sits on a device other than me. len is the length of dir. Returns 0 or an errno-style code. */ +static int nuke_dirent(int len, const char *dir, const char *name, dev_t me) +{ + int bytes = len + strlen(name) + 2; + char path[bytes]; + int xlen; + struct stat st; + + xlen = snprintf(path, bytes, "%s/%s", dir, name); + assert(xlen < bytes); + + if (lstat(path, &st)) + return ENOENT; /* Return 0 since already gone? */ + + if (st.st_dev != me) + return 0; /* DO NOT recurse down mount points!!!!! */ + + return nuke(path); +} + +/* Wipe the contents of a directory, but not the directory itself */ +static int nuke_dir(const char *what) +{ + int len = strlen(what); + DIR *dir; + struct dirent *d; + int err = 0; + struct stat st; + + if (lstat(what, &st)) + return errno; + + if (!S_ISDIR(st.st_mode)) + return ENOTDIR; + + if (!(dir = opendir(what))) { + /* EACCES means we can't read it. Might be empty and removable; + if not, the rmdir() in nuke() will trigger an error. */ + return (errno == EACCES) ? 0 : errno; + } + + while ((d = readdir(dir))) { + /* Skip . and .. */ + if (d->d_name[0] == '.' && + (d->d_name[1] == '\0' || + (d->d_name[1] == '.'
&& d->d_name[2] == '\0'))) + continue; + + err = nuke_dirent(len, what, d->d_name, st.st_dev); + if (err) { + closedir(dir); + return err; + } + } + + closedir(dir); + + return 0; +} +/* Remove what: unlink() it, and if that fails with EISDIR, empty the directory and rmdir() it. Returns 0 on success, otherwise an errno value that is also left in errno. */ +static int nuke(const char *what) +{ + int rv; + int err = 0; + + rv = unlink(what); + if (rv < 0) { + if (errno == EISDIR) { + /* It's a directory. */ + err = nuke_dir(what); + if (!err) + err = rmdir(what) ? errno : err; + } else { + err = errno; + } + } + + if (err) { + errno = err; + return err; + } else { + return 0; + } +} +/* Switch root to realroot and exec init with initargs. Returns a string naming the failed step (or the init path), or NULL after a successful dry run; does not return when execv() succeeds. */ +const char *run_init(const char *realroot, const char *console, + const char *drop_caps, bool dry_run, + bool persist_initramfs, const char *init, char **initargs) +{ + struct stat rst, cst, ist; + struct statfs sfs; + int confd; + + /* First, change to the new root directory */ + if (chdir(realroot)) + return "chdir to new root"; + + /* This is a potentially highly destructive program. Take some + extra precautions. */ + + /* Make sure the current directory is not on the same filesystem + as the root directory */ + if (stat("/", &rst) || stat(".", &cst)) + return "stat"; + + if (rst.st_dev == cst.st_dev) + return "current directory on the same filesystem as the root"; + + /* Make sure we're on a ramfs */ + if (statfs("/", &sfs)) + return "statfs /"; + if (sfs.f_type != RAMFS_MAGIC && sfs.f_type != TMPFS_MAGIC) + return "rootfs not a ramfs or tmpfs"; + + /* Okay, I think we should be safe...
*/ + + if (!dry_run) { + if (!persist_initramfs) { + /* Delete rootfs contents */ + if (nuke_dir("/")) + return "nuking initramfs contents"; + } + + /* Overmount the root */ + if (mount(".", "/", NULL, MS_MOVE, NULL)) + return "overmounting root"; + } + + /* chroot, chdir */ + if (chroot(".") || chdir("/")) + return "chroot"; + + /* Drop capabilities */ + if (drop_capabilities(drop_caps) < 0) + return "dropping capabilities"; + + /* Open /dev/console */ + if ((confd = open(console, O_RDWR)) < 0) + return "opening console"; + if (!dry_run) { + dup2(confd, 0); + dup2(confd, 1); + dup2(confd, 2); + } + close(confd); + + if (!dry_run) { + /* Spawn init */ + execv(init, initargs); + return init; /* Failed to spawn init */ + } else { + if (stat(init, &ist)) + return init; + if (!S_ISREG(ist.st_mode) || !(ist.st_mode & S_IXUGO)) { + errno = EACCES; + return init; + } + return NULL; /* Success */ + } +} diff --git a/usr/kinit/xpio.c b/usr/kinit/xpio.c new file mode 100644 index 0000000..42a9844 --- /dev/null +++ b/usr/kinit/xpio.c @@ -0,0 +1,51 @@ +/* + * Looping versions of pread() and pwrite() + */ + +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> + +#include "xpio.h" + +ssize_t xpread(int fd, void *buf, size_t count, off_t offset) +{ + ssize_t ctr = 0; + ssize_t rv = 0; + char *bp = buf; + + while (count) { + rv = pread(fd, bp, count, offset); + + if (rv == 0 || (rv == -1 && errno != EINTR)) + break; + + bp += rv; + count -= rv; + offset += rv; + ctr += rv; + } + + return ctr ? ctr : rv; +} + +ssize_t xpwrite(int fd, void *buf, size_t count, off_t offset) +{ + ssize_t ctr = 0; + ssize_t rv = 0; + char *bp = buf; + + while (count) { + rv = pwrite(fd, bp, count, offset); + + if (rv == 0 || (rv == -1 && errno != EINTR)) + break; + + bp += rv; + count -= rv; + offset += rv; + ctr += rv; + } + + return ctr ? 
ctr : rv; +} diff --git a/usr/kinit/xpio.h b/usr/kinit/xpio.h new file mode 100644 index 0000000..0596a32 --- /dev/null +++ b/usr/kinit/xpio.h @@ -0,0 +1,11 @@ +/* + * kinit/xpio.h + */ + +#ifndef KINIT_XPIO_H +#define KINIT_XPIO_H + +ssize_t xpread(int fd, void *buf, size_t count, off_t offset); +ssize_t xpwrite(int fd, void *buf, size_t count, off_t offset); + +#endif /* KINIT_XPIO_H */ |