summaryrefslogtreecommitdiffstats
path: root/usr/kinit
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 17:06:04 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 17:06:04 +0000
commit2f0649f6fe411d7e07c8d56cf8ea56db53536da8 (patch)
tree778611fb52176dce1ad06c68e87b2cb348ca0f7b /usr/kinit
parentInitial commit. (diff)
downloadklibc-upstream.tar.xz
klibc-upstream.zip
Adding upstream version 2.0.13.upstream/2.0.13upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--usr/kinit/.gitignore3
-rw-r--r--usr/kinit/Kbuild43
-rw-r--r--usr/kinit/README9
-rw-r--r--usr/kinit/capabilities.c231
-rw-r--r--usr/kinit/capabilities.h10
-rw-r--r--usr/kinit/devname.c116
-rw-r--r--usr/kinit/do_mounts.c533
-rw-r--r--usr/kinit/do_mounts.h49
-rw-r--r--usr/kinit/do_mounts_md.c400
-rw-r--r--usr/kinit/do_mounts_mtd.c42
-rw-r--r--usr/kinit/fstype/Kbuild29
-rw-r--r--usr/kinit/fstype/btrfs.h57
-rw-r--r--usr/kinit/fstype/cramfs_fs.h85
-rw-r--r--usr/kinit/fstype/ext2_fs.h84
-rw-r--r--usr/kinit/fstype/ext3_fs.h134
-rw-r--r--usr/kinit/fstype/fstype.c445
-rw-r--r--usr/kinit/fstype/fstype.h20
-rw-r--r--usr/kinit/fstype/gfs2_fs.h56
-rw-r--r--usr/kinit/fstype/iso9660_sb.h24
-rw-r--r--usr/kinit/fstype/jfs_superblock.h114
-rw-r--r--usr/kinit/fstype/luks_fs.h44
-rw-r--r--usr/kinit/fstype/lvm2_sb.h18
-rw-r--r--usr/kinit/fstype/main.c57
-rw-r--r--usr/kinit/fstype/minix_fs.h85
-rw-r--r--usr/kinit/fstype/nilfs_fs.h64
-rw-r--r--usr/kinit/fstype/ocfs2_fs.h90
-rw-r--r--usr/kinit/fstype/reiser4_fs.h31
-rw-r--r--usr/kinit/fstype/reiserfs_fs.h74
-rw-r--r--usr/kinit/fstype/romfs_fs.h56
-rw-r--r--usr/kinit/fstype/squashfs_fs.h48
-rw-r--r--usr/kinit/fstype/swap_fs.h25
-rw-r--r--usr/kinit/fstype/xfs_sb.h21
-rw-r--r--usr/kinit/getarg.c57
-rw-r--r--usr/kinit/getintfile.c30
-rw-r--r--usr/kinit/initrd.c204
-rw-r--r--usr/kinit/ipconfig/Kbuild35
-rw-r--r--usr/kinit/ipconfig/README.ipconfig120
-rw-r--r--usr/kinit/ipconfig/bootp_packet.h44
-rw-r--r--usr/kinit/ipconfig/bootp_proto.c565
-rw-r--r--usr/kinit/ipconfig/bootp_proto.h10
-rw-r--r--usr/kinit/ipconfig/dhcp_proto.c301
-rw-r--r--usr/kinit/ipconfig/dhcp_proto.h19
-rw-r--r--usr/kinit/ipconfig/ipconfig.h25
-rw-r--r--usr/kinit/ipconfig/main.c924
-rw-r--r--usr/kinit/ipconfig/netdev.c279
-rw-r--r--usr/kinit/ipconfig/netdev.h107
-rw-r--r--usr/kinit/ipconfig/packet.c278
-rw-r--r--usr/kinit/ipconfig/packet.h12
-rw-r--r--usr/kinit/kinit.c331
-rw-r--r--usr/kinit/kinit.h70
-rw-r--r--usr/kinit/name_to_dev.c276
-rw-r--r--usr/kinit/nfsmount/Kbuild31
-rw-r--r--usr/kinit/nfsmount/README.locking26
-rw-r--r--usr/kinit/nfsmount/dummypmap.c281
-rw-r--r--usr/kinit/nfsmount/dummypmap.h11
-rw-r--r--usr/kinit/nfsmount/dummypmap_test.c2
-rw-r--r--usr/kinit/nfsmount/main.c288
-rw-r--r--usr/kinit/nfsmount/mount.c347
-rw-r--r--usr/kinit/nfsmount/nfsmount.h34
-rw-r--r--usr/kinit/nfsmount/portmap.c73
-rw-r--r--usr/kinit/nfsmount/sunrpc.c252
-rw-r--r--usr/kinit/nfsmount/sunrpc.h110
-rw-r--r--usr/kinit/nfsroot.c111
-rw-r--r--usr/kinit/ramdisk_load.c281
-rw-r--r--usr/kinit/readfile.c86
-rw-r--r--usr/kinit/resume/Kbuild34
-rw-r--r--usr/kinit/resume/resume.c25
-rw-r--r--usr/kinit/resume/resume.h7
-rw-r--r--usr/kinit/resume/resumelib.c106
-rw-r--r--usr/kinit/run-init/Kbuild38
-rw-r--r--usr/kinit/run-init/run-init.c114
-rw-r--r--usr/kinit/run-init/run-init.h38
-rw-r--r--usr/kinit/run-init/runinitlib.c232
-rw-r--r--usr/kinit/xpio.c51
-rw-r--r--usr/kinit/xpio.h11
75 files changed, 9373 insertions, 0 deletions
diff --git a/usr/kinit/.gitignore b/usr/kinit/.gitignore
new file mode 100644
index 0000000..f5a4f24
--- /dev/null
+++ b/usr/kinit/.gitignore
@@ -0,0 +1,3 @@
+lib.a
+kinit
+kinit.shared
diff --git a/usr/kinit/Kbuild b/usr/kinit/Kbuild
new file mode 100644
index 0000000..6cd5ba6
--- /dev/null
+++ b/usr/kinit/Kbuild
@@ -0,0 +1,43 @@
+#
+# Kbuild file for kinit
+#
+
+# library part of kinit. Is used by programs in sub-directories (resume et al)
+lib-y := name_to_dev.o devname.o getarg.o capabilities.o
+# use lib for kinit
+static/kinit-y := lib.a
+
+static/kinit-y += kinit.o do_mounts.o ramdisk_load.o initrd.o
+static/kinit-y += getintfile.o readfile.o xpio.o
+static/kinit-y += do_mounts_md.o do_mounts_mtd.o nfsroot.o
+
+static/kinit-y += ipconfig/
+static/kinit-y += nfsmount/
+static/kinit-y += run-init/
+static/kinit-y += fstype/
+static/kinit-y += resume/
+
+static-y := static/kinit
+shared-y := shared/kinit
+shared/kinit-y := $(static/kinit-y)
+
+# Additional include paths
+KLIBCCFLAGS += -I$(srctree)/$(src)/fstype \
+ -I$(srctree)/$(src)/ipconfig \
+ -I$(srctree)/$(src)/nfsmount \
+ -I$(srctree)/$(src)/resume \
+ -I$(srctree)/$(src)/run-init
+
+# Cleaning
+targets += static/kinit static/kinit.g shared/kinit shared/kinit.g
+subdir- := fstype ipconfig nfsmount resume run-init
+
+# Clean deletes the static and shared dir
+clean-dirs := static shared
+
+# install binary
+ifdef KLIBCSHAREDFLAGS
+install-y := shared/kinit
+else
+install-y := static/kinit
+endif
diff --git a/usr/kinit/README b/usr/kinit/README
new file mode 100644
index 0000000..fa7f645
--- /dev/null
+++ b/usr/kinit/README
@@ -0,0 +1,9 @@
+kinit - tiny init program
+-------------------------
+
+This program is intended for use as /sbin/init in an initramfs
+environment. It currently replaces the kernel's ipconfig and nfsroot
+code.
+
+--
+Bryan O'Sullivan (2003/05/05)
diff --git a/usr/kinit/capabilities.c b/usr/kinit/capabilities.c
new file mode 100644
index 0000000..2c61025
--- /dev/null
+++ b/usr/kinit/capabilities.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2011 Google Inc. All Rights Reserved
+ * Author: mikew@google.com (Mike Waychison)
+ */
+
+/*
+ * We have to include the klibc types.h here to keep the kernel's
+ * types.h from being used.
+ */
+#include <sys/types.h>
+
+#include <sys/capability.h>
+#include <sys/prctl.h>
+#include <sys/utsname.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "kinit.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
+#define MAKE_CAP(cap) [cap] = { .cap_name = #cap }
+
+/*
+ * Table of capability names, indexed by capability number.
+ *
+ * MAKE_CAP() uses a designated initializer, so each entry is stored at the
+ * index given by its CAP_* constant and carries that constant's spelling as
+ * cap_name. Any index below the highest listed constant that is not named
+ * here is zero-initialized, i.e. has cap_name == NULL.
+ */
+struct capability {
+ const char *cap_name;
+} capabilities[] = {
+ MAKE_CAP(CAP_CHOWN),
+ MAKE_CAP(CAP_DAC_OVERRIDE),
+ MAKE_CAP(CAP_DAC_READ_SEARCH),
+ MAKE_CAP(CAP_FOWNER),
+ MAKE_CAP(CAP_FSETID),
+ MAKE_CAP(CAP_KILL),
+ MAKE_CAP(CAP_SETGID),
+ MAKE_CAP(CAP_SETUID),
+ MAKE_CAP(CAP_SETPCAP),
+ MAKE_CAP(CAP_LINUX_IMMUTABLE),
+ MAKE_CAP(CAP_NET_BIND_SERVICE),
+ MAKE_CAP(CAP_NET_BROADCAST),
+ MAKE_CAP(CAP_NET_ADMIN),
+ MAKE_CAP(CAP_NET_RAW),
+ MAKE_CAP(CAP_IPC_LOCK),
+ MAKE_CAP(CAP_IPC_OWNER),
+ MAKE_CAP(CAP_SYS_MODULE),
+ MAKE_CAP(CAP_SYS_RAWIO),
+ MAKE_CAP(CAP_SYS_CHROOT),
+ MAKE_CAP(CAP_SYS_PTRACE),
+ MAKE_CAP(CAP_SYS_PACCT),
+ MAKE_CAP(CAP_SYS_ADMIN),
+ MAKE_CAP(CAP_SYS_BOOT),
+ MAKE_CAP(CAP_SYS_NICE),
+ MAKE_CAP(CAP_SYS_RESOURCE),
+ MAKE_CAP(CAP_SYS_TIME),
+ MAKE_CAP(CAP_SYS_TTY_CONFIG),
+ MAKE_CAP(CAP_MKNOD),
+ MAKE_CAP(CAP_LEASE),
+ MAKE_CAP(CAP_AUDIT_WRITE),
+ MAKE_CAP(CAP_AUDIT_CONTROL),
+ MAKE_CAP(CAP_SETFCAP),
+ MAKE_CAP(CAP_MAC_OVERRIDE),
+ MAKE_CAP(CAP_MAC_ADMIN),
+ MAKE_CAP(CAP_SYSLOG),
+};
+
+/*
+ * Print a printf-style message on stderr and terminate the program with
+ * exit status 1. This function does not return.
+ */
+static void fail(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+static void fail(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	exit(1);
+}
+
+/*
+ * Look up a capability by name, ignoring case.
+ * Returns the index of the matching entry in capabilities[], or -1 when
+ * no named entry matches.
+ */
+static int find_capability(const char *s)
+{
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(capabilities)) {
+		const char *name = capabilities[idx].cap_name;
+
+		/* Unnamed slots (gaps in the table) can never match */
+		if (name && !strcasecmp(s, name))
+			return idx;
+		idx++;
+	}
+
+	return -1;
+}
+
+/*
+ * Clear one capability from the inheritable set of the calling process.
+ * The current sets are read with capget(), the bit is masked out and the
+ * result is written back with capset(). Exits the program on failure.
+ */
+static void do_capset(int cap_ordinal)
+{
+	struct __user_cap_header_struct hdr;
+	struct __user_cap_data_struct caps[2];
+	/* Capabilities 0..31 live in caps[0], 32 and up in caps[1] */
+	int word = (cap_ordinal < 32) ? 0 : 1;
+	int bit = (cap_ordinal < 32) ? cap_ordinal : cap_ordinal - 32;
+
+	/* Fetch the current capability sets */
+	hdr.version = _LINUX_CAPABILITY_VERSION_3;
+	hdr.pid = getpid();
+	if (capget(&hdr, caps) != 0) {
+		perror("capget()");
+		exit(1);
+	}
+
+	caps[word].inheritable &= ~(1U << bit);
+
+	/* Write the reduced sets back */
+	hdr.version = _LINUX_CAPABILITY_VERSION_3;
+	hdr.pid = getpid();
+	if (capset(&hdr, caps) != 0)
+		fail("Couldn't drop the capability \"%s\"\n",
+		     capabilities[cap_ordinal].cap_name);
+}
+
+/*
+ * Remove one capability from the bounding set if it is currently present.
+ * Exits the program when the kernel rejects the capability number or when
+ * the drop itself fails.
+ */
+static void do_bset(int cap_ordinal)
+{
+	int in_bset = prctl(PR_CAPBSET_READ, cap_ordinal);
+
+	if (in_bset < 0)
+		fail("Kernel doesn't recognize capability %d\n", cap_ordinal);
+
+	/* Nothing to do unless the capability is reported as present */
+	if (in_bset != 1)
+		return;
+
+	if (prctl(PR_CAPBSET_DROP, cap_ordinal) != 0)
+		fail("Error dropping capability %s from bset\n",
+		     capabilities[cap_ordinal].cap_name);
+}
+
+/*
+ * Clear one capability bit in a usermode-helper capability mask file.
+ *
+ * The file is expected to contain two unsigned decimal words
+ * ("<lo32> <hi32>"). The bit for cap_ordinal is cleared in the matching
+ * word and both words are written back to the same stream.
+ *
+ * filename:    path of the mask file, opened for update ("r+")
+ * cap_ordinal: capability number; 0..31 selects the low word, 32 and up
+ *              the high word
+ *
+ * Any failure terminates the program through fail().
+ */
+static void do_usermodehelper_file(const char *filename, int cap_ordinal)
+{
+	enum { BUF_LEN = 80 };
+	unsigned int lo32, hi32;
+	FILE *file;
+	char buf[BUF_LEN];
+	char tail;
+	size_t bytes_read, len;
+	int ret;
+
+	/* Try and open the file */
+	file = fopen(filename, "r+");
+	if (!file && errno == ENOENT)
+		fail("Could not disable usermode helpers capabilities as "
+		     "%s is not available\n", filename);
+	if (!file)
+		fail("Failed to access file %s errno %d\n", filename, errno);
+
+	/* Read and process the current bits */
+	bytes_read = fread(buf, 1, sizeof(buf) - 1, file);
+	if (bytes_read == 0)
+		fail("Trouble reading %s\n", filename);
+	buf[bytes_read] = '\0';
+	/* A third conversion succeeding means trailing junk after the words */
+	ret = sscanf(buf, "%u %u %c", &lo32, &hi32, &tail);
+	if (ret != 2)
+		fail("Failed to understand %s \"%s\"\n", filename, buf);
+
+	/*
+	 * Clear the bit in the local copy. The constant is unsigned because
+	 * shifting a signed 1 into bit 31 is undefined behaviour.
+	 */
+	if (cap_ordinal < 32)
+		lo32 &= ~(1U << cap_ordinal);
+	else
+		hi32 &= ~(1U << (cap_ordinal - 32));
+
+	/* Commit the new bit masks to the kernel */
+	ret = fflush(file);
+	if (ret != 0)
+		fail("Failed on file %s to fflush %d\n", filename, ret);
+	sprintf(buf, "%u %u", lo32, hi32);
+	len = strlen(buf) + 1;
+	/* fwrite() returns the number of bytes written: success is 'len' */
+	if (fwrite(buf, 1, len, file) != len)
+		fail("Failed to commit usermode helper bitmasks: %d\n", errno);
+
+	/* Buffered data may only reach the file here, so check the close */
+	if (fclose(file) != 0)
+		fail("Failed to commit usermode helper bitmasks: %d\n", errno);
+}
+
+/*
+ * Clear the capability from each usermode-helper mask file listed below.
+ */
+static void do_usermodehelper(int cap_ordinal)
+{
+	static const char * const files[] = {
+		"/proc/sys/kernel/usermodehelper/bset",
+		"/proc/sys/kernel/usermodehelper/inheritable",
+	};
+	const char * const *path = files;
+	const char * const *end = files + ARRAY_SIZE(files);
+
+	for (; path != end; path++)
+		do_usermodehelper_file(*path, cap_ordinal);
+}
+
+/*
+ * Drop a single capability: first from the usermode-helper masks, then
+ * from the bounding set, then from this process's inheritable set, and
+ * report the capability name on stdout.
+ */
+static void drop_capability(int cap_ordinal)
+{
+	const char *name;
+
+	do_usermodehelper(cap_ordinal);
+	do_bset(cap_ordinal);
+	do_capset(cap_ordinal);
+
+	name = capabilities[cap_ordinal].cap_name;
+	printf("Dropped capability: %s\n", name);
+}
+
+/*
+ * Drop every capability named in the comma-separated list 'caps'.
+ *
+ * A NULL list is a no-op. An unknown name, an allocation failure or a
+ * failed drop terminates the program through fail().
+ * Returns 0 whenever it returns.
+ */
+int drop_capabilities(const char *caps)
+{
+	char *copy, *name;
+	char *state = NULL;
+
+	if (caps == NULL)
+		return 0;
+
+	/* strtok_r() writes into its input, so tokenize a private copy */
+	copy = strdup(caps);
+	if (copy == NULL)
+		fail("Failed to drop caps as requested. Exiting\n");
+
+	for (name = strtok_r(copy, ",", &state); name != NULL;
+	     name = strtok_r(NULL, ",", &state)) {
+		int ordinal = find_capability(name);
+
+		if (ordinal < 0)
+			fail("Could not understand capability name \"%s\" "
+			     "on command line, failing init\n", name);
+
+		drop_capability(ordinal);
+	}
+
+	free(copy);
+	return 0;
+}
diff --git a/usr/kinit/capabilities.h b/usr/kinit/capabilities.h
new file mode 100644
index 0000000..a32a66a
--- /dev/null
+++ b/usr/kinit/capabilities.h
@@ -0,0 +1,10 @@
+/*
+ * capabilities.h
+ */
+
+#ifndef KINIT_CAPABILITIES_H
+#define KINIT_CAPABILITIES_H
+
+int drop_capabilities(const char *caps);
+
+#endif /* KINIT_CAPABILITIES_H */
diff --git a/usr/kinit/devname.c b/usr/kinit/devname.c
new file mode 100644
index 0000000..c327e3b
--- /dev/null
+++ b/usr/kinit/devname.c
@@ -0,0 +1,116 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+
+#include "kinit.h"
+
+/*
+ * Print the name of a block device.
+ */
+#define BUF_SIZE 512
+
+/*
+ * Search the sysfs directory 'sysdir' (and, on a major-number match, the
+ * matching entry's subdirectory) for the entry whose "dev" file holds the
+ * major:minor pair of 'dev'.
+ *
+ * namebuf: buffer of at least BUF_SIZE bytes; used as scratch space while
+ *          scanning and, on success, receives the matching entry name
+ * sysdir:  writable buffer holding the directory path; it is extended in
+ *          place during the scan and is not restored afterwards
+ * dev:     device number to look for
+ *
+ * Returns 1 if a matching entry was found, 0 otherwise.
+ */
+static int scansysdir(char *namebuf, char *sysdir, dev_t dev)
+{
+	char *dirtailptr = strchr(sysdir, '\0');
+	DIR *dir;
+	int done = 0;
+	struct dirent *de;
+	char *systail;
+	FILE *sysdev;
+	unsigned long ma, mi;
+	char *ep;
+	size_t rd;
+
+	dir = opendir(sysdir);
+	if (!dir)
+		return 0;
+
+	*dirtailptr++ = '/';
+
+	while (!done && (de = readdir(dir))) {
+		/* Assume if we see a dot-name in sysfs it's special */
+		if (de->d_name[0] == '.')
+			continue;
+
+		if (de->d_type != DT_UNKNOWN && de->d_type != DT_DIR)
+			continue;
+
+		if (strlen(de->d_name) >=
+		    (BUF_SIZE - 64) - (dirtailptr - sysdir))
+			continue;	/* Badness... */
+
+		strcpy(dirtailptr, de->d_name);
+		systail = strchr(sysdir, '\0');
+
+		strcpy(systail, "/dev");
+		sysdev = fopen(sysdir, "r");
+		if (!sysdev)
+			continue;
+
+		/*
+		 * Abusing the namebuf as temporary storage here. Read at
+		 * most BUF_SIZE - 1 bytes so the terminator always fits.
+		 */
+		rd = fread(namebuf, 1, BUF_SIZE - 1, sysdev);
+		namebuf[rd] = '\0';
+
+		fclose(sysdev);
+
+		ma = strtoul(namebuf, &ep, 10);
+		if (ma != major(dev) || *ep != ':')
+			continue;
+
+		mi = strtoul(ep + 1, &ep, 10);
+		if (*ep != '\n')
+			continue;
+
+		if (mi == minor(dev)) {
+			/* Found it! */
+			strcpy(namebuf, de->d_name);
+			done = 1;
+		} else {
+			/* we have a major number match, scan for partitions */
+			*systail = '\0';
+			done = scansysdir(namebuf, sysdir, dev);
+		}
+	}
+
+	closedir(dir);
+	return done;
+}
+
+/*
+ * Return a printable name for block device 'dev': the name found under
+ * /sys/block, or "dev" if none was found, followed by "(major,minor)".
+ * The result lives in a static buffer that each call overwrites.
+ */
+const char *bdevname(dev_t dev)
+{
+	static char buf[BUF_SIZE];
+	char sysdir[BUF_SIZE];
+	size_t used;
+
+	strcpy(sysdir, "/sys/block");
+
+	if (scansysdir(buf, sysdir, dev) == 0)
+		strcpy(buf, "dev");	/* prints e.g. dev(3,5) */
+
+	used = strlen(buf);
+	snprintf(buf + used, sizeof buf - used, "(%d,%d)",
+		 major(dev), minor(dev));
+
+	return buf;
+}
+
+#ifdef TEST_DEVNAME /* Standalone test */
+
+/*
+ * Standalone test driver: print the device name for each device number
+ * given on the command line.
+ */
+int main(int argc, char *argv[])
+{
+	int i = 1;
+
+	while (i < argc) {
+		dev_t dev = strtoul(argv[i], NULL, 0);
+
+		printf("0x%08x = %s\n", (unsigned int)dev, bdevname(dev));
+		i++;
+	}
+
+	return 0;
+}
+
+#endif /* TEST */
diff --git a/usr/kinit/do_mounts.c b/usr/kinit/do_mounts.c
new file mode 100644
index 0000000..b648299
--- /dev/null
+++ b/usr/kinit/do_mounts.c
@@ -0,0 +1,533 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <mntent.h>
+
+#include "do_mounts.h"
+#include "kinit.h"
+#include "fstype.h"
+#include "zlib.h"
+
+#ifndef MS_RELATIME
+# define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
+#endif
+
+#ifndef MS_STRICTATIME
+# define MS_STRICTATIME (1<<24) /* Always perform atime updates */
+#endif
+
+/*
+ * The following mount option parsing was stolen from
+ *
+ * usr/utils/mount_opts.c
+ *
+ * and adapted to add some later mount flags.
+ */
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
+/*
+ * One recognized mount option and its effect on the mount flags, as
+ * applied by parse_mount_options().
+ */
+struct mount_opts {
+ /* Option name, compared against the option text after any "no" prefix */
+ const char str[16];
+ /* Bits cleared from the flags before the option is applied */
+ unsigned long rwmask;
+ /* Bits set when the option is given without a "no" prefix */
+ unsigned long rwset;
+ /* Bits set when the option is given with a "no" prefix */
+ unsigned long rwnoset;
+};
+
+/*
+ * Accumulator for mount options that do not map to a flag; built up by
+ * add_extra_option() as a comma-separated string.
+ */
+struct extra_opts {
+ /* Allocated string of collected options, or NULL if none yet */
+ char *str;
+ /* Position inside str where add_extra_option() writes the next option */
+ char *end;
+ /* Running length of the collected text, as counted by add_extra_option() */
+ int used_size;
+ /* Size recorded by add_extra_option() at its last realloc() */
+ int alloc_size;
+};
+
+/*
+ * These options define the function of "mount(2)".
+ */
+#define MS_TYPE (MS_REMOUNT|MS_BIND|MS_MOVE)
+
+
+/*
+ * These must be in alphabetic order!
+ * parse_mount_options() scans this table linearly with strcmp() and gives up
+ * as soon as it reaches an entry that sorts after the option being looked up.
+ */
+static const struct mount_opts options[] = {
+ /* name mask set noset */
+ {"async", MS_SYNCHRONOUS, 0, MS_SYNCHRONOUS},
+ {"atime", MS_NOATIME, 0, MS_NOATIME},
+ {"bind", MS_TYPE, MS_BIND, 0,},
+ {"dev", MS_NODEV, 0, MS_NODEV},
+ {"diratime", MS_NODIRATIME, 0, MS_NODIRATIME},
+ {"dirsync", MS_DIRSYNC, MS_DIRSYNC, 0},
+ {"exec", MS_NOEXEC, 0, MS_NOEXEC},
+ {"move", MS_TYPE, MS_MOVE, 0},
+ {"nodev", MS_NODEV, MS_NODEV, 0},
+ {"noexec", MS_NOEXEC, MS_NOEXEC, 0},
+ {"nosuid", MS_NOSUID, MS_NOSUID, 0},
+ {"recurse", MS_REC, MS_REC, 0},
+ {"relatime", MS_RELATIME, MS_RELATIME, 0},
+ {"remount", MS_TYPE, MS_REMOUNT, 0},
+ {"ro", MS_RDONLY, MS_RDONLY, 0},
+ {"rw", MS_RDONLY, 0, MS_RDONLY},
+ {"strictatime", MS_STRICTATIME, MS_STRICTATIME, 0},
+ {"suid", MS_NOSUID, 0, MS_NOSUID},
+ {"sync", MS_SYNCHRONOUS, MS_SYNCHRONOUS, 0},
+ {"verbose", MS_VERBOSE, MS_VERBOSE, 0},
+};
+
+/*
+ * Append 's' to 'extra->str'. 's' is a mount option that can't be turned into
+ * a flag. Options are joined with ','.
+ *
+ * Return 0 on success, -1 on error. On error the accumulated string has
+ * been freed and 'extra' is reset to empty, so the caller must not free
+ * or use extra->str afterwards.
+ */
+static int add_extra_option(struct extra_opts *extra, char *s)
+{
+	int len = strlen(s);
+	int newlen;
+
+	/* A separator is written exactly when text is already present */
+	if (extra->used_size)
+		len++;		/* +1 for ',' */
+
+	/* Total text length after appending, counting the separator */
+	newlen = extra->used_size + len;
+
+	if (newlen >= extra->alloc_size) {
+		char *new;
+
+		new = realloc(extra->str, newlen + 1);	/* +1 for NUL */
+		if (!new) {
+			free(extra->str);
+			extra->str = NULL;
+			extra->end = NULL;
+			extra->used_size = 0;
+			extra->alloc_size = 0;
+			return -1;
+		}
+
+		extra->str = new;
+		extra->alloc_size = newlen;
+	}
+
+	extra->end = extra->str + extra->used_size;
+	if (extra->used_size) {
+		*extra->end = ',';
+		extra->end++;
+	}
+	strcpy(extra->end, s);
+	extra->used_size = newlen;
+	extra->end = extra->str + newlen;
+
+	return 0;
+}
+
+/*
+ * Split 'arg' at commas and sort each option into one of two places:
+ * options found in the options[] table update '*flags', everything else
+ * is appended verbatim to 'extra'. A leading "no" is stripped before the
+ * table lookup and selects the entry's "noset" bits instead of its "set"
+ * bits. 'arg' is modified in place.
+ *
+ * Return 0 on success, -1 if an extra option could not be stored.
+ */
+static int
+parse_mount_options(char *arg, unsigned long *flags, struct extra_opts *extra)
+{
+	char *token;
+
+	while ((token = strsep(&arg, ",")) != NULL) {
+		const struct mount_opts *match = NULL;
+		char *name = token;
+		int negated = (token[0] == 'n' && token[1] == 'o');
+		unsigned int i;
+
+		if (negated)
+			name += 2;
+
+		for (i = 0; i < ARRAY_SIZE(options); i++) {
+			int cmp = strcmp(name, options[i].str);
+
+			if (cmp == 0)
+				match = &options[i];
+			/* Sorted table: stop on a hit or once past 'name' */
+			if (cmp <= 0)
+				break;
+		}
+
+		if (match) {
+			*flags &= ~match->rwmask;
+			*flags |= negated ? match->rwnoset : match->rwset;
+		} else if (add_extra_option(extra, token) != 0) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Create the block device node "name" for device number 'dev', replacing
+ * whatever was at that path. Returns the result of mknod().
+ */
+int create_dev(const char *name, dev_t dev)
+{
+	const mode_t mode = S_IFBLK | 0600;
+
+	/* Remove any existing entry first; the result is not checked */
+	unlink(name);
+
+	return mknod(name, mode, dev);
+}
+
+
+/*
+ * Make sure something exists at path 'name'. If stat() fails, translate
+ * the name to a device number and try to create a block device node for
+ * it; failing to do so is not an error.
+ */
+static void create_dev_if_not_present(const char *name)
+{
+	struct stat st;
+	dev_t dev;
+
+	if (stat(name, &st) != 0) {
+		dev = name_to_dev_t(name);
+		if (dev != 0)
+			(void) create_dev(name, dev);
+	}
+}
+
+
+/*
+ * Mount 'source' on 'target', possibly trying a set of different types.
+ *
+ * With a non-NULL 'type' a single mount(2) is attempted, and retried
+ * read-only if it fails with EACCES and MS_RDONLY was not requested.
+ * With a NULL 'type' the filesystem is first identified with
+ * identify_fs(); if that does not lead to a successful mount, every
+ * filesystem in /proc/filesystems that is not marked "nodev" is tried in
+ * turn until one mounts or a mount fails with an errno other than EINVAL.
+ *
+ * Returns the name of the filesystem type that was mounted, or NULL on
+ * failure with errno left as set by the last attempt. A name taken from
+ * /proc/filesystems is returned in a static buffer which the next such
+ * call overwrites.
+ */
+const char *mount_block(const char *source, const char *target,
+			const char *type, unsigned long flags,
+			const void *data)
+{
+	/* Stable storage for a type name that came from the freed fslist */
+	static char found_type[64];
+	char *fslist, *p, *ep;
+	const char *rp;
+	ssize_t fsbytes;
+	int fd;
+	int saved_errno;
+
+	if (type) {
+		dprintf("kinit: trying to mount %s on %s "
+			"with type %s, flags 0x%lx, data '%s'\n",
+			source, target, type, flags, (char *)data);
+		int rv = mount(source, target, type, flags, data);
+
+		if (rv != 0)
+			dprintf("kinit: mount %s on %s failed "
+				"with errno = %d\n",
+				source, target, errno);
+		/* Mount readonly if necessary */
+		if (rv == -1 && errno == EACCES && !(flags & MS_RDONLY))
+			rv = mount(source, target, type, flags | MS_RDONLY,
+				   data);
+		return rv ? NULL : type;
+	}
+
+	/* If no type given, try to identify the type first; this
+	   also takes care of specific ordering requirements, like
+	   ext3 before ext2... */
+	fd = open(source, O_RDONLY);
+	if (fd >= 0) {
+		int err = identify_fs(fd, &type, NULL, 0);
+		close(fd);
+
+		if (!err && type) {
+			dprintf("kinit: %s appears to be a %s filesystem\n",
+				source, type);
+			type = mount_block(source, target, type, flags, data);
+			if (type)
+				return type;
+		}
+	}
+
+	dprintf("kinit: failed to identify filesystem %s, trying all\n",
+		source);
+
+	fsbytes = readfile("/proc/filesystems", &fslist);
+
+	errno = EINVAL;
+	if (fsbytes < 0)
+		return NULL;
+
+	p = fslist;
+	ep = fslist + fsbytes;
+
+	rp = NULL;
+
+	while (p < ep) {
+		type = p;
+		p = strchr(p, '\n');
+		if (!p)
+			break;
+		*p++ = '\0';
+		/* We can't mount a block device as a "nodev" fs */
+		if (*type != '\t')
+			continue;
+
+		type++;
+		rp = mount_block(source, target, type, flags, data);
+		if (rp)
+			break;
+		if (errno != EINVAL)
+			break;
+	}
+
+	/* Callers inspect errno after a NULL return; keep it across free() */
+	saved_errno = errno;
+	if (rp) {
+		/* rp points into fslist: copy the name out before freeing */
+		snprintf(found_type, sizeof found_type, "%s", rp);
+		rp = found_type;
+	}
+	free(fslist);
+	errno = saved_errno;
+
+	return rp;
+}
+
+/*
+ * Mount the root filesystem from a block device.
+ *
+ * Creates /dev/root for 'root_dev' and mounts it on /root, passing the
+ * value of the "rootflags=" argument as mount data. When 'type' is given,
+ * only that type is attempted; the autodetecting mount_block() call below
+ * is reached only with errno still 0, which in this function means 'type'
+ * was NULL.
+ *
+ * Returns 0 on success, -errno when the last failure was something other
+ * than EINVAL, and -ESRCH when it was EINVAL.
+ */
+static int
+mount_block_root(int argc, char *argv[], dev_t root_dev,
+ const char *type, unsigned long flags)
+{
+ const char *data, *rp;
+
+ data = get_arg(argc, argv, "rootflags=");
+ /* The result of creating the node is not checked here */
+ create_dev("/dev/root", root_dev);
+
+ /* Cleared so the !errno test below can tell whether a mount was tried */
+ errno = 0;
+
+ if (type) {
+ if ((rp = mount_block("/dev/root", "/root", type, flags, data)))
+ goto ok;
+ if (errno != EINVAL)
+ goto bad;
+ }
+
+ if (!errno
+ && (rp = mount_block("/dev/root", "/root", NULL, flags, data)))
+ goto ok;
+
+bad:
+ if (errno != EINVAL) {
+ /*
+ * Allow the user to distinguish between failed open
+ * and bad superblock on root device.
+ */
+ fprintf(stderr, "%s: Cannot open root device %s\n",
+ progname, bdevname(root_dev));
+ return -errno;
+ } else {
+ fprintf(stderr, "%s: Unable to mount root fs on device %s\n",
+ progname, bdevname(root_dev));
+ return -ESRCH;
+ }
+
+ok:
+ /* rp is the filesystem type name returned by mount_block() */
+ printf("%s: Mounted root (%s filesystem)%s.\n",
+ progname, rp, (flags & MS_RDONLY) ? " readonly" : "");
+ return 0;
+}
+
+/*
+ * Try to mount root from each device in the comma-separated list
+ * 'root_dev_name', in order, stopping at the first that mounts.
+ *
+ * Returns 0 on success, the result of the last mount_root() attempt on
+ * failure, -ESRCH if the list is missing or empty, and -ENOMEM if the
+ * list could not be copied.
+ */
+static int
+mount_roots(int argc, char *argv[], const char *root_dev_name)
+{
+	char *roots;
+	char *root;
+	const char *sep = ",";
+	char *saveptr = NULL;
+	int ret = -ESRCH;
+
+	if (!root_dev_name)
+		return ret;
+
+	/* strtok_r() modifies its input, so work on a private copy */
+	roots = strdup(root_dev_name);
+	if (!roots)
+		return -ENOMEM;
+
+	root = strtok_r(roots, sep, &saveptr);
+	while (root) {
+		dev_t root_dev;
+
+		dprintf("kinit: trying to mount %s\n", root);
+		root_dev = name_to_dev_t(root);
+		ret = mount_root(argc, argv, root_dev, root);
+		if (!ret)
+			break;
+		root = strtok_r(NULL, sep, &saveptr);
+	}
+	free(roots);
+	return ret;
+}
+
+/*
+ * Mount the root filesystem and, on success, change directory to /root.
+ *
+ * Root is mounted read-only unless the "rw" flag outranks "ro" on the
+ * command line. "rootfstype=nfs" forces an NFS root; "rootfstype=jffs2"
+ * with a zero major number selects an MTD root; anything else is mounted
+ * as a block device.
+ *
+ * Returns the result of the selected mount helper (0 on success).
+ */
+int
+mount_root(int argc, char *argv[], dev_t root_dev, const char *root_dev_name)
+{
+	unsigned long flags = MS_RDONLY | MS_VERBOSE;
+	const char *fstype = get_arg(argc, argv, "rootfstype=");
+	int rc;
+
+	if (get_flag(argc, argv, "rw") > get_flag(argc, argv, "ro")) {
+		dprintf("kinit: mounting root rw\n");
+		flags &= ~MS_RDONLY;
+	}
+
+	/* An explicit filesystem type can override the device number */
+	if (fstype && strcmp(fstype, "nfs") == 0)
+		root_dev = Root_NFS;
+	else if (fstype && strcmp(fstype, "jffs2") == 0 && !major(root_dev))
+		root_dev = Root_MTD;
+
+	if (root_dev == Root_NFS)
+		rc = mount_nfs_root(argc, argv, flags);
+	else if (root_dev == Root_MTD)
+		rc = mount_mtd_root(argc, argv, root_dev_name, fstype, flags);
+	else
+		rc = mount_block_root(argc, argv, root_dev, fstype, flags);
+
+	if (rc == 0)
+		chdir("/root");
+
+	return rc;
+}
+
+/*
+ * Return a newly allocated string consisting of "/root" followed by
+ * 'src', or NULL if the allocation fails. The caller frees the result.
+ */
+static char *prepend_root_dir(const char *src)
+{
+	static const char prefix[] = "/root";
+	size_t src_len = strlen(src);
+	/* sizeof(prefix) already counts the terminating NUL */
+	char *path = malloc(sizeof(prefix) + src_len);
+
+	if (path) {
+		memcpy(path, prefix, sizeof(prefix) - 1);
+		memcpy(path + sizeof(prefix) - 1, src, src_len + 1);
+	}
+
+	return path;
+}
+
+/*
+ * Perform the mounts requested by "kinit_mount=" command line arguments.
+ *
+ * Each argument has the form
+ *	kinit_mount=<fs_dev>;<dir>;<fs_type>;[opt1],[optn...]
+ * and is mounted below /root. The argument strings are tokenized in place.
+ * Malformed arguments and failed mounts are reported on stderr and skipped.
+ *
+ * Returns 0 on success, -ENOMEM if a path could not be allocated, or the
+ * non-zero result of parse_mount_options() if option parsing failed.
+ */
+int do_cmdline_mounts(int argc, char *argv[])
+{
+	int arg_i;
+	int ret = 0;
+
+	for (arg_i = 0; arg_i < argc; arg_i++) {
+		const char *fs_dev, *fs_dir, *fs_type;
+		char *fs_opts;
+		unsigned long flags = 0;
+		char *saveptr = NULL;
+		char *new_dir;
+		struct extra_opts extra = { 0, 0, 0, 0 };
+
+		if (strncmp(argv[arg_i], "kinit_mount=", 12))
+			continue;
+		/*
+		 * Format:
+		 * <fs_dev>;<dir>;<fs_type>;[opt1],[optn...]
+		 */
+		fs_dev = strtok_r(&argv[arg_i][12], ";", &saveptr);
+		if (!fs_dev) {
+			fprintf(stderr, "Failed to parse fs_dev\n");
+			continue;
+		}
+		fs_dir = strtok_r(NULL, ";", &saveptr);
+		if (!fs_dir) {
+			fprintf(stderr, "Failed to parse fs_dir\n");
+			continue;
+		}
+		fs_type = strtok_r(NULL, ";", &saveptr);
+		if (!fs_type) {
+			fprintf(stderr, "Failed to parse fs_type\n");
+			continue;
+		}
+		fs_opts = strtok_r(NULL, ";", &saveptr);
+		/* Don't error if there is no option string sent */
+
+		new_dir = prepend_root_dir(fs_dir);
+		if (!new_dir)
+			return -ENOMEM;
+		create_dev_if_not_present(fs_dev);
+		ret = parse_mount_options(fs_opts, &flags, &extra);
+		if (ret != 0) {
+			/*
+			 * Only new_dir is released here: add_extra_option()
+			 * has already freed extra.str when it fails.
+			 */
+			free(new_dir);
+			break;
+		}
+
+		if (!mount_block(fs_dev, new_dir, fs_type,
+				 flags, extra.str))
+			fprintf(stderr, "Skipping failed mount '%s'\n", fs_dev);
+		free(new_dir);
+		free(extra.str);
+	}
+	return ret;
+}
+
+/*
+ * Mount every entry read from the fstab stream 'fp' below /root.
+ * Failed mounts are reported on stderr and skipped.
+ *
+ * Returns 0 on success, -ENOMEM if a path could not be allocated, or the
+ * non-zero result of parse_mount_options() if option parsing failed
+ * (matching do_cmdline_mounts()).
+ */
+int do_fstab_mounts(FILE *fp)
+{
+	struct mntent *ent = NULL;
+	char *new_dir;
+	int ret = 0;
+
+	while ((ent = getmntent(fp))) {
+		unsigned long flags = 0;
+		struct extra_opts extra = { 0, 0, 0, 0 };
+
+		new_dir = prepend_root_dir(ent->mnt_dir);
+		if (!new_dir)
+			return -ENOMEM;
+		create_dev_if_not_present(ent->mnt_fsname);
+		ret = parse_mount_options(ent->mnt_opts, &flags, &extra);
+		if (ret != 0) {
+			/*
+			 * Only new_dir is released here: add_extra_option()
+			 * has already freed extra.str when it fails.
+			 */
+			free(new_dir);
+			break;
+		}
+
+		if (!mount_block(ent->mnt_fsname,
+				 new_dir,
+				 ent->mnt_type,
+				 flags,
+				 extra.str)) {
+			fprintf(stderr, "Skipping failed mount '%s'\n",
+				ent->mnt_fsname);
+		}
+		free(new_dir);
+		free(extra.str);
+	}
+	return ret;
+}
+
+/*
+ * Top-level mount sequence for kinit.
+ *
+ * Honors "rootdelay=", runs md setup, determines the root device from
+ * "root=", "nfsroot="/"nfsaddrs=" or /proc/sys/kernel/real-root-dev, lets
+ * initrd_load() take over if it reports success, optionally loads a
+ * ramdisk, mounts root, and finally processes /etc/fstab and any
+ * "kinit_mount=" arguments.
+ *
+ * Returns 0 on success (including when initrd_load() handled everything)
+ * or the first non-zero error from the mount steps.
+ */
+int do_mounts(int argc, char *argv[])
+{
+	const char *root_dev_name = get_arg(argc, argv, "root=");
+	const char *root_delay = get_arg(argc, argv, "rootdelay=");
+	const char *load_ramdisk = get_arg(argc, argv, "load_ramdisk=");
+	dev_t root_dev = 0;
+	int err;
+	FILE *fp;
+
+	dprintf("kinit: do_mounts\n");
+
+	if (root_delay) {
+		int delay = atoi(root_delay);
+		fprintf(stderr, "Waiting %d s before mounting root device...\n",
+			delay);
+		/* A negative value would turn into a huge unsigned sleep */
+		if (delay > 0)
+			sleep(delay);
+	}
+
+	md_run(argc, argv);
+
+	if (root_dev_name) {
+		root_dev = name_to_dev_t(root_dev_name);
+	} else if (get_arg(argc, argv, "nfsroot=") ||
+		   get_arg(argc, argv, "nfsaddrs=")) {
+		root_dev = Root_NFS;
+	} else {
+		/* Stays 0 if getintfile() does not store a value */
+		long rootdev = 0;
+		getintfile("/proc/sys/kernel/real-root-dev", &rootdev);
+		root_dev = (dev_t) rootdev;
+	}
+
+	dprintf("kinit: root_dev = %s\n", bdevname(root_dev));
+
+	if (initrd_load(argc, argv, root_dev)) {
+		dprintf("initrd loaded\n");
+		return 0;
+	}
+
+	if (load_ramdisk && atoi(load_ramdisk)) {
+		if (ramdisk_load(argc, argv))
+			root_dev = Root_RAM0;
+	}
+
+	if (root_dev == Root_MULTI)
+		err = mount_roots(argc, argv, root_dev_name);
+	else
+		err = mount_root(argc, argv, root_dev, root_dev_name);
+
+	if (err)
+		return err;
+
+	if ((fp = setmntent("/etc/fstab", "r"))) {
+		err = do_fstab_mounts(fp);
+		fclose(fp);
+	}
+
+	if (err)
+		return err;
+
+	if (get_arg(argc, argv, "kinit_mount="))
+		err = do_cmdline_mounts(argc, argv);
+	return err;
+}
diff --git a/usr/kinit/do_mounts.h b/usr/kinit/do_mounts.h
new file mode 100644
index 0000000..99bc6a6
--- /dev/null
+++ b/usr/kinit/do_mounts.h
@@ -0,0 +1,49 @@
+/*
+ * do_mounts.h
+ */
+
+#ifndef DO_MOUNTS_H
+#define DO_MOUNTS_H
+
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/stat.h>
+
+#define Root_RAM0 __makedev(1, 0)
+
+/* These device numbers are only used internally */
+#define Root_NFS __makedev(0, 255)
+#define Root_MTD __makedev(0, 254)
+#define Root_MULTI __makedev(0, 253)
+
+int create_dev(const char *name, dev_t dev);
+
+dev_t name_to_dev_t(const char *name);
+
+const char *mount_block(const char *source, const char *target,
+ const char *type, unsigned long flags,
+ const void *data);
+
+int mount_root(int argc, char *argv[], dev_t root_dev,
+ const char *root_dev_name);
+
+int mount_mtd_root(int argc, char *argv[], const char *root_dev_name,
+ const char *type, unsigned long flags);
+
+int do_mounts(int argc, char *argv[]);
+
+int initrd_load(int argc, char *argv[], dev_t root_dev);
+
+/*
+ * Return the device number of the block device node at 'name', or 0 if
+ * the path cannot be stat()ed or is not a block device.
+ */
+static inline dev_t bstat(const char *name)
+{
+	struct stat st;
+
+	if (stat(name, &st) == 0 && S_ISBLK(st.st_mode))
+		return st.st_rdev;
+
+	return 0;
+}
+
+int load_ramdisk_compressed(const char *devpath, FILE * wfd,
+ off_t ramdisk_start);
+
+#endif /* DO_MOUNTS_H */
diff --git a/usr/kinit/do_mounts_md.c b/usr/kinit/do_mounts_md.c
new file mode 100644
index 0000000..f446620
--- /dev/null
+++ b/usr/kinit/do_mounts_md.c
@@ -0,0 +1,400 @@
+/*
+ * Handle autoconfiguration of md devices. This is ugly, partially since
+ * it still relies on a sizable kernel component.
+ *
+ * This file is derived from the Linux kernel.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/sysmacros.h>
+#include <sys/md.h>
+#include <linux/major.h>
+
+#include "kinit.h"
+#include "do_mounts.h"
+
+#define LEVEL_NONE (-1000000)
+
+/*
+ * When md (and any required personalities) are compiled into the kernel
+ * (not as a module), arrays can be assembled at boot time using AUTODETECT,
+ * where specially marked partitions are registered with md_autodetect_dev(),
+ * and with MD_BOOT, where devices to be collected are given on the boot line
+ * with md=.....
+ * The code for that is here.
+ */
+
+static int raid_noautodetect, raid_autopart;
+
+static struct {
+ int minor; /* md minor number parsed from the md= argument */
+ int partitioned; /* 1 if the md= value started with 'd', else 0 */
+ int level; /* 0, LEVEL_LINEAR, or LEVEL_NONE when no level was given */
+ int chunk; /* 1 << (factor + 12); only set for level 0 / LEVEL_LINEAR */
+ char *device_names; /* rest of the md= value: comma-separated device list */
+} md_setup_args[MAX_MD_DEVS];
+
+static int md_setup_ents;
+
+/**
+ * get_option - Parse integer from an option string
+ * @str: option string
+ * @pint: (output) integer value parsed from @str
+ *
+ * Read an int from an option string; if available accept a subsequent
+ * comma as well.
+ *
+ * Return values:
+ * 0 : no int in string
+ * 1 : int found, no subsequent comma
+ * 2 : int found including a subsequent comma
+ */
+
+static int get_option(char **str, int *pint)
+{
+	char *start = *str;
+	char *end;
+
+	/* Nothing to parse: leave *str and *pint untouched */
+	if (start == NULL || *start == '\0')
+		return 0;
+
+	*pint = strtol(start, &end, 0);
+	*str = end;
+
+	/* strtol() consumed nothing, so there was no integer */
+	if (end == start)
+		return 0;
+
+	if (*end != ',')
+		return 1;
+
+	/* Step over the comma so the caller can parse the next field */
+	*str = end + 1;
+	return 2;
+}
+
+/*
+ * Find the partitioned md device major number... of course this *HAD*
+ * to be done dynamically instead of using a registered number.
+ * Sigh. Double sigh.
+ *
+ * The number is looked up in the "Block devices:" section of
+ * /proc/devices (lines of the form "<major> <name>") and cached in a
+ * static, so the file is read at most once.
+ *
+ * Returns the major number of the "mdp" driver.  If /proc/devices cannot
+ * be opened or has no "mdp" entry, an error is printed and the program
+ * exits with status 1.
+ */
+static int mdp_major(void)
+{
+	static int found = 0;
+	FILE *f;
+	char line[512], *p, *end;
+	int is_blk, major_no;
+
+	if (found)
+		return found;
+
+	f = fopen("/proc/devices", "r");
+	if (!f) {
+		fprintf(stderr, "Error: cannot open /proc/devices: %s\n",
+			strerror(errno));
+		exit(1);
+	}
+
+	is_blk = 0;
+	while (fgets(line, sizeof line, f)) {
+		if (!strcmp(line, "Block devices:\n")) {
+			/*
+			 * Do not feed the header itself to strtol(): it
+			 * parses as 0 and would end the section at once.
+			 */
+			is_blk = 1;
+			continue;
+		}
+		if (!is_blk)
+			continue;
+
+		major_no = strtol(line, &p, 10);
+		if (major_no == 0) {	/* Not a number */
+			is_blk = 0;
+			continue;
+		}
+
+		while (isspace((unsigned char)*p))
+			p++;
+
+		/* fgets() keeps the newline; trim it before comparing */
+		end = p + strlen(p);
+		while (end > p && isspace((unsigned char)end[-1]))
+			*--end = '\0';
+
+		if (major_no > 0 && !strcmp(p, "mdp")) {
+			found = major_no;
+			break;
+		}
+	}
+	fclose(f);
+
+	if (!found) {
+		fprintf(stderr,
+			"Error: mdp devices detected but no mdp device found!\n");
+		exit(1);
+	}
+
+	return found;
+}
+
+/*
+ * Parse the command-line parameters given our kernel, but do not
+ * actually try to invoke the MD device now; that is handled by
+ * md_setup_drive after the low-level disk drivers have initialised.
+ *
+ * 27/11/1999: Fixed to work correctly with the 2.3 kernel (which
+ * assigns the task of parsing integer arguments to the
+ * invoked program now). Added ability to initialise all
+ * the MD devices (by specifying multiple "md=" lines)
+ * instead of just one. -- KTK
+ * 18May2000: Added support for persistent-superblock arrays:
+ * md=n,0,factor,fault,device-list uses RAID0 for device n
+ * md=n,-1,factor,fault,device-list uses LINEAR for device n
+ * md=n,device-list reads a RAID superblock from the devices
+ * elements in device-list are read by name_to_kdev_t so can be
+ * a hex number or something like /dev/hda1 /dev/sdb
+ * 2001-06-03: Dave Cinege <dcinege@psychosis.com>
+ * Shifted name_to_kdev_t() and related operations to md_set_drive()
+ * for later execution. Rewrote section to make devfs compatible.
+ */
+static int md_setup(char *str)
+{ /* Parses one md= value into md_setup_args[]; returns 1 on success, 0 if the value is rejected. */
+ int minor_num, level, factor, fault, partitioned = 0; /* fault is parsed but not used afterwards */
+ char *pername = "";
+ char *str1;
+ int ent;
+
+ if (*str == 'd') { /* a leading 'd' marks the array as partitioned */
+ partitioned = 1;
+ str++;
+ }
+ if (get_option(&str, &minor_num) != 2) { /* MD Number */
+ fprintf(stderr, "md: Too few arguments supplied to md=.\n");
+ return 0;
+ }
+ str1 = str; /* remember where the text after the minor number starts */
+ if (minor_num >= MAX_MD_DEVS) { /* NOTE(review): a negative minor_num is not rejected here - confirm intended */
+ fprintf(stderr, "md: md=%d, Minor device number too high.\n",
+ minor_num);
+ return 0;
+ }
+ for (ent = 0; ent < md_setup_ents; ent++) /* look for an earlier entry to overwrite */
+ if (md_setup_args[ent].minor == minor_num &&
+ md_setup_args[ent].partitioned == partitioned) {
+ fprintf(stderr,
+ "md: md=%s%d, Specified more than once. "
+ "Replacing previous definition.\n",
+ partitioned ? "d" : "", minor_num);
+ break;
+ }
+ if (ent >= MAX_MD_DEVS) {
+ fprintf(stderr, "md: md=%s%d - too many md initialisations\n",
+ partitioned ? "d" : "", minor_num);
+ return 0;
+ }
+ if (ent >= md_setup_ents) /* nothing matched: claim a new slot */
+ md_setup_ents++; /* counted before the level fields are validated; a return 0 below leaves the slot partly filled */
+ switch (get_option(&str, &level)) { /* RAID level */
+ case 2: /* could be 0 or -1.. */
+ if (level == 0 || level == LEVEL_LINEAR) {
+ if (get_option(&str, &factor) != 2 || /* Chunk Size */
+ get_option(&str, &fault) != 2) {
+ fprintf(stderr,
+ "md: Too few arguments supplied to md=.\n");
+ return 0;
+ }
+ md_setup_args[ent].level = level;
+ md_setup_args[ent].chunk = 1 << (factor + 12);
+ if (level == LEVEL_LINEAR)
+ pername = "linear";
+ else
+ pername = "raid0";
+ break;
+ }
+ /* FALL THROUGH */
+ case 1: /* the first device is numeric */
+ str = str1; /* the number just parsed belongs to the device list: rewind */
+ /* FALL THROUGH */
+ case 0:
+ md_setup_args[ent].level = LEVEL_NONE;
+ pername = "super-block";
+ }
+
+ fprintf(stderr, "md: Will configure md%s%d (%s) from %s, below.\n",
+ partitioned ? "_d" : "", minor_num, pername, str);
+ md_setup_args[ent].device_names = str; /* points into the caller's string; no copy is made */
+ md_setup_args[ent].partitioned = partitioned;
+ md_setup_args[ent].minor = minor_num;
+
+ return 1;
+}
+
+#define MdpMinorShift 6
+
+/*
+ * md_setup_drive - assemble and start every array recorded by md_setup().
+ *
+ * For each md_setup_args[] entry whose device node does not exist yet:
+ * create the node, resolve the comma-separated member list with
+ * name_to_dev_t(), then issue the md ioctls (SET_ARRAY_INFO, ADD_NEW_DISK,
+ * RUN_ARRAY) on the new node.  Failures are reported on stderr and the
+ * entry is skipped; nothing is returned to the caller.
+ *
+ * The member list string is split in place (commas are overwritten with
+ * NUL bytes).
+ */
+static void md_setup_drive(void)
+{
+	int dev_minor, i, ent, partitioned;
+	dev_t dev;
+	dev_t devices[MD_SB_DISKS + 1];
+
+	for (ent = 0; ent < md_setup_ents; ent++) {
+		int fd;
+		int err = 0;
+		char *devname;
+		mdu_disk_info_t dinfo;
+		char name[16];
+		struct stat st_chk;
+
+		dev_minor = md_setup_args[ent].minor;
+		partitioned = md_setup_args[ent].partitioned;
+		devname = md_setup_args[ent].device_names;
+
+		snprintf(name, sizeof name,
+			 "/dev/md%s%d", partitioned ? "_d" : "", dev_minor);
+
+		/* A node that already exists is left alone */
+		if (stat(name, &st_chk) == 0)
+			continue;
+
+		if (partitioned)
+			dev = makedev(mdp_major(), dev_minor << MdpMinorShift);
+		else
+			dev = makedev(MD_MAJOR, dev_minor);
+		create_dev(name, dev);
+
+		/* Translate the member names into a 0-terminated dev_t list */
+		for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
+			char *p;
+
+			p = strchr(devname, ',');
+			if (p)
+				*p++ = 0;
+
+			dev = name_to_dev_t(devname);
+			if (!dev) {
+				fprintf(stderr, "md: Unknown device name: %s\n",
+					devname);
+				break;
+			}
+
+			devices[i] = dev;
+
+			devname = p;
+		}
+		devices[i] = 0;
+
+		if (!i)
+			continue;
+
+		fprintf(stderr, "md: Loading md%s%d: %s\n",
+			partitioned ? "_d" : "", dev_minor,
+			md_setup_args[ent].device_names);
+
+		fd = open(name, 0, 0);
+		if (fd < 0) {
+			fprintf(stderr, "md: open failed - cannot start "
+				"array %s\n", name);
+			continue;
+		}
+		/*
+		 * In user space ioctl() reports failure as -1 with the reason
+		 * in errno; it never returns -EBUSY itself, so test errno.
+		 */
+		if (ioctl(fd, SET_ARRAY_INFO, 0) < 0 && errno == EBUSY) {
+			fprintf(stderr,
+				"md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
+				dev_minor);
+			close(fd);
+			continue;
+		}
+
+		if (md_setup_args[ent].level != LEVEL_NONE) {
+			/* non-persistent */
+			mdu_array_info_t ainfo;
+			ainfo.level = md_setup_args[ent].level;
+			ainfo.size = 0;
+			ainfo.nr_disks = 0;
+			ainfo.raid_disks = 0;
+			while (devices[ainfo.raid_disks])
+				ainfo.raid_disks++;
+			ainfo.md_minor = dev_minor;
+			ainfo.not_persistent = 1;
+
+			ainfo.state = (1 << MD_SB_CLEAN);
+			ainfo.layout = 0;
+			ainfo.chunk_size = md_setup_args[ent].chunk;
+			err = ioctl(fd, SET_ARRAY_INFO, &ainfo);
+			for (i = 0; !err && i <= MD_SB_DISKS; i++) {
+				dev = devices[i];
+				if (!dev)
+					break;
+				dinfo.number = i;
+				dinfo.raid_disk = i;
+				dinfo.state =
+				    (1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC);
+				dinfo.major = major(dev);
+				dinfo.minor = minor(dev);
+				err = ioctl(fd, ADD_NEW_DISK, &dinfo);
+			}
+		} else {
+			/* persistent */
+			for (i = 0; i <= MD_SB_DISKS; i++) {
+				dev = devices[i];
+				if (!dev)
+					break;
+				dinfo.major = major(dev);
+				dinfo.minor = minor(dev);
+				ioctl(fd, ADD_NEW_DISK, &dinfo);
+			}
+		}
+		if (!err)
+			err = ioctl(fd, RUN_ARRAY, 0);
+		if (err)
+			fprintf(stderr, "md: starting md%d failed\n",
+				dev_minor);
+		else {
+			/* reread the partition table.
+			 * I (neilb) am not sure why this is needed, but I
+			 * cannot boot a kernel with devfs compiled in from
+			 * partitioned md array without it
+			 */
+			close(fd);
+			fd = open(name, 0, 0);
+			if (fd < 0)
+				continue;	/* nothing to reread or close */
+			ioctl(fd, BLKRRPART, 0);
+		}
+		close(fd);
+	}
+}
+
+/*
+ * raid_setup - parse the value of a "raid=" boot argument.
+ * @str: comma-separated list of words
+ *
+ * Recognised words:
+ *   noautodetect        sets raid_noautodetect
+ *   partitionable, part sets raid_autopart
+ *
+ * Each word is compared over its own length only, so an abbreviation such
+ * as "noauto" is accepted.  Empty words (for example from a trailing
+ * comma) are ignored.  Always returns 1.
+ */
+static int raid_setup(char *str)
+{
+	int len, pos;
+
+	len = strlen(str) + 1;
+	pos = 0;
+
+	while (pos < len) {
+		const char *word = str + pos;
+		char *comma = strchr(word, ',');
+		int wlen;
+
+		if (comma)
+			wlen = comma - word;
+		else
+			wlen = (len - 1) - pos;
+
+		/*
+		 * Compare the current word, not the start of the whole
+		 * string.  strncmp() with length 0 matches anything, so an
+		 * empty word must not be tested at all.
+		 */
+		if (wlen > 0) {
+			if (!strncmp(word, "noautodetect", wlen))
+				raid_noautodetect = 1;
+			if (strncmp(word, "partitionable", wlen) == 0)
+				raid_autopart = 1;
+			if (strncmp(word, "part", wlen) == 0)
+				raid_autopart = 1;
+		}
+		pos += wlen + 1;
+	}
+	return 1;
+}
+
+/*
+ * md_run_setup - create /dev/md0, optionally ask the kernel to autorun
+ * arrays (RAID_AUTORUN, passing raid_autopart as the argument), then
+ * process the md= entries via md_setup_drive().
+ */
+static void md_run_setup(void)
+{
+	int md0;
+
+	create_dev("/dev/md0", makedev(MD_MAJOR, 0));
+
+	if (!raid_noautodetect) {
+		md0 = open("/dev/md0", 0, 0);
+		if (md0 >= 0) {
+			ioctl(md0, RAID_AUTORUN,
+			      (void *)(intptr_t) raid_autopart);
+			close(md0);
+		}
+	} else {
+		fprintf(stderr,
+			"md: Skipping autodetection of RAID arrays. (raid=noautodetect)\n");
+	}
+
+	md_setup_drive();
+}
+
+/*
+ * md_run - scan the NULL-terminated @argv for "raid=" and "md=" arguments,
+ * hand each value to raid_setup() / md_setup(), then run md_run_setup().
+ * @argc is not consulted; the scan stops at the first NULL entry.
+ */
+void md_run(int argc, char *argv[])
+{
+	int i;
+
+	for (i = 0; argv[i] != NULL; i++) {
+		char *arg = argv[i];
+
+		if (strncmp(arg, "raid=", 5) == 0) {
+			raid_setup(arg + 5);
+			continue;
+		}
+		if (strncmp(arg, "md=", 3) == 0)
+			md_setup(arg + 3);
+	}
+
+	md_run_setup();
+}
diff --git a/usr/kinit/do_mounts_mtd.c b/usr/kinit/do_mounts_mtd.c
new file mode 100644
index 0000000..20d27ca
--- /dev/null
+++ b/usr/kinit/do_mounts_mtd.c
@@ -0,0 +1,42 @@
+/*
+ * Mount an MTD device as a character device.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include "kinit.h"
+#include "do_mounts.h"
+
+/*
+ * mount_mtd_root - mount @root_dev_name on /root.
+ *
+ * @type defaults to "jffs2" when NULL; the mount data string is the value
+ * of the "rootflags=" argument as returned by get_arg().
+ *
+ * Returns 0 on success, or the negated errno value of the failed mount().
+ */
+int mount_mtd_root(int argc, char *argv[], const char *root_dev_name,
+		   const char *type, unsigned long flags)
+{
+	const char *mount_opts = get_arg(argc, argv, "rootflags=");
+	int saved_errno;
+
+	if (type == NULL)
+		type = "jffs2";
+
+	printf("Trying to mount MTD %s as root (%s filesystem)\n",
+	       root_dev_name, type);
+
+	if (mount(root_dev_name, "/root", type, flags, mount_opts) == 0) {
+		fprintf(stderr, "%s: Mounted root (%s filesystem)%s.\n",
+			progname, type, (flags & MS_RDONLY) ? " readonly" : "");
+		return 0;
+	}
+
+	/* Capture errno before any further library call can change it */
+	saved_errno = errno;
+	fprintf(stderr,
+		"%s: Unable to mount MTD %s (%s filesystem) "
+		"as root: %s\n",
+		progname, root_dev_name, type, strerror(saved_errno));
+	return -saved_errno;
+}
diff --git a/usr/kinit/fstype/Kbuild b/usr/kinit/fstype/Kbuild
new file mode 100644
index 0000000..631eb32
--- /dev/null
+++ b/usr/kinit/fstype/Kbuild
@@ -0,0 +1,29 @@
+#
+# Kbuild file for fstype
+#
+
+static-y := static/fstype
+shared-y := shared/fstype
+
+# common .o files
+objs := main.o fstype.o
+
+# TODO - do we want a stripped version
+# TODO - do we want the static.g + shared.g directories?
+
+# Create built-in.o with all object files (used by kinit)
+lib-y := $(objs)
+
+# .o files used to build executables
+static/fstype-y := $(objs)
+shared/fstype-y := $(objs)
+
+# Cleaning
+clean-dirs := static shared
+
+# install binary
+ifdef KLIBCSHAREDFLAGS
+install-y := $(shared-y)
+else
+install-y := $(static-y)
+endif
diff --git a/usr/kinit/fstype/btrfs.h b/usr/kinit/fstype/btrfs.h
new file mode 100644
index 0000000..459da12
--- /dev/null
+++ b/usr/kinit/fstype/btrfs.h
@@ -0,0 +1,57 @@
+#ifndef __BTRFS_H
+#define __BTRFS_H
+
+# define BTRFS_MAGIC "_BHRfS_M"
+# define BTRFS_MAGIC_L 8
+
+/*
+ * Structure of the super block
+ */
+struct btrfs_super_block {
+ uint8_t csum[32];
+ uint8_t fsid[16];
+ uint64_t bytenr;
+ uint64_t flags;
+ uint8_t magic[8]; /* compared with BTRFS_MAGIC (BTRFS_MAGIC_L bytes) by fstype.c */
+ uint64_t generation;
+ uint64_t root;
+ uint64_t chunk_root;
+ uint64_t log_root;
+ uint64_t log_root_transid;
+ uint64_t total_bytes; /* read via __le64_to_cpu() and reported as the size by fstype.c */
+ uint64_t bytes_used;
+ uint64_t root_dir_objectid;
+ uint64_t num_devices;
+ uint32_t sectorsize;
+ uint32_t nodesize;
+ uint32_t leafsize;
+ uint32_t stripesize;
+ uint32_t sys_chunk_array_size;
+ uint64_t chunk_root_generation;
+ uint64_t compat_flags;
+ uint64_t compat_ro_flags;
+ uint64_t incompat_flags;
+ uint16_t csum_type;
+ uint8_t root_level;
+ uint8_t chunk_root_level;
+ uint8_t log_root_level;
+ struct btrfs_dev_item { /* nested, also packed */
+ uint64_t devid;
+ uint64_t total_bytes;
+ uint64_t bytes_used;
+ uint32_t io_align;
+ uint32_t io_width;
+ uint32_t sector_size;
+ uint64_t type;
+ uint64_t generation;
+ uint64_t start_offset;
+ uint32_t dev_group;
+ uint8_t seek_speed;
+ uint8_t bandwidth;
+ uint8_t uuid[16];
+ uint8_t fsid[16];
+ } __attribute__ ((__packed__)) dev_item;
+ uint8_t label[256];
+} __attribute__ ((__packed__)); /* packed: no padding is inserted between fields */
+
+#endif /* __BTRFS_H */
diff --git a/usr/kinit/fstype/cramfs_fs.h b/usr/kinit/fstype/cramfs_fs.h
new file mode 100644
index 0000000..6f5ad4f
--- /dev/null
+++ b/usr/kinit/fstype/cramfs_fs.h
@@ -0,0 +1,85 @@
+#ifndef __CRAMFS_H
+#define __CRAMFS_H
+
+#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */
+#define CRAMFS_SIGNATURE "Compressed ROMFS"
+
+/*
+ * Width of various bitfields in struct cramfs_inode.
+ * Primarily used to generate warnings in mkcramfs.
+ */
+#define CRAMFS_MODE_WIDTH 16
+#define CRAMFS_UID_WIDTH 16
+#define CRAMFS_SIZE_WIDTH 24
+#define CRAMFS_GID_WIDTH 8
+#define CRAMFS_NAMELEN_WIDTH 6
+#define CRAMFS_OFFSET_WIDTH 26
+
+/*
+ * Since inode.namelen is an unsigned 6-bit number, the maximum cramfs
+ * path length is 63 << 2 = 252.
+ */
+#define CRAMFS_MAXPATHLEN (((1 << CRAMFS_NAMELEN_WIDTH) - 1) << 2)
+
+/*
+ * Reasonably terse representation of the inode data.
+ */
+struct cramfs_inode { /* three __u32 words, each split into two bitfields sized by the CRAMFS_*_WIDTH macros */
+ __u32 mode:CRAMFS_MODE_WIDTH, uid:CRAMFS_UID_WIDTH;
+ /* SIZE for device files is i_rdev */
+ __u32 size:CRAMFS_SIZE_WIDTH, gid:CRAMFS_GID_WIDTH;
+ /* NAMELEN is the length of the file name, divided by 4 and
+ rounded up. (cramfs doesn't support hard links.) */
+ /* OFFSET: For symlinks and non-empty regular files, this
+ contains the offset (divided by 4) of the file data in
+ compressed form (starting with an array of block pointers;
+ see README). For non-empty directories it is the offset
+ (divided by 4) of the inode of the first file in that
+ directory. For anything else, offset is zero. */
+ __u32 namelen:CRAMFS_NAMELEN_WIDTH, offset:CRAMFS_OFFSET_WIDTH;
+};
+
+struct cramfs_info { /* embedded in struct cramfs_super as the fsid member */
+ __u32 crc;
+ __u32 edition;
+ __u32 blocks; /* fstype.c reports blocks << 10 as the size when CRAMFS_FLAG_FSID_VERSION_2 is set */
+ __u32 files;
+};
+
+/*
+ * Superblock information at the beginning of the FS.
+ */
+struct cramfs_super {
+ __u32 magic; /* 0x28cd3d45 - random number; fstype.c compares it with CRAMFS_MAGIC without byte swapping */
+ __u32 size; /* length in bytes */
+ __u32 flags; /* feature flags (CRAMFS_FLAG_* bits) */
+ __u32 future; /* reserved for future use */
+ __u8 signature[16]; /* "Compressed ROMFS" */
+ struct cramfs_info fsid; /* unique filesystem info */
+ __u8 name[16]; /* user-defined name */
+ struct cramfs_inode root; /* root inode data */
+};
+
+/*
+ * Feature flags
+ *
+ * 0x00000000 - 0x000000ff: features that work for all past kernels
+ * 0x00000100 - 0xffffffff: features that don't work for past kernels
+ */
+#define CRAMFS_FLAG_FSID_VERSION_2 0x00000001 /* fsid version #2 */
+#define CRAMFS_FLAG_SORTED_DIRS 0x00000002 /* sorted dirs */
+#define CRAMFS_FLAG_HOLES 0x00000100 /* support for holes */
+#define CRAMFS_FLAG_WRONG_SIGNATURE 0x00000200 /* reserved */
+#define CRAMFS_FLAG_SHIFTED_ROOT_OFFSET 0x00000400 /* shifted root fs */
+
+/*
+ * Valid values in super.flags. Currently we refuse to mount
+ * if (flags & ~CRAMFS_SUPPORTED_FLAGS). Maybe that should be
+ * changed to test super.future instead.
+ */
+#define CRAMFS_SUPPORTED_FLAGS ( 0x000000ff \
+ | CRAMFS_FLAG_HOLES \
+ | CRAMFS_FLAG_WRONG_SIGNATURE \
+ | CRAMFS_FLAG_SHIFTED_ROOT_OFFSET )
+
+#endif
diff --git a/usr/kinit/fstype/ext2_fs.h b/usr/kinit/fstype/ext2_fs.h
new file mode 100644
index 0000000..775df8f
--- /dev/null
+++ b/usr/kinit/fstype/ext2_fs.h
@@ -0,0 +1,84 @@
+#ifndef __EXT2_FS_H
+#define __EXT2_FS_H
+
+/*
+ * The second extended file system magic number
+ */
+#define EXT2_SUPER_MAGIC 0xEF53
+
+/*
+ * Structure of the super block
+ */
+struct ext2_super_block { /* fstype.c reads s_magic, s_blocks_count and s_log_block_size through the little-endian conversion helpers */
+ __le32 s_inodes_count; /* Inodes count */
+ __le32 s_blocks_count; /* Blocks count */
+ __le32 s_r_blocks_count; /* Reserved blocks count */
+ __le32 s_free_blocks_count; /* Free blocks count */
+ __le32 s_free_inodes_count; /* Free inodes count */
+ __le32 s_first_data_block; /* First Data Block */
+ __le32 s_log_block_size; /* Block size; fstype.c uses it as a shift: size = s_blocks_count << (10 + this) */
+ __le32 s_log_frag_size; /* Fragment size */
+ __le32 s_blocks_per_group; /* # Blocks per group */
+ __le32 s_frags_per_group; /* # Fragments per group */
+ __le32 s_inodes_per_group; /* # Inodes per group */
+ __le32 s_mtime; /* Mount time */
+ __le32 s_wtime; /* Write time */
+ __le16 s_mnt_count; /* Mount count */
+ __le16 s_max_mnt_count; /* Maximal mount count */
+ __le16 s_magic; /* Magic signature */
+ __le16 s_state; /* File system state */
+ __le16 s_errors; /* Behaviour when detecting errors */
+ __le16 s_minor_rev_level; /* minor revision level */
+ __le32 s_lastcheck; /* time of last check */
+ __le32 s_checkinterval; /* max. time between checks */
+ __le32 s_creator_os; /* OS */
+ __le32 s_rev_level; /* Revision level */
+ __le16 s_def_resuid; /* Default uid for reserved blocks */
+ __le16 s_def_resgid; /* Default gid for reserved blocks */
+ /*
+ * These fields are for EXT2_DYNAMIC_REV superblocks only.
+ *
+ * Note: the difference between the compatible feature set and
+ * the incompatible feature set is that if there is a bit set
+ * in the incompatible feature set that the kernel doesn't
+ * know about, it should refuse to mount the filesystem.
+ *
+ * e2fsck's requirements are more strict; if it doesn't know
+ * about a feature in either the compatible or incompatible
+ * feature set, it must abort and not try to meddle with
+ * things it doesn't understand...
+ */
+ __le32 s_first_ino; /* First non-reserved inode */
+ __le16 s_inode_size; /* size of inode structure */
+ __le16 s_block_group_nr; /* block group # of this superblock */
+ __le32 s_feature_compat; /* compatible feature set */
+ __le32 s_feature_incompat; /* incompatible feature set */
+ __le32 s_feature_ro_compat; /* readonly-compatible feature set */
+ __u8 s_uuid[16]; /* 128-bit uuid for volume */
+ char s_volume_name[16]; /* volume name */
+ char s_last_mounted[64]; /* directory where last mounted */
+ __le32 s_algorithm_usage_bitmap; /* For compression */
+ /*
+ * Performance hints. Directory preallocation should only
+ * happen if the EXT2_COMPAT_PREALLOC flag is on.
+ */
+ __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate */
+ __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
+ __u16 s_padding1;
+ /*
+ * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
+ */
+ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
+ __u32 s_journal_inum; /* inode number of journal file */
+ __u32 s_journal_dev; /* device number of journal file */
+ __u32 s_last_orphan; /* start of list of inodes to delete */
+ __u32 s_hash_seed[4]; /* HTREE hash seed */
+ __u8 s_def_hash_version; /* Default hash version to use */
+ __u8 s_reserved_char_pad;
+ __u16 s_reserved_word_pad;
+ __le32 s_default_mount_opts;
+ __le32 s_first_meta_bg; /* First metablock block group */
+ __u32 s_reserved[190]; /* Padding to the end of the block */
+};
+
+#endif /* __EXT2_FS_H */
diff --git a/usr/kinit/fstype/ext3_fs.h b/usr/kinit/fstype/ext3_fs.h
new file mode 100644
index 0000000..f958e5c
--- /dev/null
+++ b/usr/kinit/fstype/ext3_fs.h
@@ -0,0 +1,134 @@
+#ifndef __EXT3_FS_H
+#define __EXT3_FS_H
+
+/*
+ * The ext3 superblock magic number (same value as EXT2_SUPER_MAGIC)
+ */
+#define EXT3_SUPER_MAGIC 0xEF53
+
+#define EXT2_FLAGS_TEST_FILESYS 0x0004
+#define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
+#define EXT2_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
+#define EXT2_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
+#define EXT2_FEATURE_INCOMPAT_FILETYPE 0x0002
+#define EXT2_FEATURE_INCOMPAT_META_BG 0x0010
+#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004
+#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008
+#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004
+
+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040
+#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
+#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
+
+#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+ EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
+ EXT2_FEATURE_RO_COMPAT_BTREE_DIR)
+#define EXT3_FEATURE_RO_COMPAT_UNSUPPORTED ~EXT3_FEATURE_RO_COMPAT_SUPP
+#define EXT3_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \
+ EXT3_FEATURE_INCOMPAT_RECOVER| \
+ EXT2_FEATURE_INCOMPAT_META_BG)
+#define EXT3_FEATURE_INCOMPAT_UNSUPPORTED ~EXT3_FEATURE_INCOMPAT_SUPP
+
+
+
+/*
+ * Structure of the super block
+ */
+struct ext3_super_block { /* fields are declared with plain __u16/__u32 types; fstype.c still reads s_magic, s_feature_*, s_blocks_count and s_log_block_size through the little-endian helpers */
+ /*00*/ __u32 s_inodes_count;
+ /* Inodes count */
+ __u32 s_blocks_count; /* Blocks count */
+ __u32 s_r_blocks_count; /* Reserved blocks count */
+ __u32 s_free_blocks_count; /* Free blocks count */
+ /*10*/ __u32 s_free_inodes_count;
+ /* Free inodes count */
+ __u32 s_first_data_block; /* First Data Block */
+ __u32 s_log_block_size; /* Block size */
+ __s32 s_log_frag_size; /* Fragment size */
+ /*20*/ __u32 s_blocks_per_group;
+ /* # Blocks per group */
+ __u32 s_frags_per_group; /* # Fragments per group */
+ __u32 s_inodes_per_group; /* # Inodes per group */
+ __u32 s_mtime; /* Mount time */
+ /*30*/ __u32 s_wtime;
+ /* Write time */
+ __u16 s_mnt_count; /* Mount count */
+ __s16 s_max_mnt_count; /* Maximal mount count */
+ __u16 s_magic; /* Magic signature */
+ __u16 s_state; /* File system state */
+ __u16 s_errors; /* Behaviour when detecting errors */
+ __u16 s_minor_rev_level; /* minor revision level */
+ /*40*/ __u32 s_lastcheck;
+ /* time of last check */
+ __u32 s_checkinterval; /* max. time between checks */
+ __u32 s_creator_os; /* OS */
+ __u32 s_rev_level; /* Revision level */
+ /*50*/ __u16 s_def_resuid;
+ /* Default uid for reserved blocks */
+ __u16 s_def_resgid; /* Default gid for reserved blocks */
+ /*
+ * These fields are for EXT3_DYNAMIC_REV superblocks only.
+ *
+ * Note: the difference between the compatible feature set and
+ * the incompatible feature set is that if there is a bit set
+ * in the incompatible feature set that the kernel doesn't
+ * know about, it should refuse to mount the filesystem.
+ *
+ * e2fsck's requirements are more strict; if it doesn't know
+ * about a feature in either the compatible or incompatible
+ * feature set, it must abort and not try to meddle with
+ * things it doesn't understand...
+ */
+ __u32 s_first_ino; /* First non-reserved inode */
+ __u16 s_inode_size; /* size of inode structure */
+ __u16 s_block_group_nr; /* block group # of this superblock */
+ __u32 s_feature_compat; /* compatible feature set */
+ /*60*/ __u32 s_feature_incompat;
+ /* incompatible feature set */
+ __u32 s_feature_ro_compat; /* readonly-compatible feature set */
+ /*68*/ __u8 s_uuid[16];
+ /* 128-bit uuid for volume */
+ /*78*/ char s_volume_name[16];
+ /* volume name */
+ /*88*/ char s_last_mounted[64];
+ /* directory where last mounted */
+ /*C8*/ __u32 s_algorithm_usage_bitmap;
+ /* For compression */
+ /*
+ * Performance hints. Directory preallocation should only
+ * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on.
+ */
+ __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate */
+ __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
+ __u16 s_padding1;
+ /*
+ * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
+ */
+ /*D0*/ __u8 s_journal_uuid[16];
+ /* uuid of journal superblock */
+ /*E0*/ __u32 s_journal_inum;
+ /* inode number of journal file */
+ __u32 s_journal_dev; /* device number of journal file */
+ __u32 s_last_orphan; /* start of list of inodes to delete */
+ __u32 s_hash_seed[4]; /* HTREE hash seed */
+ __u8 s_def_hash_version; /* Default hash version to use */
+ __u8 s_jnl_backup_type;
+ __u16 s_reserved_word_pad;
+ __u32 s_default_mount_opts;
+ __u32 s_first_meta_bg;
+ __u32 s_mkfs_time;
+ __u32 s_jnl_blocks[17];
+ __u32 s_blocks_count_hi;
+ __u32 s_r_blocks_count_hi;
+ __u32 s_free_blocks_hi;
+ __u16 s_min_extra_isize;
+ __u16 s_want_extra_isize;
+ __u32 s_flags;
+ __u16 s_raid_stride;
+ __u16 s_mmp_interval;
+ __u64 s_mmp_block;
+ __u32 s_raid_stripe_width;
+ __u32 s_reserved[163];
+};
+
+#endif /* __EXT3_FS_H */
diff --git a/usr/kinit/fstype/fstype.c b/usr/kinit/fstype/fstype.c
new file mode 100644
index 0000000..aebccca
--- /dev/null
+++ b/usr/kinit/fstype/fstype.c
@@ -0,0 +1,445 @@
+/*
+ * by rmk
+ *
+ * Detect filesystem type (on stdin) and output strings for two
+ * environment variables:
+ * FSTYPE - filesystem type
+ * FSSIZE - filesystem size (if known)
+ *
+ * We currently detect the filesystems listed below in the struct
+ * "imagetype images" (in the order they are listed).
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <endian.h>
+#include <netinet/in.h>
+#include <sys/utsname.h>
+#include <sys/vfs.h>
+
+#define cpu_to_be32(x) __cpu_to_be32(x) /* Needed by romfs_fs.h */
+
+#include "btrfs.h"
+#include "cramfs_fs.h"
+#include "ext2_fs.h"
+#include "ext3_fs.h"
+#include "gfs2_fs.h"
+#include "iso9660_sb.h"
+#include "luks_fs.h"
+#include "lvm2_sb.h"
+#include "minix_fs.h"
+#include "nilfs_fs.h"
+#include "ocfs2_fs.h"
+#include "romfs_fs.h"
+#include "squashfs_fs.h"
+#include "xfs_sb.h"
+
+/*
+ * Slightly cleaned up version of jfs_superblock to
+ * avoid pulling in other kernel header files.
+ */
+#include "jfs_superblock.h"
+
+/*
+ * reiserfs_fs.h is too sick to include directly.
+ * Use a cleaned up version.
+ */
+#include "reiserfs_fs.h"
+#include "reiser4_fs.h"
+
+#include "fstype.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
+#define BLOCK_SIZE 1024
+
+/* Swap needs the definition of block size */
+#include "swap_fs.h"
+
+/*
+ * gzip_image - match a buffer whose first byte is 037 and whose second
+ * byte is 0213 or 0236.  Returns 1 on a match (with *bytes set to 0),
+ * 0 otherwise (leaving *bytes untouched).
+ */
+static int gzip_image(const void *buf, unsigned long long *bytes)
+{
+	const unsigned char *hdr = buf;
+
+	if (hdr[0] != 037)
+		return 0;
+	if (hdr[1] != 0213 && hdr[1] != 0236)
+		return 0;
+
+	/* The length of a gzip stream can only be determined
+	   by processing the whole stream */
+	*bytes = 0ULL;
+	return 1;
+}
+
+/*
+ * cramfs_image - match on the cramfs magic word.  The size is
+ * fsid.blocks << 10 when the FSID_VERSION_2 flag is set, otherwise 0.
+ */
+static int cramfs_image(const void *buf, unsigned long long *bytes)
+{
+	const struct cramfs_super *super = buf;
+
+	if (super->magic != CRAMFS_MAGIC)
+		return 0;
+
+	if (super->flags & CRAMFS_FLAG_FSID_VERSION_2) {
+		*bytes = (unsigned long long)super->fsid.blocks << 10;
+		return 1;
+	}
+
+	*bytes = 0;
+	return 1;
+}
+
+/*
+ * romfs_image - match on both romfs signature words; the size is the
+ * big-endian size field of the superblock.
+ */
+static int romfs_image(const void *buf, unsigned long long *bytes)
+{
+	const struct romfs_super_block *super = buf;
+
+	if (super->word0 != ROMSB_WORD0 || super->word1 != ROMSB_WORD1)
+		return 0;
+
+	*bytes = __be32_to_cpu(super->size);
+	return 1;
+}
+
+/*
+ * minix_image - match on either minix magic value; the size is
+ * s_nzones << (s_log_zone_size + 10).
+ */
+static int minix_image(const void *buf, unsigned long long *bytes)
+{
+	const struct minix_super_block *super = buf;
+
+	if (super->s_magic != MINIX_SUPER_MAGIC &&
+	    super->s_magic != MINIX_SUPER_MAGIC2)
+		return 0;
+
+	*bytes = (unsigned long long)super->s_nzones
+		<< (super->s_log_zone_size + 10);
+	return 1;
+}
+
+/*
+ * ext4_image - match an ext-family superblock that sets at least one
+ * incompat or ro_compat feature bit outside the ext3-supported sets.
+ */
+static int ext4_image(const void *buf, unsigned long long *bytes)
+{
+	const struct ext3_super_block *super = buf;
+	int beyond_ext3;
+
+	if (super->s_magic != __cpu_to_le16(EXT2_SUPER_MAGIC))
+		return 0;
+
+	/* There is at least one feature not supported by ext3 */
+	beyond_ext3 =
+		(super->s_feature_incompat
+		 & __cpu_to_le32(EXT3_FEATURE_INCOMPAT_UNSUPPORTED)) ||
+		(super->s_feature_ro_compat
+		 & __cpu_to_le32(EXT3_FEATURE_RO_COMPAT_UNSUPPORTED));
+	if (!beyond_ext3)
+		return 0;
+
+	*bytes = (unsigned long long)__le32_to_cpu(super->s_blocks_count)
+		<< (10 + __le32_to_cpu(super->s_log_block_size));
+	return 1;
+}
+
+/*
+ * ext3_image - match an ext-family superblock that has the HAS_JOURNAL
+ * compat feature bit set.
+ */
+static int ext3_image(const void *buf, unsigned long long *bytes)
+{
+	const struct ext3_super_block *super = buf;
+
+	if (super->s_magic != __cpu_to_le16(EXT2_SUPER_MAGIC))
+		return 0;
+	if (!(super->s_feature_compat
+	      & __cpu_to_le32(EXT3_FEATURE_COMPAT_HAS_JOURNAL)))
+		return 0;
+
+	*bytes = (unsigned long long)__le32_to_cpu(super->s_blocks_count)
+		<< (10 + __le32_to_cpu(super->s_log_block_size));
+	return 1;
+}
+
+/*
+ * ext2_image - match any superblock carrying the ext2 magic number; the
+ * size is s_blocks_count << (10 + s_log_block_size).
+ */
+static int ext2_image(const void *buf, unsigned long long *bytes)
+{
+	const struct ext2_super_block *super = buf;
+
+	if (super->s_magic != __cpu_to_le16(EXT2_SUPER_MAGIC))
+		return 0;
+
+	*bytes = (unsigned long long)__le32_to_cpu(super->s_blocks_count)
+		<< (10 + __le32_to_cpu(super->s_log_block_size));
+	return 1;
+}
+
+/*
+ * reiserfs_image - match any of the three reiserfs magic strings; the
+ * size is block count times block size.
+ */
+static int reiserfs_image(const void *buf, unsigned long long *bytes)
+{
+	const struct reiserfs_super_block *sb = buf;
+	int known_magic;
+
+	known_magic =
+		!memcmp(REISERFS_MAGIC(sb), REISERFS_SUPER_MAGIC_STRING,
+			sizeof(REISERFS_SUPER_MAGIC_STRING) - 1) ||
+		!memcmp(REISERFS_MAGIC(sb), REISER2FS_SUPER_MAGIC_STRING,
+			sizeof(REISER2FS_SUPER_MAGIC_STRING) - 1) ||
+		!memcmp(REISERFS_MAGIC(sb), REISER2FS_JR_SUPER_MAGIC_STRING,
+			sizeof(REISER2FS_JR_SUPER_MAGIC_STRING) - 1);
+	if (!known_magic)
+		return 0;
+
+	*bytes = (unsigned long long)REISERFS_BLOCK_COUNT(sb) *
+		REISERFS_BLOCKSIZE(sb);
+	return 1;
+}
+
+/*
+ * reiser4_image - match on the reiser4 master superblock magic string;
+ * the size is the product of ms_format and ms_blksize.
+ */
+static int reiser4_image(const void *buf, unsigned long long *bytes)
+{
+	const struct reiser4_master_sb *master = buf;
+
+	if (memcmp(master->ms_magic, REISER4_SUPER_MAGIC_STRING,
+		   sizeof(REISER4_SUPER_MAGIC_STRING) - 1) != 0)
+		return 0;
+
+	*bytes = (unsigned long long) __le32_to_cpu(master->ms_format) *
+		__le32_to_cpu(master->ms_blksize);
+	return 1;
+}
+
+/*
+ * xfs_image - match on the big-endian XFS magic number; the size is
+ * sb_dblocks times sb_blocksize.
+ */
+static int xfs_image(const void *buf, unsigned long long *bytes)
+{
+	const struct xfs_sb *super = buf;
+
+	if (__be32_to_cpu(super->sb_magicnum) != XFS_SB_MAGIC)
+		return 0;
+
+	*bytes = __be64_to_cpu(super->sb_dblocks) *
+		__be32_to_cpu(super->sb_blocksize);
+	return 1;
+}
+
+/*
+ * jfs_image - match on the 4-byte JFS magic; the size is
+ * s_size << s_l2pbsize.
+ */
+static int jfs_image(const void *buf, unsigned long long *bytes)
+{
+	const struct jfs_superblock *super = buf;
+
+	if (memcmp(super->s_magic, JFS_MAGIC, 4) != 0)
+		return 0;
+
+	*bytes = __le64_to_cpu(super->s_size)
+		<< __le16_to_cpu(super->s_l2pbsize);
+	return 1;
+}
+
+/*
+ * luks_image - match on the LUKS partition header magic; the size is
+ * always reported as 0.
+ */
+static int luks_image(const void *buf, unsigned long long *blocks)
+{
+	const struct luks_partition_header *hdr = buf;
+
+	if (memcmp(hdr->magic, LUKS_MAGIC, LUKS_MAGIC_L) != 0)
+		return 0;
+
+	/* FSSIZE is dictated by the underlying fs, not by LUKS */
+	*blocks = 0;
+	return 1;
+}
+
+/*
+ * swap_image - match on either swap magic string; the size is reported
+ * as 0.
+ */
+static int swap_image(const void *buf, unsigned long long *blocks)
+{
+	const struct swap_super_block *swap = buf;
+
+	if (memcmp(swap->magic, SWAP_MAGIC_1, SWAP_MAGIC_L) != 0 &&
+	    memcmp(swap->magic, SWAP_MAGIC_2, SWAP_MAGIC_L) != 0)
+		return 0;
+
+	*blocks = 0;
+	return 1;
+}
+
+/*
+ * suspend_image - match on any of the three suspend magic strings found
+ * in the swap superblock's magic field; the size is reported as 0.
+ */
+static int suspend_image(const void *buf, unsigned long long *blocks)
+{
+	const struct swap_super_block *swap = buf;
+	int is_suspend;
+
+	is_suspend = !memcmp(swap->magic, SUSP_MAGIC_1, SUSP_MAGIC_L) ||
+		     !memcmp(swap->magic, SUSP_MAGIC_2, SUSP_MAGIC_L) ||
+		     !memcmp(swap->magic, SUSP_MAGIC_U, SUSP_MAGIC_L);
+	if (!is_suspend)
+		return 0;
+
+	*blocks = 0;
+	return 1;
+}
+
+/*
+ * lvm2_image - look for an LVM2 label at each 512-byte sector boundary
+ * inside the BLOCK_SIZE-byte buffer @buf.
+ *
+ * Returns 1 when both the magic and the type string match at some
+ * sector (with *blocks set to 0), 0 when no sector matches.
+ */
+static int lvm2_image(const void *buf, unsigned long long *blocks)
+{
+	/* Byte pointer: arithmetic on void * is a GNU extension, not ISO C */
+	const unsigned char *base = buf;
+	const struct lvm2_super_block *lsb;
+	int i;
+
+	/* We must check every 512 byte sector */
+	for (i = 0; i < BLOCK_SIZE; i += 0x200) {
+		lsb = (const struct lvm2_super_block *)(base + i);
+
+		if (!memcmp(lsb->magic, LVM2_MAGIC, LVM2_MAGIC_L) &&
+		    !memcmp(lsb->type, LVM2_TYPE, LVM2_TYPE_L)) {
+			/* This is just one of possibly many PV's */
+			*blocks = 0;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * iso_image - match the buffer against the ISO volume descriptor id or
+ * the "hs" variant's id; the size is reported as 0.
+ */
+static int iso_image(const void *buf, unsigned long long *blocks)
+{
+	const struct iso_volume_descriptor *vd = buf;
+	const struct iso_hs_volume_descriptor *hs_vd = buf;
+
+	if (memcmp(vd->id, ISO_MAGIC, ISO_MAGIC_L) != 0 &&
+	    memcmp(hs_vd->id, ISO_HS_MAGIC, ISO_HS_MAGIC_L) != 0)
+		return 0;
+
+	*blocks = 0;
+	return 1;
+}
+
+/*
+ * squashfs_image - match on any of the four squashfs magic values; the
+ * size is the superblock's bytes_used field.
+ */
+static int squashfs_image(const void *buf, unsigned long long *blocks)
+{
+	const struct squashfs_super_block *super = buf;
+
+	if (super->s_magic != SQUASHFS_MAGIC
+	    && super->s_magic != SQUASHFS_MAGIC_SWAP
+	    && super->s_magic != SQUASHFS_MAGIC_LZMA
+	    && super->s_magic != SQUASHFS_MAGIC_LZMA_SWAP)
+		return 0;
+
+	*blocks = (unsigned long long) super->bytes_used;
+	return 1;
+}
+
+/*
+ * gfs2_image - match on the big-endian GFS2 magic plus one of the two
+ * accepted sb_fs_format values; the size is reported as 0.
+ */
+static int gfs2_image(const void *buf, unsigned long long *bytes)
+{
+	const struct gfs2_sb *super = buf;
+
+	if (__be32_to_cpu(super->sb_header.mh_magic) != GFS2_MAGIC)
+		return 0;
+	if (__be32_to_cpu(super->sb_fs_format) != GFS2_FORMAT_FS
+	    && __be32_to_cpu(super->sb_fs_format) != GFS2_FORMAT_MULTI)
+		return 0;
+
+	*bytes = 0;	/* cpu_to_be32(sb->sb_bsize) * ?; */
+	return 1;
+}
+
+/*
+ * ocfs2_image - match on the OCFS2 superblock signature string; the
+ * size is reported as 0.
+ */
+static int ocfs2_image(const void *buf, unsigned long long *bytes)
+{
+	const struct ocfs2_dinode *dinode = buf;
+
+	if (memcmp(dinode->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
+		   sizeof(OCFS2_SUPER_BLOCK_SIGNATURE) - 1) != 0)
+		return 0;
+
+	*bytes = 0;
+	return 1;
+}
+
+/*
+ * nilfs2_image - match on the NILFS magic together with revision
+ * level 2; the size is the superblock's s_dev_size field.
+ */
+static int nilfs2_image(const void *buf, unsigned long long *bytes)
+{
+	const struct nilfs_super_block *super = buf;
+
+	if (super->s_magic != __cpu_to_le16(NILFS_SUPER_MAGIC))
+		return 0;
+	if (super->s_rev_level != __cpu_to_le32(2))
+		return 0;
+
+	*bytes = (unsigned long long)__le64_to_cpu(super->s_dev_size);
+	return 1;
+}
+
+/*
+ * btrfs_image - match on the btrfs magic bytes; the size is the
+ * superblock's total_bytes field.
+ */
+static int btrfs_image(const void *buf, unsigned long long *bytes)
+{
+	const struct btrfs_super_block *super = buf;
+
+	if (memcmp(super->magic, BTRFS_MAGIC, BTRFS_MAGIC_L) != 0)
+		return 0;
+
+	*bytes = (unsigned long long)__le64_to_cpu(super->total_bytes);
+	return 1;
+}
+
+struct imagetype {
+ off_t block; /* index of the BLOCK_SIZE-byte block to probe; -1 is replaced by SWAP_OFFSET() in identify_fs() */
+ const char name[12]; /* string handed back through identify_fs()'s fstype argument */
+ int (*identify) (const void *, unsigned long long *); /* returns nonzero on a match and stores a size (0 where none is known) */
+};
+
+/*
+ * Note:
+ *
+ * Minix test needs to come after ext3/ext2, since it's possible for
+ * ext3/ext2 to look like minix by pure random chance.
+ *
+ * LVM comes after all other filesystems since it's possible
+ * that an old lvm signature is left on the disk if pvremove
+ * is not used before creating the new fs.
+ *
+ * The same goes for LUKS as for LVM.
+ */
+static struct imagetype images[] = { /* probed in array order; the first match wins */
+ {0, "gzip", gzip_image},
+ {0, "cramfs", cramfs_image},
+ {0, "romfs", romfs_image},
+ {0, "xfs", xfs_image},
+ {0, "squashfs", squashfs_image},
+ {1, "ext4", ext4_image},
+ {1, "ext3", ext3_image},
+ {1, "ext2", ext2_image},
+ {1, "minix", minix_image},
+ {1, "nilfs2", nilfs2_image},
+ {2, "ocfs2", ocfs2_image},
+ {8, "reiserfs", reiserfs_image},
+ {64, "reiserfs", reiserfs_image},
+ {64, "reiser4", reiser4_image},
+ {64, "gfs2", gfs2_image},
+ {64, "btrfs", btrfs_image},
+ {32, "jfs", jfs_image},
+ {32, "iso9660", iso_image},
+ {0, "luks", luks_image},
+ {0, "lvm2", lvm2_image},
+ {1, "lvm2", lvm2_image},
+ {-1, "swap", swap_image}, /* -1: identify_fs() substitutes SWAP_OFFSET() */
+ {-1, "suspend", suspend_image},
+ {0, "", NULL} /* terminator: identify_fs() stops at identify == NULL */
+};
+
+/*
+ * identify_fs - probe @fd for a known filesystem / image signature.
+ * @fd:     descriptor to read from (with pread())
+ * @fstype: receives the matching name from images[], or NULL
+ * @bytes:  receives the size stored by the matching probe; may be NULL
+ * @offset: byte offset within @fd at which the image starts
+ *
+ * Walks images[] in order, reading a BLOCK_SIZE block only when the next
+ * probe needs a different block than the one currently in the buffer.
+ *
+ * Returns 0 on a match, 1 if nothing matched, -1 if a block could not be
+ * read in full.
+ */
+int identify_fs(int fd, const char **fstype,
+		unsigned long long *bytes, off_t offset)
+{
+	uint64_t buf[BLOCK_SIZE >> 3];	/* 64-bit worst case alignment */
+	unsigned long long discard;
+	off_t loaded = (off_t) -1;
+	struct imagetype *probe = images;
+
+	if (bytes == NULL)
+		bytes = &discard;
+
+	*fstype = NULL;
+	*bytes = 0;
+
+	while (probe->identify != NULL) {
+		/* Hack for swap, which apparently is dependent on page size */
+		if (probe->block == -1)
+			probe->block = SWAP_OFFSET();
+
+		/* Fetch the block this probe looks at, unless already held */
+		if (probe->block != loaded) {
+			int nread;
+
+			loaded = probe->block;
+			nread = pread(fd, buf, BLOCK_SIZE,
+				      offset + loaded * BLOCK_SIZE);
+			if (nread != BLOCK_SIZE)
+				return -1;	/* error */
+		}
+
+		if (probe->identify(buf, bytes)) {
+			*fstype = probe->name;
+			return 0;
+		}
+
+		probe++;
+	}
+
+	return 1;		/* Unknown filesystem */
+}
diff --git a/usr/kinit/fstype/fstype.h b/usr/kinit/fstype/fstype.h
new file mode 100644
index 0000000..be2a3e4
--- /dev/null
+++ b/usr/kinit/fstype/fstype.h
@@ -0,0 +1,20 @@
+/*
+ * by rmk
+ *
+ * Detect filesystem type (on stdin) and output strings for two
+ * environment variables:
+ * FSTYPE - filesystem type
+ * FSSIZE - filesystem size (if known)
+ *
+ * We currently detect the fs listed in struct imagetype.
+ */
+
+#ifndef FSTYPE_H
+#define FSTYPE_H
+
+#include <unistd.h>
+
+int identify_fs(int fd, const char **fstype, /* returns 0 and sets *fstype on a match, 1 if unknown (*fstype NULL), -1 on a short or failed read */
+ unsigned long long *bytes, off_t offset); /* bytes may be NULL; offset is added to every probe position */
+
+#endif
diff --git a/usr/kinit/fstype/gfs2_fs.h b/usr/kinit/fstype/gfs2_fs.h
new file mode 100644
index 0000000..028e0c9
--- /dev/null
+++ b/usr/kinit/fstype/gfs2_fs.h
@@ -0,0 +1,56 @@
+#ifndef __GFS2_FS_H
+#define __GFS2_FS_H
+
+#define GFS2_MAGIC 0x01161970
+#define GFS2_FORMAT_FS 1801
+#define GFS2_FORMAT_MULTI 1900
+
+
+/*
+ * An on-disk inode number
+ */
+struct gfs2_inum { /* two 64-bit fields; struct gfs2_sb embeds three of these */
+ __be64 no_formal_ino;
+ __be64 no_addr;
+};
+
+/*
+ * Generic metadata head structure
+ * Every inplace buffer logged in the journal must start with this.
+ */
+struct gfs2_meta_header { /* embedded as the first member (sb_header) of struct gfs2_sb */
+ uint32_t mh_magic; /* presumably checked against GFS2_MAGIC by the probe -- confirm in fstype.c */
+ uint32_t mh_type;
+ uint64_t __pad0; /* Was generation number in gfs1 */
+ uint32_t mh_format;
+ uint32_t __pad1; /* Was incarnation number in gfs1 */
+};
+
+/* Requirement: GFS2_LOCKNAME_LEN % 8 == 0
+ * Includes: the fencing zero at the end */
+#define GFS2_LOCKNAME_LEN 64
+
+/*
+ * super-block structure
+ */
+struct gfs2_sb { /* packed layout that starts with the generic metadata header */
+ struct gfs2_meta_header sb_header;
+
+ uint32_t sb_fs_format; /* presumably matched against GFS2_FORMAT_FS -- confirm in fstype.c */
+ uint32_t sb_multihost_format; /* presumably matched against GFS2_FORMAT_MULTI -- confirm in fstype.c */
+ uint32_t __pad0; /* Was superblock flags in gfs1 */
+
+ uint32_t sb_bsize;
+ uint32_t sb_bsize_shift;
+ uint32_t __pad1; /* Was journal segment size in gfs1 */
+
+ struct gfs2_inum sb_master_dir; /* Was jindex dinode in gfs1 */
+ struct gfs2_inum __pad2; /* Was rindex dinode in gfs1 */
+ struct gfs2_inum sb_root_dir;
+
+ char sb_lockproto[GFS2_LOCKNAME_LEN]; /* GFS2_LOCKNAME_LEN (64) bytes each */
+ char sb_locktable[GFS2_LOCKNAME_LEN];
+ /* In gfs1, quota and license dinodes followed */
+} __attribute__ ((__packed__));
+
+#endif /* __GFS2_FS_H */
diff --git a/usr/kinit/fstype/iso9660_sb.h b/usr/kinit/fstype/iso9660_sb.h
new file mode 100644
index 0000000..efe0733
--- /dev/null
+++ b/usr/kinit/fstype/iso9660_sb.h
@@ -0,0 +1,24 @@
+#ifndef __ISO9660_SB_H
+#define __ISO9660_SB_H
+
+#define ISO_MAGIC_L 5
+#define ISO_MAGIC "CD001"
+#define ISO_HS_MAGIC_L 5
+#define ISO_HS_MAGIC "CDROM"
+
+/* ISO9660 Volume Descriptor */
+struct iso_volume_descriptor { /* three byte-sized fields: type, 5-byte id, version */
+ __u8 type;
+ char id[ISO_MAGIC_L]; /* 5 bytes, no room for a NUL; same length as ISO_MAGIC ("CD001") */
+ __u8 version;
+};
+
+/* High Sierra Volume Descriptor */
+struct iso_hs_volume_descriptor { /* same fields as iso_volume_descriptor, preceded by 8 bytes */
+ char foo[8]; /* 8 leading bytes that this header gives no meaning to */
+ __u8 type;
+ char id[ISO_HS_MAGIC_L]; /* 5 bytes, no room for a NUL; same length as ISO_HS_MAGIC ("CDROM") */
+ __u8 version;
+};
+
+#endif
diff --git a/usr/kinit/fstype/jfs_superblock.h b/usr/kinit/fstype/jfs_superblock.h
new file mode 100644
index 0000000..63132a0
--- /dev/null
+++ b/usr/kinit/fstype/jfs_superblock.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2000-2003
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef _H_JFS_SUPERBLOCK
+#define _H_JFS_SUPERBLOCK
+
+struct timestruc_t { /* 8-byte timestamp; used for s_time in struct jfs_superblock */
+ __le32 tv_sec;
+ __le32 tv_nsec;
+};
+
+/*
+ * make the magic number something a human could read
+ */
+#define JFS_MAGIC "JFS1" /* Magic word */
+
+#define JFS_VERSION 2 /* Version number: Version 2 */
+
+#define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */
+
+/*
+ * aggregate superblock
+ *
+ * The name superblock is too close to super_block, so the name has been
+ * changed to jfs_superblock. The utilities are still using the old name.
+ */
+struct jfs_superblock {
+ char s_magic[4]; /* 4: magic number */
+ __le32 s_version; /* 4: version number */
+
+ __le64 s_size; /* 8: aggregate size in hardware/LVM blocks;
+ * VFS: number of blocks
+ */
+ __le32 s_bsize; /* 4: aggregate block size in bytes;
+ * VFS: fragment size
+ */
+ __le16 s_l2bsize; /* 2: log2 of s_bsize */
+ __le16 s_l2bfactor; /* 2: log2(s_bsize/hardware block size) */
+ __le32 s_pbsize; /* 4: hardware/LVM block size in bytes */
+ __le16 s_l2pbsize; /* 2: log2 of s_pbsize */
+ __le16 pad; /* 2: padding necessary for alignment */
+
+ __le32 s_agsize; /* 4: allocation group size in aggr. blocks */
+
+ __le32 s_flag; /* 4: aggregate attributes:
+ * see jfs_filsys.h
+ */
+ __le32 s_state; /* 4: mount/unmount/recovery state:
+ * see jfs_filsys.h
+ */
+ __le32 s_compress; /* 4: > 0 if data compression */
+
+ __le64 s_ait2; /* 8: first extent of secondary
+ * aggregate inode table
+ */
+
+ __le64 s_aim2; /* 8: first extent of secondary
+ * aggregate inode map
+ */
+ __le32 s_logdev; /* 4: device address of log */
+ __le32 s_logserial; /* 4: log serial number at aggregate mount */
+ __le64 s_logpxd; /* 8: inline log extent */
+
+ __le64 s_fsckpxd; /* 8: inline fsck work space extent */
+
+ struct timestruc_t s_time; /* 8: time last updated */
+
+ __le32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for
+ * the fsck service log.
+ * N.B. These blocks are divided among the
+ * versions kept. This is not a per
+ * version size.
+ * N.B. These blocks are included in the
+ * length field of s_fsckpxd.
+ */
+ char s_fscklog; /* 1: which fsck service log is most recent
+ * 0 => no service log data yet
+ * 1 => the first one
+ * 2 => the 2nd one
+ */
+ char s_fpack[11]; /* 11: file system volume name
+ * N.B. This must be 11 bytes to
+ * conform with the OS/2 BootSector
+ * requirements
+ * Only used when s_version is 1
+ */
+
+ /* extendfs() parameter under s_state & FM_EXTENDFS */
+ __le64 s_xsize; /* 8: extendfs s_size */
+ __le64 s_xfsckpxd; /* 8: extendfs fsckpxd */
+ __le64 s_xlogpxd; /* 8: extendfs logpxd */
+ /* - 128 byte boundary - */
+
+ char s_uuid[16]; /* 16: 128-bit uuid for volume */
+ char s_label[16]; /* 16: volume label */
+ char s_loguuid[16]; /* 16: 128-bit uuid for log device */
+
+};
+
+#endif /*_H_JFS_SUPERBLOCK */
diff --git a/usr/kinit/fstype/luks_fs.h b/usr/kinit/fstype/luks_fs.h
new file mode 100644
index 0000000..fd8de31
--- /dev/null
+++ b/usr/kinit/fstype/luks_fs.h
@@ -0,0 +1,44 @@
+#ifndef __LINUX_LUKS_FS_H
+#define __LINUX_LUKS_FS_H
+
+/* The basic structures of the luks partition header */
+#define LUKS_MAGIC_L 6
+#define LUKS_CIPHERNAME_L 32
+#define LUKS_CIPHERMODE_L 32
+#define LUKS_HASHSPEC_L 32
+#define LUKS_UUID_STRING_L 40
+
+#define LUKS_MAGIC "LUKS\xBA\xBE"
+#define LUKS_DIGESTSIZE 20
+#define LUKS_SALTSIZE 32
+#define LUKS_NUMKEYS 8
+#define LUKS_MKD_ITER 10
+#define LUKS_KEY_DISABLED 0x0000DEAD
+#define LUKS_KEY_ENABLED 0x00AC71F3
+#define LUKS_STRIPES 4000
+
+/* On-disk "super block" */
+struct luks_partition_header { /* header fields followed by LUKS_NUMKEYS (8) key-slot records */
+ char magic[LUKS_MAGIC_L]; /* 6 bytes, no NUL; same length as LUKS_MAGIC ("LUKS\xBA\xBE") */
+ __be16 version;
+ char cipherName[LUKS_CIPHERNAME_L];
+ char cipherMode[LUKS_CIPHERMODE_L];
+ char hashSpec[LUKS_HASHSPEC_L];
+ __be32 payloadOffset;
+ __be32 keyBytes;
+ char mkDigest[LUKS_DIGESTSIZE];
+ char mkDigestSalt[LUKS_SALTSIZE];
+ __be32 mkDigestIterations;
+ char uuid[LUKS_UUID_STRING_L];
+
+ struct {
+ __be32 active; /* presumably LUKS_KEY_ENABLED or LUKS_KEY_DISABLED -- confirm */
+ /* Parameters for PBKDF2 processing */
+ __be32 passwordIterations;
+ char passwordSalt[LUKS_SALTSIZE];
+ __be32 keyMaterialOffset;
+ __be32 stripes;
+ } keyblock[LUKS_NUMKEYS];
+};
+
+#endif
diff --git a/usr/kinit/fstype/lvm2_sb.h b/usr/kinit/fstype/lvm2_sb.h
new file mode 100644
index 0000000..75dfc10
--- /dev/null
+++ b/usr/kinit/fstype/lvm2_sb.h
@@ -0,0 +1,18 @@
+#ifndef __LVM2_SB_H
+#define __LVM2_SB_H
+
+/* LVM2 super block definitions */
+#define LVM2_MAGIC_L 8
+#define LVM2_MAGIC "LABELONE"
+#define LVM2_TYPE_L 8
+#define LVM2_TYPE "LVM2 001"
+
+struct lvm2_super_block { /* LVM2 label; the images[] table probes it at blocks 0 and 1 */
+ char magic[LVM2_MAGIC_L]; /* 8 bytes, no NUL; same length as LVM2_MAGIC ("LABELONE") */
+ __be64 sector;
+ __be32 crc;
+ __be32 offset;
+ char type[LVM2_TYPE_L]; /* 8 bytes, no NUL; same length as LVM2_TYPE ("LVM2 001") */
+};
+
+#endif
diff --git a/usr/kinit/fstype/main.c b/usr/kinit/fstype/main.c
new file mode 100644
index 0000000..9162bdf
--- /dev/null
+++ b/usr/kinit/fstype/main.c
@@ -0,0 +1,57 @@
+/*
+ * by rmk
+ *
+ * Detect filesystem type (on stdin) and output strings for two
+ * environment variables:
+ * FSTYPE - filesystem type
+ * FSSIZE - filesystem size (if known)
+ *
+ * The probes and the order they run in are defined by the images[]
+ * table in fstype.c (gzip, cramfs, romfs, xfs, squashfs, ext4, ...).
+ *
+ * MINIX, ext3 and Reiserfs bits are currently untested.
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include "fstype.h"
+
+char *progname;
+
+/*
+ * Identify the filesystem on the file named by argv[1], or on stdin
+ * when no name (or "-") is given, and print FSTYPE=/FSSIZE= lines.
+ *
+ * Exit status: the identify_fs() result (0 identified, 1 unknown),
+ * 1 on a usage error, 2 when the file cannot be opened or read.
+ */
+int main(int argc, char *argv[])
+{
+	const char *file = "stdin";
+	const char *fstype;
+	unsigned long long bytes;
+	int fd = 0;		/* default: probe standard input */
+	int rv;
+
+	progname = argv[0];
+
+	if (argc > 2) {
+		fprintf(stderr, "Usage: %s [file]\n", progname);
+		return 1;
+	}
+
+	/* A lone "-" means stdin, as does a missing argument */
+	if (argc > 1 && strcmp(argv[1], "-") != 0) {
+		file = argv[1];
+		fd = open(file, O_RDONLY);
+		if (fd < 0) {
+			perror(argv[1]);
+			return 2;
+		}
+	}
+
+	rv = identify_fs(fd, &fstype, &bytes, 0);
+	if (rv == -1) {
+		perror(file);
+		return 2;
+	}
+
+	if (!fstype)
+		fstype = "unknown";
+
+	printf("FSTYPE=%s\nFSSIZE=%llu\n", fstype, bytes);
+	return rv;
+}
diff --git a/usr/kinit/fstype/minix_fs.h b/usr/kinit/fstype/minix_fs.h
new file mode 100644
index 0000000..e2899f0
--- /dev/null
+++ b/usr/kinit/fstype/minix_fs.h
@@ -0,0 +1,85 @@
+#ifndef _LINUX_MINIX_FS_H
+#define _LINUX_MINIX_FS_H
+
+/*
+ * The minix filesystem constants/structures
+ */
+
+/*
+ * Thanks to Kees J Bot for sending me the definitions of the new
+ * minix filesystem (aka V2) with bigger inodes and 32-bit block
+ * pointers.
+ */
+
+#define MINIX_ROOT_INO 1
+
+/* Not the same as the bogus LINK_MAX in <linux/limits.h>. Oh well. */
+#define MINIX_LINK_MAX 250
+#define MINIX2_LINK_MAX 65530
+
+#define MINIX_I_MAP_SLOTS 8
+#define MINIX_Z_MAP_SLOTS 64
+#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */
+#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */
+#define MINIX2_SUPER_MAGIC 0x2468 /* minix V2 fs */
+#define MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 fs, 30 char names */
+#define MINIX_VALID_FS 0x0001 /* Clean fs. */
+#define MINIX_ERROR_FS 0x0002 /* fs has errors. */
+
+#define MINIX_INODES_PER_BLOCK ((BLOCK_SIZE)/(sizeof (struct minix_inode)))
+#define MINIX2_INODES_PER_BLOCK ((BLOCK_SIZE)/(sizeof (struct minix2_inode)))
+
+/*
+ * This is the original minix inode layout on disk.
+ * Note the 8-bit gid and atime and ctime.
+ */
+struct minix_inode { /* field sizes sum to 32 bytes: 2+2+4+4+1+1+9*2 */
+ __u16 i_mode;
+ __u16 i_uid;
+ __u32 i_size;
+ __u32 i_time;
+ __u8 i_gid;
+ __u8 i_nlinks;
+ __u16 i_zone[9]; /* 9 16-bit zone numbers (7+1+1 per the V2 comment below) */
+};
+
+/*
+ * The new minix inode has all the time entries, as well as
+ * long block numbers and a third indirect block (7+1+1+1
+ * instead of 7+1+1). Also, some previously 8-bit values are
+ * now 16-bit. The inode is now 64 bytes instead of 32.
+ */
+struct minix2_inode { /* field sizes sum to 64 bytes: 4*2 + 4*4 + 10*4 */
+ __u16 i_mode;
+ __u16 i_nlinks;
+ __u16 i_uid;
+ __u16 i_gid;
+ __u32 i_size;
+ __u32 i_atime;
+ __u32 i_mtime;
+ __u32 i_ctime;
+ __u32 i_zone[10]; /* 10 32-bit zone numbers (7+1+1+1) */
+};
+
+/*
+ * minix super-block data on disk
+ */
+struct minix_super_block { /* field sizes sum to 24 bytes; images[] probes minix at block 1 */
+ __u16 s_ninodes;
+ __u16 s_nzones;
+ __u16 s_imap_blocks;
+ __u16 s_zmap_blocks;
+ __u16 s_firstdatazone;
+ __u16 s_log_zone_size;
+ __u32 s_max_size;
+ __u16 s_magic; /* presumably one of the MINIX*_SUPER_MAGIC* values above -- confirm in fstype.c */
+ __u16 s_state; /* presumably MINIX_VALID_FS / MINIX_ERROR_FS -- confirm */
+ __u32 s_zones;
+};
+
+struct minix_dir_entry { /* 16-bit inode number followed by the entry name */
+ __u16 inode;
+ char name[0]; /* zero-length trailing array (GNU extension): the name follows the struct */
+};
+
+#endif
diff --git a/usr/kinit/fstype/nilfs_fs.h b/usr/kinit/fstype/nilfs_fs.h
new file mode 100644
index 0000000..0845edf
--- /dev/null
+++ b/usr/kinit/fstype/nilfs_fs.h
@@ -0,0 +1,64 @@
+#ifndef __NILFS_FS_H
+#define __NILFS_FS_H
+
+#define NILFS_SUPER_MAGIC 0x3434 /* NILFS filesystem magic number */
+
+/*
+ * struct nilfs_super_block - structure of super block on disk
+ */
+struct nilfs_super_block {
+ __le32 s_rev_level; /* Revision level */
+ __le16 s_minor_rev_level; /* minor revision level */
+ __le16 s_magic; /* Magic signature */
+
+ __le16 s_bytes; /* Bytes count of CRC calculation
+ for this structure. s_reserved
+ is excluded. */
+ __le16 s_flags; /* flags */
+ __le32 s_crc_seed; /* Seed value of CRC calculation */
+ __le32 s_sum; /* Check sum of super block */
+
+ __le32 s_log_block_size; /* Block size represented as follows
+ blocksize = 1 << (s_log_block_size + 10) */
+ __le64 s_nsegments; /* Number of segments in filesystem */
+ __le64 s_dev_size; /* block device size in bytes */
+ __le64 s_first_data_block; /* 1st seg disk block number */
+ __le32 s_blocks_per_segment; /* number of blocks per full segment */
+ __le32 s_r_segments_percentage;/* Reserved segments percentage */ /* or __le16 */
+
+ __le64 s_last_cno; /* Last checkpoint number */
+ __le64 s_last_pseg; /* disk block addr pseg written last */
+ __le64 s_last_seq; /* seq. number of seg written last */
+ __le64 s_free_blocks_count; /* Free blocks count */
+
+ __le64 s_ctime; /* Creation time (execution time of newfs) */
+ __le64 s_mtime; /* Mount time */
+ __le64 s_wtime; /* Write time */
+ __le16 s_mnt_count; /* Mount count */
+ __le16 s_max_mnt_count; /* Maximal mount count */
+ __le16 s_state; /* File system state */
+ __le16 s_errors; /* Behaviour when detecting errors */
+ __le64 s_lastcheck; /* time of last check */
+
+ __le32 s_checkinterval; /* max. time between checks */
+ __le32 s_creator_os; /* OS */
+ __le16 s_def_resuid; /* Default uid for reserved blocks */
+ __le16 s_def_resgid; /* Default gid for reserved blocks */
+ __le32 s_first_ino; /* First non-reserved inode */ /* or __le16 */
+
+ __le16 s_inode_size; /* Size of an inode */
+ __le16 s_dat_entry_size; /* Size of a dat entry */
+ __le16 s_checkpoint_size; /* Size of a checkpoint */
+ __le16 s_segment_usage_size; /* Size of a segment usage */
+
+ __u8 s_uuid[16]; /* 128-bit uuid for volume */
+ char s_volume_name[16]; /* volume name */
+ char s_last_mounted[64]; /* directory where last mounted */
+
+ __le32 s_c_interval; /* Commit interval of segment */
+ __le32 s_c_block_max; /* Threshold of data amount for
+ the segment construction */
+ __u32 s_reserved[192]; /* padding to the end of the block */
+};
+
+#endif /* __NILFS_FS_H */
diff --git a/usr/kinit/fstype/ocfs2_fs.h b/usr/kinit/fstype/ocfs2_fs.h
new file mode 100644
index 0000000..b71cb61
--- /dev/null
+++ b/usr/kinit/fstype/ocfs2_fs.h
@@ -0,0 +1,90 @@
+#ifndef _OCFS2_FS_H
+#define _OCFS2_FS_H
+
+/* Object signatures */
+#define OCFS2_SUPER_BLOCK_SIGNATURE "OCFSV2"
+
+#define OCFS2_VOL_UUID_LEN 16
+#define OCFS2_MAX_VOL_LABEL_LEN 64
+
+/*
+ * On disk superblock for OCFS2
+ * Note that it is contained inside an ocfs2_dinode, so all offsets
+ * are relative to the start of ocfs2_dinode.id2.
+ */
+struct ocfs2_super_block {
+/*00*/ uint16_t s_major_rev_level;
+ uint16_t s_minor_rev_level;
+ uint16_t s_mnt_count;
+ int16_t s_max_mnt_count;
+ uint16_t s_state; /* File system state */
+ uint16_t s_errors; /* Behaviour when detecting errors */
+ uint32_t s_checkinterval; /* Max time between checks */
+/*10*/ uint64_t s_lastcheck; /* Time of last check */
+ uint32_t s_creator_os; /* OS */
+ uint32_t s_feature_compat; /* Compatible feature set */
+/*20*/ uint32_t s_feature_incompat; /* Incompatible feature set */
+ uint32_t s_feature_ro_compat; /* Readonly-compatible feature set */
+ uint64_t s_root_blkno; /* Offset, in blocks, of root directory
+ dinode */
+/*30*/ uint64_t s_system_dir_blkno; /* Offset, in blocks, of system
+ directory dinode */
+ uint32_t s_blocksize_bits; /* Blocksize for this fs */
+ uint32_t s_clustersize_bits; /* Clustersize for this fs */
+/*40*/ uint16_t s_max_slots; /* Max number of simultaneous mounts
+ before tunefs required */
+ uint16_t s_reserved1;
+ uint32_t s_reserved2;
+ uint64_t s_first_cluster_group; /* Block offset of 1st cluster
+ * group header */
+/*50*/ uint8_t s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
+/*90*/ uint8_t s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */
+/*A0*/
+} __attribute__ ((packed));
+
+/*
+ * On disk inode for OCFS2
+ */
+struct ocfs2_dinode {
+/*00*/ uint8_t i_signature[8]; /* Signature for validation */
+ uint32_t i_generation; /* Generation number */
+ uint16_t i_suballoc_slot; /* Slot suballocator this inode
+ belongs to */
+ int16_t i_suballoc_bit; /* Bit offset in suballocator
+ block group */
+/*10*/ uint32_t i_reserved0;
+ uint32_t i_clusters; /* Cluster count */
+ uint32_t i_uid; /* Owner UID */
+ uint32_t i_gid; /* Owning GID */
+/*20*/ uint64_t i_size; /* Size in bytes */
+ uint16_t i_mode; /* File mode */
+ uint16_t i_links_count; /* Links count */
+ uint32_t i_flags; /* File flags */
+/*30*/ uint64_t i_atime; /* Access time */
+ uint64_t i_ctime; /* Creation time */
+/*40*/ uint64_t i_mtime; /* Modification time */
+ uint64_t i_dtime; /* Deletion time */
+/*50*/ uint64_t i_blkno; /* Offset on disk, in blocks */
+ uint64_t i_last_eb_blk; /* Pointer to last extent
+ block */
+/*60*/ uint32_t i_fs_generation; /* Generation per fs-instance */
+ uint32_t i_atime_nsec;
+ uint32_t i_ctime_nsec;
+ uint32_t i_mtime_nsec;
+ uint32_t i_attr;
+ uint16_t i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL
+ was set in i_flags */
+ uint16_t i_reserved1;
+/*70*/ uint64_t i_reserved2[8];
+/*B8*/ uint64_t i_pad1;
+ uint64_t i_rdev; /* Device number */
+ uint32_t i_used; /* Bits (ie, clusters) used */
+ uint32_t i_total; /* Total bits (clusters)
+ available */
+ uint32_t ij_flags; /* Mounted, version, etc. */
+ uint32_t ij_pad;
+/*C0*/ struct ocfs2_super_block i_super;
+/* Actual on-disk size is one block */
+} __attribute__ ((packed));
+
+#endif /* _OCFS2_FS_H */
diff --git a/usr/kinit/fstype/reiser4_fs.h b/usr/kinit/fstype/reiser4_fs.h
new file mode 100644
index 0000000..af6ccc4
--- /dev/null
+++ b/usr/kinit/fstype/reiser4_fs.h
@@ -0,0 +1,31 @@
+#ifndef __REISER4_FS_H
+#define __REISER4_FS_H
+
+#define SS_MAGIC_SIZE 16
+
+/* reiser4 filesystem structure
+ *
+ * Master super block structure. It is the same for all reiser4 filesystems,
+ * so, we can declare it here. It contains common for all format fields like
+ * block size etc.
+ */
+struct reiser4_master_sb { /* packed; 16+2+2+16+16 = 52 bytes */
+ /* Master super block magic. */
+ char ms_magic[SS_MAGIC_SIZE];
+
+ /* Disk format in use. */
+ __u16 ms_format;
+
+ /* Filesystem block size in use. */
+ __u16 ms_blksize;
+
+ /* Filesystem uuid in use. */
+ char ms_uuid[SS_MAGIC_SIZE];
+
+ /* Filesystem label in use. */
+ char ms_label[SS_MAGIC_SIZE];
+} __attribute__ ((packed));
+
+#define REISER4_SUPER_MAGIC_STRING "ReIsEr4"
+
+#endif /* __REISER4_FS_H */
diff --git a/usr/kinit/fstype/reiserfs_fs.h b/usr/kinit/fstype/reiserfs_fs.h
new file mode 100644
index 0000000..096d505
--- /dev/null
+++ b/usr/kinit/fstype/reiserfs_fs.h
@@ -0,0 +1,74 @@
+#ifndef __REISERFS_FS_H
+#define __REISERFS_FS_H
+
+struct journal_params { /* eight 32-bit fields (32 bytes); embedded as s_journal in reiserfs_super_block_v1 */
+ __u32 jp_journal_1st_block; /* where does journal start from on its
+ * device */
+ __u32 jp_journal_dev; /* journal device st_rdev */
+ __u32 jp_journal_size; /* size of the journal */
+ __u32 jp_journal_trans_max; /* max number of blocks in a transaction. */
+ __u32 jp_journal_magic; /* random value made on fs creation (this
+ * was sb_journal_block_count) */
+ __u32 jp_journal_max_batch; /* max number of blocks to batch into a
+ * trans */
+ __u32 jp_journal_max_commit_age; /* in seconds, how old can an async
+ * commit be */
+ __u32 jp_journal_max_trans_age; /* in seconds, how old can a transaction
+ * be */
+};
+
+/* this is the super from 3.5.X, where X >= 10 */
+struct reiserfs_super_block_v1 { /* packed; embedded as s_v1 at the start of struct reiserfs_super_block */
+ __u32 s_block_count; /* blocks count */
+ __u32 s_free_blocks; /* free blocks count */
+ __u32 s_root_block; /* root block number */
+ struct journal_params s_journal;
+ __u16 s_blocksize; /* block size (note: a 16-bit field) */
+ __u16 s_oid_maxsize; /* max size of object id array, see
+ * get_objectid() commentary */
+ __u16 s_oid_cursize; /* current size of object id array */
+ __u16 s_umount_state; /* this is set to 1 when filesystem was
+ * umounted, to 2 - when not */
+ char s_magic[10]; /* reiserfs magic string indicates that
+ * file system is reiserfs:
+ * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */
+ __u16 s_fs_state; /* used by fsck to mark which
+ * phase of rebuilding is done */
+ __u32 s_hash_function_code; /* indicates which hash function is being used
+ * to sort names in a directory */
+ __u16 s_tree_height; /* height of disk tree */
+ __u16 s_bmap_nr; /* amount of bitmap blocks needed to address
+ * each block of file system */
+ __u16 s_version; /* this field is only reliable on filesystem
+ * with non-standard journal */
+ __u16 s_reserved_for_journal; /* size in blocks of journal area on main
+ * device, we need to keep after
+ * making fs with non-standard journal */
+} __attribute__ ((__packed__));
+
+/* this is the on disk super block */
+struct reiserfs_super_block { /* packed; the v1 superblock followed by the newer fields */
+ struct reiserfs_super_block_v1 s_v1;
+ __u32 s_inode_generation;
+ __u32 s_flags; /* Right now used only by inode-attributes, if enabled */
+ unsigned char s_uuid[16]; /* filesystem unique identifier */
+ unsigned char s_label[16]; /* filesystem volume label */
+ char s_unused[88]; /* zero filled by mkreiserfs and
+ * reiserfs_convert_objectid_map_v1()
+ * so any additions must be updated
+ * there as well. */
+} __attribute__ ((__packed__));
+
+#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
+#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
+#define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs"
+
+/* Accessors for the v1 part of a struct reiserfs_super_block pointer 's' */
+#define SB_V1_DISK_SUPER_BLOCK(s) (&((s)->s_v1))
+/*
+ * s_blocksize is declared __u16, so it must be converted as a 16-bit
+ * little-endian value; a 32-bit swap of the promoted value would move
+ * it into the upper half-word on big-endian hosts.
+ */
+#define REISERFS_BLOCKSIZE(s) \
+	__le16_to_cpu((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize))
+/* s_block_count is declared __u32 */
+#define REISERFS_BLOCK_COUNT(s) \
+	__le32_to_cpu((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count))
+/* The magic is a byte array: no byte-order conversion applies */
+#define REISERFS_MAGIC(s) \
+	(SB_V1_DISK_SUPER_BLOCK(s)->s_magic)
+
+#endif /* __REISERFS_FS_H */
diff --git a/usr/kinit/fstype/romfs_fs.h b/usr/kinit/fstype/romfs_fs.h
new file mode 100644
index 0000000..c490fbc
--- /dev/null
+++ b/usr/kinit/fstype/romfs_fs.h
@@ -0,0 +1,56 @@
+#ifndef __LINUX_ROMFS_FS_H
+#define __LINUX_ROMFS_FS_H
+
+/* The basic structures of the romfs filesystem */
+
+#define ROMBSIZE BLOCK_SIZE
+#define ROMBSBITS BLOCK_SIZE_BITS
+#define ROMBMASK (ROMBSIZE-1)
+#define ROMFS_MAGIC 0x7275
+
+#define ROMFS_MAXFN 128
+
+#define __mkw(h,l) (((h)&0x00ff)<< 8|((l)&0x00ff))
+#define __mkl(h,l) (((h)&0xffff)<<16|((l)&0xffff))
+#define __mk4(a,b,c,d) cpu_to_be32(__mkl(__mkw(a,b),__mkw(c,d)))
+#define ROMSB_WORD0 __mk4('-','r','o','m')
+#define ROMSB_WORD1 __mk4('1','f','s','-')
+
+/* On-disk "super block" */
+
+struct romfs_super_block { /* four 32-bit words followed by the volume name */
+ __be32 word0; /* presumably compared against ROMSB_WORD0 ('-','r','o','m') -- confirm in fstype.c */
+ __be32 word1; /* presumably compared against ROMSB_WORD1 ('1','f','s','-') -- confirm in fstype.c */
+ __be32 size;
+ __be32 checksum;
+ char name[0]; /* volume name */
+};
+
+/* On disk inode */
+
+struct romfs_inode { /* 16 bytes of fixed fields (compare ROMFH_SIZE below) followed by the name */
+ __be32 next; /* low 4 bits see ROMFH_ */
+ __be32 spec;
+ __be32 size;
+ __be32 checksum;
+ char name[0]; /* zero-length trailing array (GNU extension): the name follows the struct */
+};
+
+#define ROMFH_TYPE 7
+#define ROMFH_HRD 0
+#define ROMFH_DIR 1
+#define ROMFH_REG 2
+#define ROMFH_SYM 3
+#define ROMFH_BLK 4
+#define ROMFH_CHR 5
+#define ROMFH_SCK 6
+#define ROMFH_FIF 7
+#define ROMFH_EXEC 8
+
+/* Alignment */
+
+#define ROMFH_SIZE 16
+#define ROMFH_PAD (ROMFH_SIZE-1)
+#define ROMFH_MASK (~ROMFH_PAD)
+
+#endif
diff --git a/usr/kinit/fstype/squashfs_fs.h b/usr/kinit/fstype/squashfs_fs.h
new file mode 100644
index 0000000..c18365d
--- /dev/null
+++ b/usr/kinit/fstype/squashfs_fs.h
@@ -0,0 +1,48 @@
+#ifndef __SQUASHFS_FS_H
+#define __SQUASHFS_FS_H
+
+/*
+ * Squashfs
+ */
+
+#define SQUASHFS_MAGIC 0x73717368
+#define SQUASHFS_MAGIC_SWAP 0x68737173
+
+/*
+ * Squashfs + LZMA
+ */
+
+#define SQUASHFS_MAGIC_LZMA 0x71736873
+#define SQUASHFS_MAGIC_LZMA_SWAP 0x73687371
+
+/* definitions for structures on disk */
+struct squashfs_super_block { /* packed; fields use native C types, so no byte order is encoded in the types */
+ unsigned int s_magic; /* presumably one of the SQUASHFS_MAGIC* values above, including the _SWAP forms -- confirm */
+ unsigned int inodes;
+ unsigned int bytes_used_2;
+ unsigned int uid_start_2;
+ unsigned int guid_start_2;
+ unsigned int inode_table_start_2;
+ unsigned int directory_table_start_2;
+ unsigned int s_major:16; /* two 16-bit bitfields share one unsigned int */
+ unsigned int s_minor:16;
+ unsigned int block_size_1:16;
+ unsigned int block_log:16;
+ unsigned int flags:8; /* three 8-bit bitfields (24 bits) precede mkfs_time */
+ unsigned int no_uids:8;
+ unsigned int no_guids:8;
+ unsigned int mkfs_time /* time of filesystem creation */;
+ long long root_inode;
+ unsigned int block_size;
+ unsigned int fragments;
+ unsigned int fragment_table_start_2;
+ long long bytes_used;
+ long long uid_start;
+ long long guid_start;
+ long long inode_table_start;
+ long long directory_table_start;
+ long long fragment_table_start;
+ long long lookup_table_start;
+} __attribute__ ((packed));
+
+#endif /* __SQUASHFS_FS_H */
diff --git a/usr/kinit/fstype/swap_fs.h b/usr/kinit/fstype/swap_fs.h
new file mode 100644
index 0000000..7b7fddb
--- /dev/null
+++ b/usr/kinit/fstype/swap_fs.h
@@ -0,0 +1,25 @@
+#ifndef __LINUX_SWAP_FS_H
+#define __LINUX_SWAP_FS_H
+
+/* The basic structures of the swap super block */
+#define SWAP_MAGIC_L 10
+#define SWAP_RESERVED_L (1024 - SWAP_MAGIC_L)
+#define SWAP_MAGIC_1 "SWAP-SPACE"
+#define SWAP_MAGIC_2 "SWAPSPACE2"
+
+/* Suspend signatures, located at same addr as swap magic */
+#define SUSP_MAGIC_L 9
+#define SUSP_MAGIC_1 "S1SUSPEND"
+#define SUSP_MAGIC_2 "S2SUSPEND"
+#define SUSP_MAGIC_U "ULSUSPEND"
+
+/* The superblock is the last block in the first page */
+#define SWAP_OFFSET() ((getpagesize() - 1024) >> 10)
+
+/* On-disk "super block" */
+struct swap_super_block { /* exactly 1024 bytes: 1014 reserved + 10 magic */
+ char reserved[SWAP_RESERVED_L]; /* 1024 - SWAP_MAGIC_L bytes */
+ char magic[SWAP_MAGIC_L]; /* last 10 bytes of the block; the SUSP_MAGIC_* strings (9 bytes) sit at the same address */
+};
+
+#endif
diff --git a/usr/kinit/fstype/xfs_sb.h b/usr/kinit/fstype/xfs_sb.h
new file mode 100644
index 0000000..fd54bc4
--- /dev/null
+++ b/usr/kinit/fstype/xfs_sb.h
@@ -0,0 +1,21 @@
+#ifndef __XFS_SB_H
+#define __XFS_SB_H
+
+/*
+ * Super block
+ * Fits into a sector-sized buffer at address 0 of each allocation group.
+ * Only the first of these is ever updated except during growfs.
+ */
+
+struct xfs_buf;
+struct xfs_mount;
+
+#define XFS_SB_MAGIC 0x58465342 /* 'XFSB' */
+
+typedef struct xfs_sb { /* only three fields (16 bytes) are declared here; presumably just the leading part of the on-disk superblock */
+ __u32 sb_magicnum; /* magic number == XFS_SB_MAGIC */
+ __u32 sb_blocksize; /* logical block size, bytes */
+ __u64 sb_dblocks; /* number of data blocks */
+} xfs_sb_t;
+
+#endif /* __XFS_SB_H */
diff --git a/usr/kinit/getarg.c b/usr/kinit/getarg.c
new file mode 100644
index 0000000..fcce247
--- /dev/null
+++ b/usr/kinit/getarg.c
@@ -0,0 +1,57 @@
+#include <string.h>
+#include "kinit.h"
+
+/*
+ * Routines that hunt for a specific argument. Please note that
+ * they actually search the array backwards. That is because on the
+ * kernel command lines, it's legal to override an earlier argument
+ * with a later argument.
+ */
+
+/*
+ * Was this boolean argument passed? If so return the index in the
+ * argv array for it. For conflicting boolean options, use the
+ * one with the higher index. The only case when the return value
+ * can be equal, is when they're both zero; so equality can be used
+ * as the default option choice.
+ *
+ * In other words, if two options "a" and "b" are opposites, and "a"
+ * is the default, this can be coded as:
+ *
+ * if (get_flag(argc,argv,"a") >= get_flag(argc,argv,"b"))
+ * do_a_stuff();
+ * else
+ * do_b_stuff();
+ */
+/*
+ * Return the highest index i in 1..argc-1 with argv[i] equal to name,
+ * or 0 if there is none.  argv[0] is never examined.  NULL slots are
+ * skipped, matching what get_arg() does.
+ */
+int get_flag(int argc, char *argv[], const char *name)
+{
+	int i;
+
+	for (i = argc-1; i > 0; i--) {
+		/* strcmp() on a NULL slot would be undefined behaviour */
+		if (argv[i] && !strcmp(argv[i], name))
+			return i;
+	}
+	return 0;
+}
+
+/*
+ * Was this textual parameter (foo=option) passed?
+ *
+ * This returns the latest instance of such an option in the argv array.
+ */
+/*
+ * Scan argv[argc-1] down to argv[1] for the last entry that starts with
+ * name and has at least one character after it; return a pointer to
+ * that remainder, or NULL if no such entry exists.
+ */
+char *get_arg(int argc, char *argv[], const char *name)
+{
+	int len = strlen(name);
+	int i = argc;
+
+	while (--i > 0) {
+		char *arg = argv[i];
+
+		if (!arg || strncmp(arg, name, len) != 0)
+			continue;
+		/* "name" with an empty value does not count as a match */
+		if (arg[len] == '\0')
+			continue;
+
+		return arg + len;
+	}
+
+	return NULL;
+}
diff --git a/usr/kinit/getintfile.c b/usr/kinit/getintfile.c
new file mode 100644
index 0000000..41ba475
--- /dev/null
+++ b/usr/kinit/getintfile.c
@@ -0,0 +1,30 @@
+/*
+ * Open a file and read it, assuming it contains a single long value.
+ * Return 0 if we read a valid value, otherwise -1.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "kinit.h"
+
+/*
+ * Read up to 63 bytes from path and parse them with strtol() (base 0,
+ * so decimal, octal and hex prefixes are accepted).
+ *
+ * Returns 0 and stores the number in *val when the file starts with a
+ * number that is followed by end of data or a newline.  Returns -1 and
+ * leaves *val untouched when the file cannot be opened, holds no digits
+ * at all, or has other trailing characters.
+ */
+int getintfile(const char *path, long *val)
+{
+	char buffer[64];
+	char *ep;
+	long value;
+	size_t len;
+	FILE *f;
+
+	f = fopen(path, "r");
+	if (!f)
+		return -1;
+
+	len = fread(buffer, 1, sizeof buffer - 1, f);
+	fclose(f);
+	buffer[len] = '\0';
+
+	value = strtol(buffer, &ep, 0);
+	/* No conversion was performed: an empty file is not a valid 0 */
+	if (ep == buffer)
+		return -1;
+	if (*ep && *ep != '\n')
+		return -1;
+
+	*val = value;
+	return 0;
+}
diff --git a/usr/kinit/initrd.c b/usr/kinit/initrd.c
new file mode 100644
index 0000000..5833f2f
--- /dev/null
+++ b/usr/kinit/initrd.c
@@ -0,0 +1,204 @@
+/*
+ * Handle initrd, thus putting the backwards into backwards compatible
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include "do_mounts.h"
+#include "kinit.h"
+#include "xpio.h"
+
+#define BUF_SIZE 65536 /* Should be a power of 2 */
+
+/*
+ * Copy the initrd to /dev/ram0, copy from the end to the beginning
+ * to avoid taking 2x the memory.
+ */
+/*
+ * Copy the regular file ffd to dfd in chunks of at most BUF_SIZE bytes,
+ * working from the end of the file towards the start.  The first chunk
+ * copied is the partial tail, so every later chunk is BUF_SIZE-aligned
+ * (the mask arithmetic requires BUF_SIZE to be a power of 2).
+ *
+ * Returns 0 on success, -1 if ffd is invalid, not a regular file or
+ * empty, or if any read or write comes up short.
+ */
+static int rd_copy_uncompressed(int ffd, int dfd)
+{
+	char buffer[BUF_SIZE];
+	off_t bytes;
+	struct stat st;
+
+	dprintf("kinit: uncompressed initrd\n");
+
+	if (ffd < 0 || fstat(ffd, &st) || !S_ISREG(st.st_mode) ||
+	    (bytes = st.st_size) == 0)
+		return -1;
+
+	while (bytes) {
+		ssize_t blocksize = ((bytes - 1) & (BUF_SIZE - 1)) + 1;
+		off_t offset = bytes - blocksize;
+
+		/* off_t is not guaranteed to match %llu: convert explicitly */
+		dprintf("kinit: copying %zd bytes at offset %llu\n",
+			blocksize, (unsigned long long)offset);
+
+		if (xpread(ffd, buffer, blocksize, offset) != blocksize ||
+		    xpwrite(dfd, buffer, blocksize, offset) != blocksize)
+			return -1;
+
+		/*
+		 * Free up memory.  Best effort only: if this fails the
+		 * copy is still correct, the source just stays resident.
+		 */
+		ftruncate(ffd, offset);
+		bytes = offset;
+	}
+	return 0;
+}
+
+/*
+ * Install the initrd image at path into /dev/ram0.  An image starting
+ * with the gzip magic (037 0213) is handed to load_ramdisk_compressed();
+ * anything else is copied verbatim by rd_copy_uncompressed().
+ *
+ * Returns the result of the copy/decompress step, or -1 if the image or
+ * the ramdisk cannot be opened or the ramdisk stream fails to flush.
+ */
+static int rd_copy_image(const char *path)
+{
+	int ffd;
+	int rv = -1;
+	unsigned char gzip_magic[2];
+
+	/*
+	 * rd_copy_uncompressed() truncates the source as it goes, and
+	 * ftruncate() needs a descriptor open for writing.  Fall back to
+	 * read-only so an unwritable image can still be copied.
+	 */
+	ffd = open(path, O_RDWR);
+	if (ffd < 0)
+		ffd = open(path, O_RDONLY);
+	if (ffd < 0)
+		return -1;
+
+	if (xpread(ffd, gzip_magic, 2, 0) == 2 &&
+	    gzip_magic[0] == 037 && gzip_magic[1] == 0213) {
+		FILE *wfd = fopen("/dev/ram0", "w");
+		if (!wfd)
+			goto barf;
+		rv = load_ramdisk_compressed(path, wfd, 0);
+		/* A failed flush means the ramdisk contents are incomplete */
+		if (fclose(wfd) && !rv)
+			rv = -1;
+	} else {
+		int dfd = open("/dev/ram0", O_WRONLY);
+		if (dfd < 0)
+			goto barf;
+		rv = rd_copy_uncompressed(ffd, dfd);
+		close(dfd);
+	}
+
+barf:
+	close(ffd);
+	return rv;
+}
+
+/*
+ * Run /linuxrc, for emulation of old-style initrd
+ */
+/*
+ * Old-style initrd emulation: mount /dev/ram0 on /root, publish root_dev
+ * through /proc/sys/kernel/real-root-dev, move the initrd to / and run
+ * /linuxrc in it, then move it back under /old.  Afterwards
+ * real-root-dev is read back: if it is still Root_RAM0 the initrd
+ * becomes /root, otherwise that device is mounted as root and the
+ * initrd is moved to /root/initrd or discarded.
+ *
+ * Returns 0 on success or a negated errno value on failure.
+ */
+static int run_linuxrc(int argc, char *argv[], dev_t root_dev)
+{
+	int root_fd, old_fd;
+	int err;
+	pid_t pid;
+	long realroot = Root_RAM0;
+	const char *ramdisk_name = "/dev/ram0";
+	FILE *fp;
+
+	dprintf("kinit: mounting initrd\n");
+	mkdir("/root", 0700);
+	if (!mount_block(ramdisk_name, "/root", NULL, MS_VERBOSE, NULL))
+		return -errno;
+
+	/* Write the current "real root device" out to procfs */
+	dprintf("kinit: real_root_dev = %#x\n", (unsigned int)root_dev);
+	fp = fopen("/proc/sys/kernel/real-root-dev", "w");
+	if (fp) {
+		fprintf(fp, "%u", (unsigned int)root_dev);
+		fclose(fp);
+	} else {
+		/* Not fatal: linuxrc just cannot see the real root device */
+		fprintf(stderr, "%s: cannot write real-root-dev: %s\n",
+			progname, strerror(errno));
+	}
+
+	mkdir("/old", 0700);
+	root_fd = open("/", O_RDONLY|O_DIRECTORY|O_CLOEXEC, 0);
+	old_fd = open("/old", O_RDONLY|O_DIRECTORY|O_CLOEXEC, 0);
+
+	if (root_fd < 0 || old_fd < 0)
+		goto fail;
+
+	if (chdir("/root") ||
+	    mount(".", "/", NULL, MS_MOVE, NULL) || chroot("."))
+		goto fail;
+
+	pid = vfork();
+	if (pid == 0) {
+		setsid();
+		/* Looks like linuxrc doesn't get the init environment
+		   or parameters.  Weird, but so is the whole linuxrc bit. */
+		execl("/linuxrc", "linuxrc", (char *)NULL);
+		_exit(255);
+	} else if (pid > 0) {
+		dprintf("kinit: Waiting for linuxrc to complete...\n");
+		while (waitpid(pid, NULL, 0) != pid) {
+			/* Retry only when interrupted; don't spin on ECHILD */
+			if (errno != EINTR)
+				break;
+		}
+		dprintf("kinit: linuxrc done\n");
+	} else {
+		goto fail;
+	}
+
+	if (fchdir(old_fd) ||
+	    mount("/", ".", NULL, MS_MOVE, NULL) ||
+	    fchdir(root_fd) || chroot("."))
+		goto fail;
+
+	close(root_fd);
+	close(old_fd);
+
+	/* On failure realroot keeps its Root_RAM0 default */
+	getintfile("/proc/sys/kernel/real-root-dev", &realroot);
+
+	/* If realroot is Root_RAM0, then the initrd did any necessary work */
+	if (realroot == Root_RAM0) {
+		if (mount("/old", "/root", NULL, MS_MOVE, NULL))
+			return -errno;
+	} else {
+		mount_root(argc, argv, (dev_t) realroot, NULL);
+
+		/* If /root/initrd exists, move the initrd there, otherwise discard */
+		if (!mount("/old", "/root/initrd", NULL, MS_MOVE, NULL)) {
+			/* We're good */
+		} else {
+			int olddev = open(ramdisk_name, O_RDWR);
+			umount2("/old", MNT_DETACH);
+			if (olddev < 0 ||
+			    ioctl(olddev, BLKFLSBUF, 0) ||
+			    close(olddev)) {
+				fprintf(stderr,
+					"%s: Cannot flush initrd contents\n",
+					progname);
+			}
+		}
+	}
+
+	rmdir("/old");
+	return 0;
+
+fail:
+	/* Capture errno before close() has a chance to overwrite it */
+	err = -errno;
+	if (root_fd >= 0)
+		close(root_fd);
+	if (old_fd >= 0)
+		close(old_fd);
+	return err;
+}
+
+/*
+ * Install /initrd.image into /dev/ram0 and, unless root_dev is
+ * Root_RAM0 or Root_MULTI, run the old-style /linuxrc sequence.
+ *
+ * Returns 1 when run_linuxrc() was invoked (whether or not it
+ * succeeded), 0 in every other case: no image, a failed copy, or an
+ * initrd that the caller should mount as an ordinary root.
+ */
+int initrd_load(int argc, char *argv[], dev_t root_dev)
+{
+	int err;
+
+	if (access("/initrd.image", R_OK) != 0)
+		return 0;	/* No initrd */
+
+	dprintf("kinit: initrd found\n");
+
+	create_dev("/dev/ram0", Root_RAM0);
+
+	if (rd_copy_image("/initrd.image") || unlink("/initrd.image")) {
+		fprintf(stderr, "%s: initrd installation failed (too big?)\n",
+			progname);
+		return 0;	/* Failed to copy initrd */
+	}
+
+	dprintf("kinit: initrd copied\n");
+
+	if (root_dev == Root_MULTI) {
+		dprintf("kinit: skipping linuxrc: incompatible with multiple roots\n");
+		return 0;	/* Mounting initrd as ordinary root */
+	}
+
+	if (root_dev == Root_RAM0) {
+		dprintf("kinit: permament (or pivoting) initrd, not running linuxrc\n");
+		return 0;	/* Mounting initrd as ordinary root */
+	}
+
+	dprintf("kinit: running linuxrc\n");
+	err = run_linuxrc(argc, argv, root_dev);
+	if (err)
+		fprintf(stderr, "%s: running linuxrc: %s\n", progname,
+			strerror(-err));
+
+	return 1;	/* initrd is root, or run_linuxrc took care of it */
+}
diff --git a/usr/kinit/ipconfig/Kbuild b/usr/kinit/ipconfig/Kbuild
new file mode 100644
index 0000000..686b03b
--- /dev/null
+++ b/usr/kinit/ipconfig/Kbuild
@@ -0,0 +1,35 @@
+#
+# Kbuild file for ipconfig
+#
+
+static-y := static/ipconfig
+shared-y := shared/ipconfig
+
+# common .o files
+objs := main.o netdev.o packet.o
+# dhcp
+objs += dhcp_proto.o
+# bootp
+objs += bootp_proto.o
+
+
+# TODO - do we want a stripped version
+# TODO - do we want the static.g + shared.g directories?
+
+
+# Create built-in.o with all object files (used by kinit)
+lib-y := $(objs)
+
+# .o files used to build executables
+static/ipconfig-y := $(objs)
+shared/ipconfig-y := $(objs)
+
+# Cleaning
+clean-dirs := static shared
+
+# install binary
+ifdef KLIBCSHAREDFLAGS
+install-y := $(shared-y)
+else
+install-y := $(static-y)
+endif
diff --git a/usr/kinit/ipconfig/README.ipconfig b/usr/kinit/ipconfig/README.ipconfig
new file mode 100644
index 0000000..5ee87e5
--- /dev/null
+++ b/usr/kinit/ipconfig/README.ipconfig
@@ -0,0 +1,120 @@
+BOOTP/DHCP client for klibc
+---------------------------
+
+Usage:
+
+ipconfig [-c proto] [-d interface] [-i identifier]
+ [-n] [-p port] [-t timeout] [interface ...]
+
+-c proto Use PROTO as the configuration protocol for all
+ interfaces, unless overridden by specific interfaces.
+-d interface Either the name of an interface, or a long spec.
+-i identifier DHCP vendor class identifier. The default is
+ "Linux ipconfig".
+-n Do nothing - just print the configuration that would
+ be performed.
+-p port Send bootp/dhcp broadcasts from PORT, to PORT - 1.
+-t timeout Give up on all unconfigured interfaces after TIMEOUT secs.
+
+You can configure multiple interfaces by passing multiple interface
+specs on the command line, or by using the special interface name
+"all". If you're autoconfiguring any interfaces, ipconfig will wait
+until either all such interfaces have been configured, or the timeout
+passes.
+
+PROTO can be one of the following, which selects the autoconfiguration
+protocol to use:
+
+not specified use all protocols (the default)
+dhcp use bootp and dhcp
+bootp use bootp only
+rarp use rarp (not currently supported)
+none no autoconfiguration - either static config, or none at all
+
+An interface spec can be either short form, which is just the name of
+an interface (eth0 or whatever), or long form. The long form consists
+of two or more fields, separated by colons:
+
+<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:
+ <dns0-ip>:<dns1-ip>:<ntp0-ip>:...
+
+ <client-ip> IP address of the client. If empty, the address will
+ be determined by RARP/BOOTP/DHCP. Which protocol
+ is used depends on the <autoconf> parameter. If this
+ parameter is not empty, autoconf will be used.
+
+ <server-ip> IP address of the NFS server. If RARP is used to
+ determine the client address and this parameter is NOT
+ empty only replies from the specified server are
+ accepted. To use different RARP and NFS server,
+ specify your RARP server here (or leave it blank), and
+ specify your NFS server in the `nfsroot' parameter
+ (see above). If this entry is blank the address of the
+ server is used which answered the RARP/BOOTP/DHCP
+ request.
+
+ <gw-ip> IP address of a gateway if the server is on a different
+ subnet. If this entry is empty no gateway is used and the
+ server is assumed to be on the local network, unless a
+ value has been received by BOOTP/DHCP.
+
+ <netmask> Netmask for local network interface. If this is empty,
+ the netmask is derived from the client IP address assuming
+ classful addressing, unless overridden in BOOTP/DHCP reply.
+
+ <hostname> Name of the client. If empty, the client IP address is
+ used in ASCII notation, or the value received by
+ BOOTP/DHCP.
+
+ <device> Name of network device to use. If this is empty, all
+ devices are used for RARP/BOOTP/DHCP requests, and the
+ first one we receive a reply on is configured. If you
+ have only one device, you can safely leave this blank.
+
+ <autoconf> Method to use for autoconfiguration. If this is either
+ 'rarp', 'bootp', or 'dhcp' the specified protocol is
+ used. If the value is 'both', 'all' or empty, all
+ protocols are used. 'off', 'static' or 'none' means
+ no autoconfiguration.
+
+ <dns0-ip> IP address of primary nameserver.
+
+ Default: None if not using autoconfiguration; determined
+ automatically if using autoconfiguration.
+
+ <dns1-ip> IP address of secondary nameserver.
+ See <dns0-ip>.
+
+ <ntp0-ip> IP address of a Network Time Protocol (NTP) server.
+ Currently ignored.
+
+ ... Additional fields will be ignored.
+
+IP addresses and netmasks must be either absent (defaulting to zero)
+or presented in dotted-quad notation.
+
+An interface spec can be prefixed with either "ip=" or "nfsaddrs=", both
+of which are ignored. These (along with the ugliness of the long
+form) are present for compatibility with the in-kernel ipconfig code
+from 2.4 and earlier kernels.
+
+Here are a few examples of valid ipconfig command lines.
+
+Enable the loopback interface:
+ ipconfig 127.0.0.1:::::lo:none
+
+Try to configure eth0 using bootp for up to 30 seconds:
+ ipconfig -t 30 -c bootp eth0
+
+Configure eth0 and eth1 using dhcp or bootp, and eth2 statically:
+ ipconfig -c any eth0 eth1 192.168.1.1:::::eth2:none
+
+--
+
+From Russell's original README, and still true:
+
+The code in main.c is yucky imho. Needs cleaning.
+
+--
+Russell King (2002/10/22)
+Bryan O'Sullivan (2003/04/29)
diff --git a/usr/kinit/ipconfig/bootp_packet.h b/usr/kinit/ipconfig/bootp_packet.h
new file mode 100644
index 0000000..1d5bd0d
--- /dev/null
+++ b/usr/kinit/ipconfig/bootp_packet.h
@@ -0,0 +1,44 @@
+#ifndef BOOTP_PACKET_H
+#define BOOTP_PACKET_H
+
+#include <sys/uio.h>
+
+struct netdev;
+
+/* packet ops */
+#define BOOTP_REQUEST 1
+#define BOOTP_REPLY 2
+
+/*
+ * Fixed-size header of a BOOTP/DHCP message. The vendor extensions /
+ * DHCP options are not part of this struct; senders and receivers in
+ * ipconfig carry them in a separate iovec entry that follows the header.
+ */
+struct bootp_hdr {
+ uint8_t op; /* BOOTP_REQUEST or BOOTP_REPLY */
+ uint8_t htype; /* hardware type; senders copy dev->hwtype */
+ uint8_t hlen; /* hardware address length; senders copy dev->hwlen */
+ uint8_t hops; /* left zero by the senders in ipconfig */
+ uint32_t xid; /* transaction id; replies must match dev->bootp.xid */
+ uint16_t secs; /* seconds since dev->open_time, stored with htons() */
+ uint16_t flags; /* stored with htons(); dhcp_send() sets 0x8000 */
+ uint32_t ciaddr; /* client address as filled in by the client */
+ uint32_t yiaddr; /* address assigned in a reply; becomes dev->ip_addr */
+ uint32_t siaddr; /* becomes dev->ip_server when a reply is parsed */
+ uint32_t giaddr; /* becomes dev->ip_gateway when a reply is parsed */
+ uint8_t chaddr[16]; /* client hardware address; 16 bytes of dev->hwaddr */
+ char server_name[64];
+ char boot_file[128]; /* copied into dev->filename when a reply is parsed */
+ /* 312 bytes of extensions */
+};
+
+/*
+ * memory size of BOOTP Vendor Extensions/DHCP Options for receiving
+ *
+ * generic_ether_mtu:1500, min_sizeof(ip_hdr):20, sizeof(udp_hdr):8
+ *
+ * #define BOOTP_EXTS_SIZE (1500 - 20 - 8 - sizeof(struct bootp_hdr))
+ */
+/* larger size for backward compatibility of ipconfig */
+#define BOOTP_EXTS_SIZE 1500
+
+/* minimum length of BOOTP/DHCP packet on sending */
+#define BOOTP_MIN_LEN 300
+
+#endif /* BOOTP_PACKET_H */
diff --git a/usr/kinit/ipconfig/bootp_proto.c b/usr/kinit/ipconfig/bootp_proto.c
new file mode 100644
index 0000000..f6f9dd4
--- /dev/null
+++ b/usr/kinit/ipconfig/bootp_proto.c
@@ -0,0 +1,565 @@
+/*
+ * BOOTP packet protocol handling.
+ */
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <netinet/in.h>
+
+#include "ipconfig.h"
+#include "netdev.h"
+#include "bootp_packet.h"
+#include "bootp_proto.h"
+#include "packet.h"
+
+/*
+ * Option area used for plain BOOTP requests; bootp_send_request() places
+ * it in iov[2]. After the 4-byte magic cookie, each designated index holds
+ * an option tag followed by its length; the value bytes in between are not
+ * named by the initializer and are therefore zero.
+ */
+static uint8_t bootp_options[312] = {
+ [ 0] = 99, 130, 83, 99,/* RFC1048 magic cookie */
+ [ 4] = 1, 4, /* 4- 9 subnet mask */
+ [ 10] = 3, 4, /* 10- 15 default gateway */
+ [ 16] = 5, 8, /* 16- 25 nameserver */
+ [ 26] = 12, 32, /* 26- 59 host name */
+ [ 60] = 40, 32, /* 60- 95 nis domain name */
+ [ 96] = 17, 40, /* 96-137 boot path */
+ [138] = 57, 2, 1, 150, /* 138-141 extension buffer */
+ [142] = 255, /* end of list */
+};
+
+/*
+ * Send a plain bootp request packet with options
+ *
+ * The request consists of the bootp header built here followed by the
+ * static bootp_options area. Returns the result of packet_send().
+ */
+int bootp_send_request(struct netdev *dev)
+{
+	struct bootp_hdr bootp;
+	struct iovec iov[] = {
+		/* [0] = ip + udp headers */
+		[1] = {&bootp, sizeof(bootp)},
+		[2] = {bootp_options, sizeof(bootp_options)}
+	};
+
+	memset(&bootp, 0, sizeof(bootp));
+
+	bootp.op = BOOTP_REQUEST;
+	bootp.htype = dev->hwtype;
+	bootp.hlen = dev->hwlen;
+	bootp.xid = dev->bootp.xid;
+	bootp.ciaddr = dev->ip_addr;
+	bootp.secs = htons(time(NULL) - dev->open_time);
+	memcpy(bootp.chaddr, dev->hwaddr, 16);
+
+	dprintf("-> bootp xid 0x%08x secs 0x%08x ",
+		bootp.xid, ntohs(bootp.secs));
+
+	/*
+	 * Pass the number of iovec entries, not the index of the last one:
+	 * dhcp_send() passes "last index + 1" and bootp_recv_reply() passes 3
+	 * for the same three-entry layout. Passing 2 here left iov[2] (the
+	 * option area) out of the request.
+	 */
+	return packet_send(dev, iov, sizeof(iov) / sizeof(iov[0]));
+}
+
+/*
+ * DESCRIPTION
+ * bootp_ext119_decode() decodes Domain Search Option data.
+ * The decoded string is separated with ' '.
+ * For example, it is either "foo.bar.baz. bar.baz.", "foo.bar.", or "foo.".
+ *
+ * The function works in two passes over *ext: the first validates the
+ * encoding and computes the exact size of the decoded string, the second
+ * allocates that many bytes and writes the string.
+ *
+ * ARGUMENTS
+ * const void *ext
+ * *ext is a pointer to a DHCP Domain Search Option data. *ext does not
+ * include a tag(code) octet and a length octet in DHCP options.
+ * For example, if *ext is {3, 'f', 'o', 'o', 0}, this function returns
+ * a pointer to a "foo." string.
+ *
+ * int16_t ext_size
+ * ext_size is the memory size of *ext. For example,
+ * if *ext is {3, 'f', 'o', 'o', 0}, ext_size must be 5.
+ *
+ * void *tmp
+ * *tmp is a pointer to a temporary memory space for decoding.
+ * The memory size must be equal to or more than ext_size.
+ * 'memset(tmp, 0, sizeof(tmp));' is not required, but values in *tmp
+ * are changed in decoding process.
+ *
+ * RETURN VALUE
+ * if OK, a pointer to a decoded string malloc-ed
+ * else , NULL (NULL argument, non-positive size, malformed data,
+ * or allocation failure)
+ *
+ * SEE ALSO RFC3397
+ */
+static char *bootp_ext119_decode(const void *ext, int16_t ext_size, void *tmp)
+{
+ uint8_t *u8ext;
+ int_fast32_t i;
+ int_fast32_t decoded_size;
+ int_fast8_t currentdomain_is_singledot;
+
+ /* only for validating *ext */
+ uint8_t *is_pointee;
+ int_fast32_t is_pointee_size;
+
+ /* only for structing a decoded string */
+ char *decoded_str;
+ int_fast32_t dst_i;
+
+ if (ext == NULL || ext_size <= 0 || tmp == NULL)
+ return NULL;
+
+ u8ext = (uint8_t *)ext;
+ /* is_pointee[k] != 0 marks offset k as the start of a validated label */
+ is_pointee = tmp;
+ memset(is_pointee, 0, (size_t)ext_size);
+ /* compression pointers may only target offsets below this bound */
+ is_pointee_size = 0;
+
+ /*
+ * validate the format of *ext and
+ * calculate the memory size for a decoded string
+ */
+ i = 0;
+ decoded_size = 0;
+ currentdomain_is_singledot = 1;
+ while (1) {
+ /* running off the end without a terminator means malformed data */
+ if (i >= ext_size)
+ return NULL;
+
+ if (u8ext[i] == 0) {
+ /* Zero-ending */
+ if (currentdomain_is_singledot)
+ decoded_size++; /* for '.' */
+ decoded_size++; /* for ' ' or '\0' */
+ currentdomain_is_singledot = 1;
+ i++;
+ if (i == ext_size)
+ break;
+ /* everything before the next domain is now a legal pointer target */
+ is_pointee_size = i;
+ } else if (u8ext[i] < 0x40) {
+ /* Label(sub-domain string) */
+ int j;
+
+ /* loosely validate characters for domain names */
+ /* the label must be followed by at least one more octet */
+ if (i + u8ext[i] >= ext_size)
+ return NULL;
+ for (j = i + 1; j <= i + u8ext[i]; j++)
+ if (!(u8ext[j] == '-' ||
+ ('0' <= u8ext[j] && u8ext[j] <= '9') ||
+ ('A' <= u8ext[j] && u8ext[j] <= 'Z') ||
+ ('a' <= u8ext[j] && u8ext[j] <= 'z')))
+ return NULL;
+
+ is_pointee[i] = 1;
+ decoded_size += u8ext[i] + 1; /* for Label + '.' */
+ currentdomain_is_singledot = 0;
+ i += u8ext[i] + 1;
+ } else if (u8ext[i] < 0xc0)
+ /* length octets 0x40..0xbf are rejected */
+ return NULL;
+
+ else {
+ /* Compression-pointer (to a prior Label) */
+ int_fast32_t p;
+
+ /* a pointer occupies two octets */
+ if (i + 1 >= ext_size)
+ return NULL;
+
+ p = ((0x3f & u8ext[i]) << 8) + u8ext[i + 1];
+ /* target must be a label start inside an already completed domain */
+ if (!(p < is_pointee_size && is_pointee[p]))
+ return NULL;
+
+ /* follow the pointed-to name to its end, only to count its size */
+ while (1) {
+ /* u8ext[p] was validated */
+ if (u8ext[p] == 0) {
+ /* Zero-ending */
+ decoded_size++;
+ break;
+ } else if (u8ext[p] < 0x40) {
+ /* Label(sub-domain string) */
+ decoded_size += u8ext[p] + 1;
+ p += u8ext[p] + 1;
+ } else {
+ /* Compression-pointer */
+ p = ((0x3f & u8ext[p]) << 8)
+ + u8ext[p + 1];
+ }
+ }
+
+ /* a compression pointer always terminates the current domain */
+ currentdomain_is_singledot = 1;
+ i += 2;
+ if (i == ext_size)
+ break;
+ is_pointee_size = i;
+ }
+ }
+
+
+ /*
+ * construct a decoded string
+ */
+ decoded_str = malloc(decoded_size);
+ if (decoded_str == NULL)
+ return NULL;
+
+ /*
+ * Second pass. No bounds checks are repeated here; the first pass
+ * already rejected any input that would run past ext_size.
+ * dst_i is the index of the last character written (not the next free
+ * slot), which is why it is advanced before each write except the very
+ * first one at offset 0.
+ */
+ i = 0;
+ dst_i = 0;
+ currentdomain_is_singledot = 1;
+ while (1) {
+ if (u8ext[i] == 0) {
+ /* Zero-ending */
+ if (currentdomain_is_singledot) {
+ if (dst_i != 0)
+ dst_i++;
+ decoded_str[dst_i] = '.';
+ }
+ dst_i++;
+ decoded_str[dst_i] = ' ';
+
+ currentdomain_is_singledot = 1;
+ i++;
+ if (i == ext_size)
+ break;
+ } else if (u8ext[i] < 0x40) {
+ /* Label(sub-domain string) */
+ if (dst_i != 0)
+ dst_i++;
+ memcpy(&decoded_str[dst_i], &u8ext[i + 1],
+ (size_t)u8ext[i]);
+ dst_i += u8ext[i];
+ decoded_str[dst_i] = '.';
+
+ currentdomain_is_singledot = 0;
+ i += u8ext[i] + 1;
+ } else {
+ /* Compression-pointer (to a prior Label) */
+ int_fast32_t p;
+
+ p = ((0x3f & u8ext[i]) << 8) + u8ext[i + 1];
+ while (1) {
+ if (u8ext[p] == 0) {
+ /* Zero-ending */
+ /* re-writes the '.' already at dst_i, then appends ' ' */
+ decoded_str[dst_i++] = '.';
+ decoded_str[dst_i] = ' ';
+ break;
+ } else if (u8ext[p] < 0x40) {
+ /* Label(sub-domain string) */
+ dst_i++;
+ memcpy(&decoded_str[dst_i],
+ &u8ext[p + 1],
+ (size_t)u8ext[p]);
+ dst_i += u8ext[p];
+ decoded_str[dst_i] = '.';
+
+ p += u8ext[p] + 1;
+ } else {
+ /* Compression-pointer */
+ p = ((0x3f & u8ext[p]) << 8)
+ + u8ext[p + 1];
+ }
+ }
+
+ currentdomain_is_singledot = 1;
+ i += 2;
+ if (i == ext_size)
+ break;
+ }
+ }
+ /* every exit from the loop leaves dst_i on a trailing ' '; replace it */
+ decoded_str[dst_i] = '\0';
+#ifdef DEBUG
+ /* cross-check the size computed by the first pass */
+ if (dst_i + 1 != decoded_size) {
+ dprintf("bug:%s():bottom: malloc(%ld), write(%ld)\n",
+ __func__, (long)decoded_size, (long)(dst_i + 1));
+ exit(1);
+ }
+#endif
+ return decoded_str;
+}
+
+/*
+ * DESCRIPTION
+ * bootp_ext121_decode() turns Classless Static Route Option data into a
+ * singly linked list of struct route, in the order the routes appear.
+ *
+ * Each route is encoded as: one octet with the netmask width, then only
+ * the significant octets of the destination, then a 4-octet gateway.
+ * For example {16, 192, 168, 192, 168, 42, 1} yields
+ * {
+ * subnet = 192.168.0.0;
+ * netmask_width = 16;
+ * gateway = 192.168.42.1;
+ * next = NULL;
+ * }
+ *
+ * ARGUMENTS
+ * const uint8_t *ext
+ * option payload, without the tag and length octets
+ *
+ * int16_t ext_size
+ * number of octets available at *ext (7 in the example above)
+ *
+ * RETURN VALUE
+ * Head of a malloc-ed list, or NULL if no route could be decoded.
+ * Decoding stops at the first malformed entry or allocation failure and
+ * the routes decoded up to that point are returned.
+ *
+ * SEE ALSO RFC3442
+ */
+struct route *bootp_ext121_decode(const uint8_t *ext, int16_t ext_size)
+{
+	struct route *head = NULL;
+	struct route **link = &head;	/* where the next entry gets attached */
+	int16_t pos = 0;
+
+	while (pos < ext_size) {
+		struct route *entry;
+		uint8_t width = ext[pos];
+		uint8_t dest_octets;
+
+		pos++;
+		if (width > 32) {
+			printf("IP-Config: Given Classless Route Option subnet mask width '%u' "
+			       "exceeds IPv4 limit of 32. Ignoring remaining option.\n",
+			       width);
+			break;
+		}
+
+		/* number of destination octets actually present: ceil(width / 8) */
+		dest_octets = (width + 7) / 8;
+		if (ext_size - pos < dest_octets + 4) {
+			printf("IP-Config: Given Classless Route Option remaining lengths (%u octets) "
+			       "is shorter than the expected %u octets. Ignoring remaining options.\n",
+			       ext_size - pos, dest_octets + 4);
+			break;
+		}
+
+		entry = malloc(sizeof(*entry));
+		if (entry == NULL)
+			break;
+
+		entry->next = NULL;
+		entry->netmask_width = width;
+
+		/* the octets are copied as-is, so the value stays in network byte order */
+		entry->subnet = 0;
+		memcpy(&entry->subnet, ext + pos, dest_octets);
+		pos += dest_octets;
+		/* RFC3442 demands: the client MUST zero any bits in the subnet
+		   number where the corresponding bit in the mask is zero. */
+		entry->subnet &= netdev_genmask(width);
+
+		memcpy(&entry->gateway, ext + pos, 4);
+		pos += 4;
+
+		*link = entry;
+		link = &entry->next;
+	}
+
+	return head;
+}
+
+/*
+ * Copy the text of an option into a fixed-size field, truncating it to
+ * dst_size - 1 octets if necessary, and NUL-terminate the result.
+ */
+static void bootp_copy_text_opt(char *dst, size_t dst_size,
+				const uint8_t *src, size_t len)
+{
+	if (len > dst_size - 1)
+		len = dst_size - 1;
+	memcpy(dst, src, len);
+	dst[len] = '\0';
+}
+
+/*
+ * Append one option instance to an accumulation buffer.
+ * Returns the new fill level, or -1 if the buffer already overflowed
+ * earlier (used < 0) or this instance does not fit.
+ */
+static int16_t bootp_append_opt(uint8_t *buf, size_t buf_size, int16_t used,
+				const uint8_t *src, int len)
+{
+	if (used < 0 || (size_t)used + (size_t)len > buf_size)
+		return -1;
+	memcpy(buf + used, src, len);
+	return used + len;
+}
+
+/*
+ * Parse a bootp reply packet
+ *
+ * dev    - device whose configuration is filled in from the reply
+ * hdr    - fixed bootp header of the reply
+ * exts   - option area that followed the header
+ * extlen - number of valid octets at exts
+ *
+ * The addresses, hostname, NIS domain and boot path in *dev are reset
+ * before the options are applied. dnsdomainname, domainsearch and routes
+ * are only replaced when the reply carries a usable value for them.
+ *
+ * Always returns 1 ("got packet").
+ */
+int bootp_parse(struct netdev *dev, struct bootp_hdr *hdr,
+		uint8_t *exts, int extlen)
+{
+	/* options 119 and 121 may be split over several instances; collect them */
+	uint8_t ext119_buf[BOOTP_EXTS_SIZE];
+	int16_t ext119_len = 0;
+	uint8_t ext121_buf[BOOTP_EXTS_SIZE];
+	int16_t ext121_len = 0;
+
+	dev->bootp.gateway = hdr->giaddr;
+	dev->ip_addr = hdr->yiaddr;
+	dev->ip_server = hdr->siaddr;
+	dev->ip_netmask = INADDR_ANY;
+	dev->ip_broadcast = INADDR_ANY;
+	dev->ip_gateway = hdr->giaddr;
+	dev->ip_nameserver[0] = INADDR_ANY;
+	dev->ip_nameserver[1] = INADDR_ANY;
+	dev->hostname[0] = '\0';
+	dev->nisdomainname[0] = '\0';
+	dev->bootpath[0] = '\0';
+	memcpy(&dev->filename, &hdr->boot_file, FNLEN);
+
+	/* options are only parsed when the area starts with the magic cookie */
+	if (extlen >= 4 && exts[0] == 99 && exts[1] == 130 &&
+	    exts[2] == 83 && exts[3] == 99) {
+		uint8_t *ext;
+
+		for (ext = exts + 4; ext - exts < extlen;) {
+			int len;
+			uint8_t opt = *ext++;
+
+			if (opt == 0)		/* pad: no length octet */
+				continue;
+			else if (opt == 255)	/* end of list */
+				break;
+
+			if (ext - exts >= extlen)
+				break;
+			len = *ext++;
+
+			/* stop at an option that claims more data than was received */
+			if (ext - exts + len > extlen)
+				break;
+
+			/*
+			 * len must stay the on-wire option length for the whole
+			 * iteration: it is what advances ext below. Truncation of
+			 * over-long text options happens inside the copy helper
+			 * only, so the parser never resumes in the middle of an
+			 * option's payload.
+			 */
+			switch (opt) {
+			case 1:	/* subnet mask */
+				if (len == 4)
+					memcpy(&dev->ip_netmask, ext, 4);
+				break;
+			case 3:	/* default gateway */
+				if (len >= 4)
+					memcpy(&dev->ip_gateway, ext, 4);
+				break;
+			case 6:	/* DNS server */
+				if (len >= 4)
+					memcpy(&dev->ip_nameserver, ext,
+					       len >= 8 ? 8 : 4);
+				break;
+			case 12:	/* host name */
+				bootp_copy_text_opt(dev->hostname,
+						    sizeof(dev->hostname),
+						    ext, len);
+				break;
+			case 15:	/* domain name */
+				bootp_copy_text_opt(dev->dnsdomainname,
+						    sizeof(dev->dnsdomainname),
+						    ext, len);
+				break;
+			case 17:	/* root path */
+				bootp_copy_text_opt(dev->bootpath,
+						    sizeof(dev->bootpath),
+						    ext, len);
+				break;
+			case 26:	/* interface MTU */
+				if (len == 2)
+					dev->mtu = (ext[0] << 8) + ext[1];
+				break;
+			case 28:	/* broadcast addr */
+				if (len == 4)
+					memcpy(&dev->ip_broadcast, ext, 4);
+				break;
+			case 40:	/* NIS domain name */
+				bootp_copy_text_opt(dev->nisdomainname,
+						    sizeof(dev->nisdomainname),
+						    ext, len);
+				break;
+			case 54:	/* server identifier */
+				/* only used when the header carried no server address */
+				if (len == 4 && !dev->ip_server)
+					memcpy(&dev->ip_server, ext, 4);
+				break;
+			case 119:	/* Domain Search Option */
+				ext119_len = bootp_append_opt(ext119_buf,
+							      sizeof(ext119_buf),
+							      ext119_len,
+							      ext, len);
+				break;
+			case 121:	/* Classless Static Route Option (RFC3442) */
+				ext121_len = bootp_append_opt(ext121_buf,
+							      sizeof(ext121_buf),
+							      ext121_len,
+							      ext, len);
+				break;
+			default:	/* options we do not use are skipped */
+				break;
+			}
+
+			ext += len;
+		}
+	}
+
+	if (ext119_len > 0) {
+		char *ret;
+		uint8_t ext119_tmp[BOOTP_EXTS_SIZE];
+
+		ret = bootp_ext119_decode(ext119_buf, ext119_len, ext119_tmp);
+		if (ret != NULL) {
+			/* keep the previous list unless decoding succeeded */
+			free(dev->domainsearch);
+			dev->domainsearch = ret;
+		}
+	}
+
+	if (ext121_len > 0) {
+		struct route *ret;
+
+		ret = bootp_ext121_decode(ext121_buf, ext121_len);
+		if (ret != NULL) {
+			/* release the previous route list before replacing it */
+			struct route *cur = dev->routes;
+			struct route *next;
+
+			while (cur != NULL) {
+				next = cur->next;
+				free(cur);
+				cur = next;
+			}
+			dev->routes = ret;
+		}
+	}
+
+	/*
+	 * Got packet.
+	 */
+	return 1;
+}
+
+/*
+ * Receive one bootp reply and hand it to bootp_parse()
+ * Returns:
+ *-1 = Error in packet_recv, try again later
+ * 0 = Nothing received, or the packet was not a reply to our request
+ * 1 = Correctly received and parsed packet
+ */
+int bootp_recv_reply(struct netdev *dev)
+{
+	struct bootp_hdr hdr;
+	uint8_t opts[BOOTP_EXTS_SIZE];
+	struct iovec iov[] = {
+		/* [0] = ip + udp headers */
+		[1] = { .iov_base = &hdr, .iov_len = sizeof(hdr) },
+		[2] = { .iov_base = opts, .iov_len = sizeof(opts) }
+	};
+	int nread = packet_recv(dev, iov, 3);
+
+	if (nread <= 0)
+		return nread;
+
+	/* too short to contain a complete header */
+	if ((size_t)nread < sizeof(hdr))
+		return 0;
+
+	/* RFC951 7.5: must be a reply carrying our transaction id ... */
+	if (hdr.op != BOOTP_REPLY)
+		return 0;
+	if (hdr.xid != dev->bootp.xid)
+		return 0;
+
+	/* ... and addressed to our hardware address */
+	if (memcmp(hdr.chaddr, dev->hwaddr, 16) != 0)
+		return 0;
+
+	/* whatever follows the header is the option area */
+	return bootp_parse(dev, &hdr, opts, nread - (int)sizeof(hdr));
+}
+
+/*
+ * Initialise interface for bootp.
+ *
+ * Returns -1 if the device flags cannot be read, 0 otherwise. A device
+ * that cannot be used has CAP_BOOTP and CAP_DHCP cleared and still
+ * yields 0.
+ */
+int bootp_init_if(struct netdev *dev)
+{
+	short ifflags;
+	int can_broadcast;
+
+	if (netdev_getflags(dev, &ifflags) != 0)
+		return -1;
+
+	/*
+	 * Neither DHCP nor BOOTP is possible without broadcast support
+	 * or with an MTU below 364.
+	 */
+	can_broadcast = (ifflags & IFF_BROADCAST) != 0;
+	if (!(dev->mtu >= 364 && can_broadcast)) {
+		dev->caps &= ~(CAP_BOOTP | CAP_DHCP);
+		return 0;
+	}
+
+	/* Start a new exchange: random transaction id, clock starts now */
+	dev->bootp.xid = (uint32_t) lrand48();
+	dev->open_time = time(NULL);
+
+	return 0;
+}
diff --git a/usr/kinit/ipconfig/bootp_proto.h b/usr/kinit/ipconfig/bootp_proto.h
new file mode 100644
index 0000000..60873ce
--- /dev/null
+++ b/usr/kinit/ipconfig/bootp_proto.h
@@ -0,0 +1,10 @@
+#ifndef IPCONFIG_BOOTP_PROTO_H
+#define IPCONFIG_BOOTP_PROTO_H
+
+int bootp_send_request(struct netdev *dev);
+int bootp_recv_reply(struct netdev *dev);
+int bootp_parse(struct netdev *dev, struct bootp_hdr *hdr, uint8_t * exts,
+ int extlen);
+int bootp_init_if(struct netdev *dev);
+
+#endif /* IPCONFIG_BOOTP_PROTO_H */
diff --git a/usr/kinit/ipconfig/dhcp_proto.c b/usr/kinit/ipconfig/dhcp_proto.c
new file mode 100644
index 0000000..4e560b8
--- /dev/null
+++ b/usr/kinit/ipconfig/dhcp_proto.c
@@ -0,0 +1,301 @@
+/*
+ * DHCP RFC 2131 and 2132
+ */
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+
+#include "ipconfig.h"
+#include "netdev.h"
+#include "bootp_packet.h"
+#include "bootp_proto.h"
+#include "dhcp_proto.h"
+#include "packet.h"
+
+static uint8_t dhcp_params[] = {
+ 1, /* subnet mask */
+ 3, /* default gateway */
+ 6, /* DNS server */
+ 12, /* host name */
+ 15, /* domain name */
+ 17, /* root path */
+ 26, /* interface mtu */
+ 28, /* broadcast addr */
+ 40, /* NIS domain name (why?) */
+ 119, /* Domain Search Option */
+ 121, /* Classless Static Route Option (RFC3442) */
+};
+
+static uint8_t dhcp_discover_hdr[] = {
+ 99, 130, 83, 99, /* bootp cookie */
+ 53, 1, DHCPDISCOVER, /* dhcp message type */
+ 55, sizeof(dhcp_params), /* parameter list */
+};
+
+/*
+ * Leading options of a DHCPREQUEST. dhcp_send_request() overwrites the
+ * four zero bytes at SERVER_IP_OFF and REQ_IP_OFF with dev->serverid and
+ * dev->ip_addr before sending, so the two offsets must be kept in step
+ * with the layout of this array (each is the index of the first value
+ * byte, i.e. just past the option's tag and length octets).
+ */
+static uint8_t dhcp_request_hdr[] = {
+ 99, 130, 83, 99, /* boot cookie */
+ 53, 1, DHCPREQUEST, /* dhcp message type */
+#define SERVER_IP_OFF 9
+ 54, 4, 0, 0, 0, 0, /* server IP */
+#define REQ_IP_OFF 15
+ 50, 4, 0, 0, 0, 0, /* requested IP address */
+ 55, sizeof(dhcp_params), /* parameter list */
+};
+
+static uint8_t dhcp_end[] = {
+ 255,
+};
+
+/* Both iovecs below have to have the same structure, since dhcp_send()
+ pokes at the internals */
+#define DHCP_IOV_LEN 8
+
+static struct iovec dhcp_discover_iov[DHCP_IOV_LEN] = {
+ /* [0] = ip + udp header */
+ /* [1] = bootp header */
+ [2] = {dhcp_discover_hdr, sizeof(dhcp_discover_hdr)},
+ [3] = {dhcp_params, sizeof(dhcp_params)},
+ /* [4] = optional vendor class */
+ /* [5] = optional hostname */
+ /* [6] = {dhcp_end, sizeof(dhcp_end)} */
+ /* [7] = optional padding */
+};
+
+static struct iovec dhcp_request_iov[DHCP_IOV_LEN] = {
+ /* [0] = ip + udp header */
+ /* [1] = bootp header */
+ [2] = {dhcp_request_hdr, sizeof(dhcp_request_hdr)},
+ [3] = {dhcp_params, sizeof(dhcp_params)},
+ /* [4] = optional vendor class */
+ /* [5] = optional hostname */
+ /* [6] = {dhcp_end, sizeof(dhcp_end)} */
+ /* [7] = optional padding */
+};
+
+/*
+ * Parse a DHCP response packet
+ *
+ * The option area is scanned for the message type, server identifier and
+ * lease time; for DHCPOFFER and DHCPACK the packet is then handed to
+ * bootp_parse() to fill in the device configuration.
+ *
+ * Returns:
+ * 0 = Unexpected packet, not parsed
+ * 2 = DHCPOFFER (from dhcp_proto.h)
+ * 5 = DHCPACK
+ * 6 = DHCPNAK
+ */
+static int dhcp_parse(struct netdev *dev, struct bootp_hdr *hdr,
+		      uint8_t *exts, int extlen)
+{
+	uint8_t type = 0;
+	uint32_t serverid = INADDR_NONE;
+	uint32_t leasetime = 0;
+	int ret = 0;
+
+	if (extlen >= 4 && exts[0] == 99 && exts[1] == 130 &&
+	    exts[2] == 83 && exts[3] == 99) {
+		uint8_t *ext;
+
+		for (ext = exts + 4; ext - exts < extlen;) {
+			int len;
+			uint8_t opt = *ext++;
+
+			if (opt == 0)		/* pad: no length octet */
+				continue;
+			else if (opt == 255)	/* end of list */
+				break;
+
+			if (ext - exts >= extlen)
+				break;
+			len = *ext++;
+
+			if (ext - exts + len > extlen)
+				break;
+			switch (opt) {
+			case 51:	/* IP Address Lease Time */
+				if (len == 4) {
+					/*
+					 * Option data sits at an arbitrary byte
+					 * offset, so copy it out instead of
+					 * dereferencing a uint32_t pointer.
+					 */
+					uint32_t raw;
+
+					memcpy(&raw, ext, sizeof(raw));
+					leasetime = ntohl(raw);
+				}
+				break;
+			case 53:	/* DHCP Message Type */
+				if (len == 1)
+					type = *ext;
+				break;
+			case 54:	/* Server Identifier */
+				if (len == 4)
+					memcpy(&serverid, ext, 4);
+				break;
+			default:	/* everything else is left to bootp_parse() */
+				break;
+			}
+			ext += len;
+		}
+	}
+
+	switch (type) {
+	case DHCPOFFER:
+		ret = bootp_parse(dev, hdr, exts, extlen) ? DHCPOFFER : 0;
+		/* remember which server made the offer, if it told us */
+		if (ret == DHCPOFFER && serverid != INADDR_NONE)
+			dev->serverid = serverid;
+		dprintf("\n dhcp offer\n");
+		break;
+
+	case DHCPACK:
+		dev->dhcpleasetime = leasetime;
+		ret = bootp_parse(dev, hdr, exts, extlen) ? DHCPACK : 0;
+		dprintf("\n dhcp ack\n");
+		break;
+
+	case DHCPNAK:
+		ret = DHCPNAK;
+		dprintf("\n dhcp nak\n");
+		break;
+	}
+	return ret;
+}
+
+/*
+ * Receive one packet and, if it answers our request, parse it as DHCP
+ * Returns:
+ *-1 = Error in packet_recv, try again later
+ * 0 = Nothing received, or unexpected packet, discarded
+ * 2 = DHCPOFFER (from dhcp_proto.h)
+ * 5 = DHCPACK
+ * 6 = DHCPNAK
+ */
+static int dhcp_recv(struct netdev *dev)
+{
+	struct bootp_hdr hdr;
+	uint8_t opts[BOOTP_EXTS_SIZE];
+	struct iovec iov[] = {
+		/* [0] = ip + udp header */
+		[1] = { .iov_base = &hdr, .iov_len = sizeof(hdr) },
+		[2] = { .iov_base = opts, .iov_len = sizeof(opts) }
+	};
+	int nread = packet_recv(dev, iov, 3);
+
+	if (nread <= 0)
+		return nread;
+
+	dprintf("\n dhcp xid %08x ", dev->bootp.xid);
+
+	/* too short to contain a complete header */
+	if ((size_t)nread < sizeof(hdr))
+		return 0;
+
+	/* RFC951 7.5: must be a reply carrying our transaction id ... */
+	if (hdr.op != BOOTP_REPLY)
+		return 0;
+	if (hdr.xid != dev->bootp.xid)
+		return 0;
+
+	/* ... and addressed to our hardware address */
+	if (memcmp(hdr.chaddr, dev->hwaddr, 16) != 0)
+		return 0;
+
+	/* whatever follows the header is the option area */
+	return dhcp_parse(dev, &hdr, opts, nread - (int)sizeof(hdr));
+}
+
+/*
+ * Complete one of the DHCP iovec templates and send it.
+ *
+ * vec[2] and vec[3] come pre-filled from the template. This function
+ * fills vec[1] with a fresh bootp header and then, starting at vec[4],
+ * appends the optional vendor class, the optional hostname, the end
+ * marker and, if the options are shorter than the minimum, zero padding.
+ *
+ * Returns the result of packet_send().
+ */
+static int dhcp_send(struct netdev *dev, struct iovec *vec)
+{
+	struct bootp_hdr hdr;
+	char hostname_opt[SYS_NMLN+2];
+	uint8_t pad[BOOTP_MIN_LEN - sizeof(struct bootp_hdr)];
+	size_t opt_bytes = 0;
+	int slot = 4;		/* next free iovec entry */
+	int k;
+
+	memset(&hdr, 0, sizeof(hdr));
+
+	hdr.op = BOOTP_REQUEST;
+	hdr.htype = dev->hwtype;
+	hdr.hlen = dev->hwlen;
+	hdr.xid = dev->bootp.xid;
+	hdr.secs = htons(time(NULL) - dev->open_time);
+	hdr.flags = htons(0x8000);
+	hdr.ciaddr = INADDR_ANY;
+	/* yiaddr should always be set to 0 for the messages we're likely
+	 * to send as a DHCP client: DHCPDISCOVER, DHCPREQUEST, DHCPDECLINE,
+	 * DHCPINFORM, DHCPRELEASE
+	 * cf. RFC2131 section 4.1.1, table 5.
+	 */
+	hdr.yiaddr = INADDR_ANY;
+	hdr.giaddr = INADDR_ANY;
+	memcpy(hdr.chaddr, dev->hwaddr, 16);
+
+	vec[1].iov_base = &hdr;
+	vec[1].iov_len = sizeof(hdr);
+
+	dprintf("xid %08x secs %d ", hdr.xid, ntohs(hdr.secs));
+
+	/* the identifier buffer starts with a 2-octet tag/length prefix */
+	if (vendor_class_identifier_len > 2) {
+		vec[slot].iov_base = vendor_class_identifier;
+		vec[slot].iov_len = vendor_class_identifier_len;
+		slot++;
+
+		dprintf("vendor_class_identifier \"%.*s\" ",
+			vendor_class_identifier_len-2,
+			vendor_class_identifier+2);
+	}
+
+	if (dev->reqhostname[0] != '\0') {
+		int len = strlen(dev->reqhostname);
+
+		/* option 12: tag, length, then the name without terminator */
+		hostname_opt[0] = 12;
+		hostname_opt[1] = len;
+		memcpy(hostname_opt+2, dev->reqhostname, len);
+
+		vec[slot].iov_base = hostname_opt;
+		vec[slot].iov_len = len+2;
+		slot++;
+
+		printf("hostname %.*s ", len, hostname_opt+2);
+	}
+
+	vec[slot].iov_base = dhcp_end;
+	vec[slot].iov_len = sizeof(dhcp_end);
+
+	/* Pad the option area if it is shorter than BOOTP_MIN_LEN requires */
+	for (k = 2; k <= slot; k++)
+		opt_bytes += vec[k].iov_len;
+	if (opt_bytes < sizeof(pad)) {
+		size_t pad_len = sizeof(pad) - opt_bytes;
+
+		memset(pad, 0, pad_len);
+		slot++;
+		vec[slot].iov_base = pad;
+		vec[slot].iov_len = pad_len;
+	}
+
+	/* slot is the index of the last entry in use */
+	return packet_send(dev, vec, slot + 1);
+}
+
+/*
+ * Send a DHCP discover packet
+ *
+ * The device's address and gateway are cleared first. Returns the result
+ * of dhcp_send().
+ */
+int dhcp_send_discover(struct netdev *dev)
+{
+	int status;
+
+	dev->ip_gateway = INADDR_ANY;
+	dev->ip_addr = INADDR_ANY;
+
+	dprintf("-> dhcp discover ");
+
+	status = dhcp_send(dev, dhcp_discover_iov);
+	return status;
+}
+
+/*
+ * Receive a DHCP offer packet
+ *
+ * Thin wrapper: the result is exactly what dhcp_recv() reports.
+ */
+int dhcp_recv_offer(struct netdev *dev)
+{
+	int status = dhcp_recv(dev);
+
+	return status;
+}
+
+/*
+ * Send a DHCP request packet
+ *
+ * The server identifier and the requested address are patched into the
+ * request option template before it is sent. Returns the result of
+ * dhcp_send().
+ */
+int dhcp_send_request(struct netdev *dev)
+{
+	uint8_t *requested_ip = dhcp_request_hdr + REQ_IP_OFF;
+	uint8_t *server_ip = dhcp_request_hdr + SERVER_IP_OFF;
+
+	memcpy(requested_ip, &dev->ip_addr, 4);
+	memcpy(server_ip, &dev->serverid, 4);
+
+	dprintf("-> dhcp request ");
+
+	return dhcp_send(dev, dhcp_request_iov);
+}
+
+/*
+ * Receive a DHCP ack packet
+ *
+ * Thin wrapper: the result is exactly what dhcp_recv() reports.
+ */
+int dhcp_recv_ack(struct netdev *dev)
+{
+	int status = dhcp_recv(dev);
+
+	return status;
+}
diff --git a/usr/kinit/ipconfig/dhcp_proto.h b/usr/kinit/ipconfig/dhcp_proto.h
new file mode 100644
index 0000000..0fba92f
--- /dev/null
+++ b/usr/kinit/ipconfig/dhcp_proto.h
@@ -0,0 +1,19 @@
+#ifndef IPCONFIG_DHCP_PROTO_H
+#define IPCONFIG_DHCP_PROTO_H
+
+/* DHCP message types */
+#define DHCPDISCOVER 1
+#define DHCPOFFER 2
+#define DHCPREQUEST 3
+#define DHCPDECLINE 4
+#define DHCPACK 5
+#define DHCPNAK 6
+#define DHCPRELEASE 7
+#define DHCPINFORM 8
+
+int dhcp_send_discover(struct netdev *dev);
+int dhcp_recv_offer(struct netdev *dev);
+int dhcp_send_request(struct netdev *dev);
+int dhcp_recv_ack(struct netdev *dev);
+
+#endif /* IPCONFIG_DHCP_PROTO_H */
diff --git a/usr/kinit/ipconfig/ipconfig.h b/usr/kinit/ipconfig/ipconfig.h
new file mode 100644
index 0000000..d1d7e42
--- /dev/null
+++ b/usr/kinit/ipconfig/ipconfig.h
@@ -0,0 +1,25 @@
+#ifndef IPCONFIG_IPCONFIG_H
+#define IPCONFIG_IPCONFIG_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#define LOCAL_PORT 68
+#define REMOTE_PORT (LOCAL_PORT - 1)
+
+extern uint16_t cfg_local_port;
+extern uint16_t cfg_remote_port;
+
+extern char vendor_class_identifier[];
+extern int vendor_class_identifier_len;
+
+int ipconfig_main(int argc, char *argv[]);
+uint32_t ipconfig_server_address(void *next);
+
+#ifdef DEBUG
+# define dprintf printf
+#else
+# define dprintf(...) ((void)0)
+#endif
+
+#endif /* IPCONFIG_IPCONFIG_H */
diff --git a/usr/kinit/ipconfig/main.c b/usr/kinit/ipconfig/main.c
new file mode 100644
index 0000000..64c5398
--- /dev/null
+++ b/usr/kinit/ipconfig/main.c
@@ -0,0 +1,924 @@
+#include <poll.h>
+#include <limits.h>
+#include <setjmp.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <time.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/sysinfo.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <unistd.h> /* for getopts */
+
+#include <net/if_arp.h>
+
+#include "ipconfig.h"
+#include "netdev.h"
+#include "bootp_packet.h"
+#include "bootp_proto.h"
+#include "dhcp_proto.h"
+#include "packet.h"
+
+static const char sysfs_class_net[] = "/sys/class/net"; /* scanned by add_all_devices() */
+static const char *progname; /* argv[0] of the first ipconfig_main() call; prefixes error messages */
+static jmp_buf abort_buf; /* armed in ipconfig_main(); longjmp target for fatal errors */
+static char do_not_config; /* set by -n: configure_device() then does nothing */
+static unsigned int default_caps = CAP_DHCP | CAP_BOOTP | CAP_RARP; /* initial caps of new devices; -c replaces it */
+static int loop_timeout = -1; /* -t seconds; negative means loop() never gives up */
+static int configured; /* number of devices passed through complete_device() */
+static int bringup_first = 0; /* when set, loop() stops once one device is complete */
+static int n_devices = 0; /* number of state entries; sizes the pollfd array in loop() */
+
+/* DHCP vendor class identifier */
+char vendor_class_identifier[260]; /* filled by set_vendor_identifier() */
+int vendor_class_identifier_len; /* bytes used in vendor_class_identifier */
+
+struct state { /* per-device entry of the autoconfiguration state machine */
+ int state; /* current DEVST_* value */
+ int restart_state; /* DEVST_* value restored after DEVST_ERROR */
+ time_t expire; /* time (seconds) at which the next timeout event is due */
+ int retry_period; /* seconds until the next retry; doubled up to 60 */
+
+ struct netdev *dev; /* device this entry drives */
+ struct state *next; /* next entry on slist */
+};
+
+/* #define PROTO_x : for uint8_t proto of struct netdev
+ * Each PROTO_x value is the index of its entry in protoinfos[]. */
+struct protoinfo {
+ char *name; /* text used in the "complete" message and as the PROTO value in the dump file */
+} protoinfos[] = {
+#define PROTO_NONE 0
+ {"none"},
+#define PROTO_BOOTP 1
+ {"bootp"},
+#define PROTO_DHCP 2
+ {"dhcp"},
+#define PROTO_RARP 3
+ {"rarp"}
+};
+
+/*
+ * Format a raw s_addr value as dotted-quad text.  The returned pointer
+ * is whatever inet_ntoa() hands back.
+ */
+static inline const char *my_inet_ntoa(uint32_t addr)
+{
+	struct in_addr wrapped = { .s_addr = addr };
+
+	return inet_ntoa(wrapped);
+}
+
+/*
+ * Print the final configuration of dev to stdout: protocol and server
+ * (for BOOTP/DHCP), addresses, either the route list or the single
+ * gateway, name servers, optional host/domain names and boot info.
+ */
+static void print_device_config(struct netdev *dev)
+{
+	const int has_routes = (dev->routes != NULL);
+	/* The padding after "dns0"/"dns1" differs between the two layouts. */
+	const int dns0_pad = has_routes ? 3 : 5;
+	const int dns1_pad = has_routes ? 5 : 3;
+
+	printf("IP-Config: %s complete", dev->name);
+	if (dev->proto == PROTO_BOOTP || dev->proto == PROTO_DHCP) {
+		uint32_t from = dev->serverid;
+
+		/* Fall back to the boot server when no server id is known. */
+		if (!from)
+			from = dev->ip_server;
+		printf(" (%s from %s)", protoinfos[dev->proto].name,
+		       my_inet_ntoa(from));
+	}
+
+	printf(":\n address: %-16s ", my_inet_ntoa(dev->ip_addr));
+	printf("broadcast: %-16s ", my_inet_ntoa(dev->ip_broadcast));
+	printf("netmask: %-16s\n", my_inet_ntoa(dev->ip_netmask));
+
+	if (has_routes) {
+		const char *sep = "";
+		struct route *r = dev->routes;
+
+		printf(" routes :");
+		while (r != NULL) {
+			printf("%s %s/%u", sep, my_inet_ntoa(r->subnet), r->netmask_width);
+			if (r->gateway != 0)
+				printf(" via %s", my_inet_ntoa(r->gateway));
+			sep = ",";
+			r = r->next;
+		}
+		printf("\n");
+	} else {
+		printf(" gateway: %-16s", my_inet_ntoa(dev->ip_gateway));
+	}
+
+	printf(" dns0%*c: %-16s", dns0_pad, ' ', my_inet_ntoa(dev->ip_nameserver[0]));
+	printf(" dns1%*c: %-16s\n", dns1_pad, ' ', my_inet_ntoa(dev->ip_nameserver[1]));
+
+	if (dev->hostname[0] != '\0')
+		printf(" host : %-64s\n", dev->hostname);
+	if (dev->dnsdomainname[0] != '\0')
+		printf(" domain : %-64s\n", dev->dnsdomainname);
+	if (dev->nisdomainname[0] != '\0')
+		printf(" nisdomain: %-64s\n", dev->nisdomainname);
+
+	printf(" rootserver: %s ", my_inet_ntoa(dev->ip_server));
+	printf("rootpath: %s\n", dev->bootpath);
+	printf(" filename : %s\n", dev->filename);
+}
+
+/*
+ * Apply MTU, addresses, routes and (if one is set) the hostname of dev
+ * to the system.  Each failing step is reported on stdout and the
+ * remaining steps are still attempted.  Does nothing when
+ * do_not_config is set.
+ */
+static void configure_device(struct netdev *dev)
+{
+	if (!do_not_config) {
+		if (netdev_setmtu(dev) != 0)
+			printf("IP-Config: failed to set MTU on %s to %u\n",
+			       dev->name, dev->mtu);
+
+		if (netdev_setaddress(dev) != 0)
+			printf("IP-Config: failed to set addresses on %s\n",
+			       dev->name);
+
+		if (netdev_setroutes(dev) != 0)
+			printf("IP-Config: failed to set routes on %s\n",
+			       dev->name);
+
+		if (dev->hostname[0] != '\0') {
+			if (sethostname(dev->hostname, strlen(dev->hostname)) != 0)
+				printf("IP-Config: failed to set hostname '%s' from %s\n",
+				       dev->hostname, dev->name);
+		}
+	}
+}
+
+/*
+ * Write one NAME='value' line, escaping shell variables in git style:
+ * the value is wrapped in single quotes and every character is copied
+ * unchanged except ' and !, which are emitted as '\c' (close the
+ * quote, backslash-escape the character, reopen the quote).
+ */
+static void write_option(FILE *f, const char *name, const char *chr)
+{
+	const char *p;
+
+	fputs(name, f);
+	fputs("='", f);
+	for (p = chr; *p != '\0'; p++) {
+		if (*p == '!' || *p == '\'') {
+			fputs("'\\", f);
+			fputc(*p, f);
+			fputc('\'', f);
+		} else {
+			fputc(*p, f);
+		}
+	}
+	fputs("'\n", f);
+}
+
+/*
+ * Write the configuration of dev as shell-style NAME='value' lines to
+ * /run/net-<name>.conf.  If the file cannot be opened nothing is
+ * written and no error is reported.
+ */
+static void dump_device_config(struct netdev *dev)
+{
+	char fn[40];
+	/* Large enough for "18446744073709551615" plus the terminator. */
+	char num[21];
+	FILE *out;
+
+	snprintf(fn, sizeof(fn), "%snet-%s.conf", "/run/", dev->name);
+	out = fopen(fn, "w");
+	if (out == NULL)
+		return;
+
+	write_option(out, "DEVICE", dev->name);
+	write_option(out, "PROTO", protoinfos[dev->proto].name);
+	write_option(out, "IPV4ADDR", my_inet_ntoa(dev->ip_addr));
+	write_option(out, "IPV4BROADCAST", my_inet_ntoa(dev->ip_broadcast));
+	write_option(out, "IPV4NETMASK", my_inet_ntoa(dev->ip_netmask));
+
+	if (dev->routes == NULL) {
+		write_option(out, "IPV4GATEWAY", my_inet_ntoa(dev->ip_gateway));
+	} else {
+		/* The key buffer leaves room for a 6-digit route index. */
+		char key[23];
+		char value[19];
+		struct route *r;
+		int idx;
+
+		for (idx = 0, r = dev->routes; r != NULL; r = r->next, idx++) {
+			snprintf(key, sizeof(key), "IPV4ROUTE%iSUBNET", idx);
+			snprintf(value, sizeof(value), "%s/%u",
+				 my_inet_ntoa(r->subnet), r->netmask_width);
+			write_option(out, key, value);
+
+			snprintf(key, sizeof(key), "IPV4ROUTE%iGATEWAY", idx);
+			write_option(out, key, my_inet_ntoa(r->gateway));
+		}
+	}
+
+	write_option(out, "IPV4DNS0", my_inet_ntoa(dev->ip_nameserver[0]));
+	write_option(out, "IPV4DNS1", my_inet_ntoa(dev->ip_nameserver[1]));
+	write_option(out, "HOSTNAME", dev->hostname);
+	write_option(out, "DNSDOMAIN", dev->dnsdomainname);
+	write_option(out, "NISDOMAIN", dev->nisdomainname);
+	write_option(out, "ROOTSERVER", my_inet_ntoa(dev->ip_server));
+	write_option(out, "ROOTPATH", dev->bootpath);
+	write_option(out, "filename", dev->filename);
+
+	sprintf(num, "%ld", (long)dev->uptime);
+	write_option(out, "UPTIME", num);
+	sprintf(num, "%u", (unsigned int)dev->dhcpleasetime);
+	write_option(out, "DHCPLEASETIME", num);
+
+	if (dev->domainsearch != NULL)
+		write_option(out, "DOMAINSEARCH", dev->domainsearch);
+	else
+		write_option(out, "DOMAINSEARCH", "");
+
+	fclose(out);
+}
+
+/*
+ * Return the classful (A/B/C) netmask for ip.  Both the argument and
+ * the result are in network byte order; INADDR_ANY is returned for
+ * addresses outside classes A to C.
+ */
+static uint32_t inet_class_netmask(uint32_t ip)
+{
+	const uint32_t host_ip = ntohl(ip);
+	uint32_t mask = INADDR_ANY;
+
+	if (IN_CLASSA(host_ip))
+		mask = htonl(IN_CLASSA_NET);
+	else if (IN_CLASSB(host_ip))
+		mask = htonl(IN_CLASSB_NET);
+	else if (IN_CLASSC(host_ip))
+		mask = htonl(IN_CLASSC_NET);
+
+	return mask;
+}
+
+/*
+ * Fill in a netmask and/or broadcast address that is still unset
+ * (INADDR_ANY): the netmask is guessed from the address class, the
+ * broadcast address is derived from address and netmask.  Each guess
+ * is reported on stdout.
+ */
+static void postprocess_device(struct netdev *dev)
+{
+	if (dev->ip_netmask == INADDR_ANY) {
+		uint32_t guessed = inet_class_netmask(dev->ip_addr);
+
+		dev->ip_netmask = guessed;
+		printf("IP-Config: %s guessed netmask %s\n",
+		       dev->name, my_inet_ntoa(dev->ip_netmask));
+	}
+
+	if (dev->ip_broadcast == INADDR_ANY) {
+		uint32_t network = dev->ip_addr & dev->ip_netmask;
+		uint32_t host_bits = ~dev->ip_netmask;
+
+		dev->ip_broadcast = network | host_bits;
+		printf("IP-Config: %s guessed broadcast address %s\n",
+		       dev->name, my_inet_ntoa(dev->ip_broadcast));
+	}
+}
+
+/*
+ * Finish a device whose parameters are known: record the uptime,
+ * fill in missing values, apply and dump the configuration, print it,
+ * close the packet socket, count the device as configured and push it
+ * onto the ifaces list.
+ */
+static void complete_device(struct netdev *dev)
+{
+	struct sysinfo si;
+
+	/* The uptime is only recorded when sysinfo() succeeds. */
+	if (sysinfo(&si) == 0)
+		dev->uptime = si.uptime;
+
+	postprocess_device(dev);
+	configure_device(dev);
+	dump_device_config(dev);
+	print_device_config(dev);
+	packet_close(dev);
+
+	configured++;
+
+	/* Prepend to the list of configured interfaces. */
+	dev->next = ifaces;
+	ifaces = dev;
+}
+
+/*
+ * Handle readable data on the packet socket of s->dev according to the
+ * current state, and advance the state on a usable reply.  A device
+ * that ends up in DEVST_COMPLETE is finished via complete_device(); a
+ * device that ends up in DEVST_ERROR gets a retry scheduled 10 seconds
+ * after now.
+ *
+ * Returns:
+ * 0 = Not handled, try again later
+ * 1 = Handled
+ */
+static int process_receive_event(struct state *s, time_t now)
+{
+ int handled = 1;
+
+ switch (s->state) {
+ case DEVST_ERROR:
+ return 0; /* Not handled */
+ case DEVST_COMPLETE:
+ return 0; /* Not handled as already configured */
+
+ case DEVST_BOOTP:
+ s->restart_state = DEVST_BOOTP;
+ switch (bootp_recv_reply(s->dev)) {
+ case -1: /* error; still reported as handled */
+ s->state = DEVST_ERROR;
+ break;
+ case 0: /* nothing usable was read */
+ handled = 0;
+ break;
+ case 1: /* reply accepted */
+ s->state = DEVST_COMPLETE;
+ s->dev->proto = PROTO_BOOTP;
+ dprintf("\n bootp reply\n");
+ break;
+ }
+ break;
+
+ case DEVST_DHCPDISC:
+ s->restart_state = DEVST_DHCPDISC;
+ switch (dhcp_recv_offer(s->dev)) {
+ case -1: /* error; still reported as handled */
+ s->state = DEVST_ERROR;
+ break;
+ case 0: /* nothing usable was read */
+ handled = 0;
+ break;
+ case DHCPOFFER: /* Offer received */
+ s->state = DEVST_DHCPREQ;
+ dhcp_send_request(s->dev); /* result is not checked here */
+ break;
+ }
+ break;
+
+ case DEVST_DHCPREQ:
+ s->restart_state = DEVST_DHCPDISC; /* after an error, start over with discovery */
+ switch (dhcp_recv_ack(s->dev)) {
+ case -1: /* error */
+ s->state = DEVST_ERROR;
+ break;
+ case 0: /* nothing usable was read */
+ handled = 0;
+ break;
+ case DHCPACK: /* ACK received */
+ s->state = DEVST_COMPLETE;
+ s->dev->proto = PROTO_DHCP;
+ break;
+ case DHCPNAK: /* NAK received */
+ s->state = DEVST_DHCPDISC;
+ break;
+ }
+ break;
+
+ default: /* any other state: nothing to receive */
+ dprintf("\n");
+ handled = 0;
+ break;
+ }
+
+ switch (s->state) { /* act on the state reached above */
+ case DEVST_COMPLETE:
+ complete_device(s->dev);
+ break;
+
+ case DEVST_ERROR:
+ /* error occurred, try again in 10 seconds */
+ s->expire = now + 10;
+ break;
+ }
+
+ return handled;
+}
+
+/*
+ * Handle an expired timer for s: leave the error state if necessary,
+ * (re)send the packet that belongs to the current state and schedule
+ * the next expiry.  A failed send puts the entry into DEVST_ERROR with
+ * a one second retry; otherwise the retry period is used and then
+ * doubled, up to 60 seconds.
+ */
+static void process_timeout_event(struct state *s, time_t now)
+{
+	int ret = 0;
+
+	/* After an error, resume from the recorded restart state. */
+	if (s->state == DEVST_ERROR)
+		s->state = s->restart_state;
+
+	/* Send the packet that matches the state we are in. */
+	if (s->state == DEVST_BOOTP) {
+		ret = bootp_send_request(s->dev);
+		s->restart_state = DEVST_BOOTP;
+	} else if (s->state == DEVST_DHCPDISC) {
+		ret = dhcp_send_discover(s->dev);
+		s->restart_state = DEVST_DHCPDISC;
+	} else if (s->state == DEVST_DHCPREQ) {
+		ret = dhcp_send_request(s->dev);
+		s->restart_state = DEVST_DHCPDISC;
+	}
+
+	if (ret == -1) {
+		s->state = DEVST_ERROR;
+		s->expire = now + 1;
+		return;
+	}
+
+	s->expire = now + s->retry_period;
+
+	s->retry_period *= 2;
+	if (s->retry_period > 60)
+		s->retry_period = 60;
+}
+
+/*
+ * Put s into the error state and schedule a retry one second after now.
+ */
+static void process_error_event(struct state *s, time_t now)
+{
+	s->expire = now + 1;
+	s->state = DEVST_ERROR;
+}
+
+static struct state *slist; /* devices being autoconfigured; add_one_dev() prepends */
+struct netdev *ifaces; /* configured devices; complete_device() prepends */
+
+/*
+ * Dispatch the events reported by poll().  The state list and the
+ * pollfd array are walked together: a pollfd slot is consumed whenever
+ * its fd matches the packet fd of the current state entry.  nr is the
+ * number of slots with events still to be handled.
+ *
+ * Returns:
+ * 0 = No dhcp/bootp packet was received
+ * 1 = A packet was received and handled
+ */
+static int do_pkt_recv(int nr, struct pollfd *fds, time_t now)
+{
+	struct pollfd *slot = fds;
+	struct state *s;
+	int handled = 0;
+
+	for (s = slist; s != NULL && nr != 0; s = s->next) {
+		if (s->dev->pkt_fd == slot->fd) {
+			if (slot->revents != 0) {
+				if (slot->revents & POLLRDNORM)
+					handled |= process_receive_event(s, now);
+				else
+					process_error_event(s, now);
+				nr--;
+			}
+			slot++;
+		}
+	}
+
+	return handled;
+}
+
+/*
+ * Main event loop: drive every entry on slist until all devices are
+ * complete, or (with bringup_first set) until the first one is, or
+ * until loop_timeout seconds have passed without success.
+ *
+ * Each round fires due timeout events, collects the packet sockets of
+ * all entries that are not in the error state, and polls them (at most
+ * twice per round) for up to the time until the next expiry.
+ *
+ * Returns 0 on success and -1 on allocation failure or timeout.
+ */
+static int loop(void)
+{
+	struct pollfd *fds;
+	struct state *s;
+	int i, nr = 0, rc = 0;
+	struct timeval now, prev;
+	time_t start;
+
+	/*
+	 * Zero the whole array, not just its first element:
+	 * do_pkt_recv() may look at slots this loop has not filled in.
+	 */
+	fds = calloc(n_devices, sizeof(*fds));
+	if (!fds) {
+		fprintf(stderr, "malloc failed\n");
+		rc = -1;
+		goto bail;
+	}
+
+	gettimeofday(&now, NULL);
+	start = now.tv_sec;
+	while (1) {
+		int timeout = 60;
+		int pending = 0;
+		int done = 0;
+		int timeout_ms;
+		int x;
+
+		for (i = 0, s = slist; s; s = s->next) {
+			dprintf("%s: state = %d\n", s->dev->name, s->state);
+
+			if (s->state == DEVST_COMPLETE) {
+				done++;
+				continue;
+			}
+
+			pending++;
+
+			if (s->expire - now.tv_sec <= 0) {
+				dprintf("timeout\n");
+				process_timeout_event(s, now.tv_sec);
+			}
+
+			/* Entries in the error state are not polled. */
+			if (s->state != DEVST_ERROR) {
+				fds[i].fd = s->dev->pkt_fd;
+				fds[i].events = POLLRDNORM;
+				i++;
+			}
+
+			/* Sleep no longer than the nearest expiry. */
+			if (timeout > s->expire - now.tv_sec)
+				timeout = s->expire - now.tv_sec;
+		}
+
+		if (pending == 0 || (bringup_first && done))
+			break;
+
+		timeout_ms = timeout * 1000;
+
+		for (x = 0; x < 2; x++) {
+			int delta_ms;
+
+			if (timeout_ms <= 0)
+				timeout_ms = 100;
+
+			nr = poll(fds, i, timeout_ms);
+			prev = now;
+			gettimeofday(&now, NULL);
+
+			if ((nr > 0) && do_pkt_recv(nr, fds, now.tv_sec))
+				break;
+
+			if (loop_timeout >= 0 &&
+			    now.tv_sec - start >= loop_timeout) {
+				printf("IP-Config: no response after %d "
+				       "secs - giving up\n", loop_timeout);
+				rc = -1;
+				goto bail;
+			}
+
+			/* Subtract the time already spent waiting. */
+			delta_ms = (now.tv_sec - prev.tv_sec) * 1000;
+			delta_ms += (now.tv_usec - prev.tv_usec) / 1000;
+
+			dprintf("Delta: %d ms\n", delta_ms);
+
+			timeout_ms -= delta_ms;
+		}
+	}
+bail:
+	free(fds);	/* free(NULL) is a no-op */
+	return rc;
+}
+
+/*
+ * Create a state machine entry for dev and prepend it to slist.
+ *
+ * The starting state depends on the device capabilities: DHCP
+ * discovery when DHCP is allowed and no address is set, a DHCP request
+ * when DHCP is allowed and an address is already set, otherwise BOOTP
+ * when that is allowed.  A device whose capabilities contain neither
+ * DHCP nor BOOTP (e.g. RARP only) has no usable starting state and is
+ * rejected rather than queued with an uninitialised state.
+ *
+ * Returns 0 on success, -1 if the device is rejected or memory is
+ * exhausted.
+ */
+static int add_one_dev(struct netdev *dev)
+{
+	struct state *state;
+	int start;
+
+	/*
+	 * Select the state that we start from.
+	 */
+	if (dev->caps & CAP_DHCP && dev->ip_addr == INADDR_ANY)
+		start = DEVST_DHCPDISC;
+	else if (dev->caps & CAP_DHCP)
+		start = DEVST_DHCPREQ;
+	else if (dev->caps & CAP_BOOTP)
+		start = DEVST_BOOTP;
+	else {
+		fprintf(stderr,
+			"%s: %s: no supported autoconfiguration protocol\n",
+			progname, dev->name);
+		return -1;
+	}
+
+	state = malloc(sizeof(*state));
+	if (!state)
+		return -1;
+
+	state->dev = dev;
+	state->expire = time(NULL);
+	state->retry_period = 1;
+	state->restart_state = state->state = start;
+
+	state->next = slist;
+	slist = state;
+
+	n_devices++;
+
+	return 0;
+}
+
+/*
+ * Convert the dotted-quad string ip with inet_aton() and store the
+ * result in *addr.  An unparsable string is fatal: an error is printed
+ * and control returns to ipconfig_main() via longjmp().
+ */
+static void parse_addr(uint32_t *addr, const char *ip)
+{
+	struct in_addr parsed;
+
+	if (!inet_aton(ip, &parsed)) {
+		fprintf(stderr, "%s: can't parse IP address '%s'\n",
+			progname, ip);
+		longjmp(abort_buf, 1);
+	}
+
+	*addr = parsed.s_addr;
+}
+
+/*
+ * Map a protocol keyword to a CAP_* bit mask.  An empty string, "on"
+ * and "any" enable everything; "none", "static" and "off" yield 0.  An
+ * unknown keyword is fatal: an error is printed and control returns to
+ * ipconfig_main() via longjmp().
+ */
+static unsigned int parse_proto(const char *ip)
+{
+	static const struct {
+		const char *word;
+		unsigned int caps;
+	} known[] = {
+		{ "",       CAP_BOOTP | CAP_DHCP | CAP_RARP },
+		{ "on",     CAP_BOOTP | CAP_DHCP | CAP_RARP },
+		{ "any",    CAP_BOOTP | CAP_DHCP | CAP_RARP },
+		{ "both",   CAP_BOOTP | CAP_RARP },
+		{ "dhcp",   CAP_BOOTP | CAP_DHCP },
+		{ "bootp",  CAP_BOOTP },
+		{ "rarp",   CAP_RARP },
+		{ "none",   0 },
+		{ "static", 0 },
+		{ "off",    0 },
+	};
+	size_t k;
+
+	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
+		if (strcmp(ip, known[k].word) == 0)
+			return known[k].caps;
+	}
+
+	fprintf(stderr, "%s: invalid protocol '%s'\n", progname, ip);
+	longjmp(abort_buf, 1);
+
+	return 0;	/* not reached */
+}
+
+static int add_all_devices(struct netdev *template);
+
+/*
+ * Store one colon-separated field in dev.  opt is the zero-based field
+ * position and field its non-empty, NUL-terminated text.  Positions
+ * beyond 9 are ignored, as is position 9 (NTP server).
+ */
+static void parse_device_field(struct netdev *dev, int opt, char *field)
+{
+	switch (opt) {
+	case 0:
+		parse_addr(&dev->ip_addr, field);
+		dev->caps = 0;
+		break;
+	case 1:
+		parse_addr(&dev->ip_server, field);
+		break;
+	case 2:
+		parse_addr(&dev->ip_gateway, field);
+		break;
+	case 3:
+		parse_addr(&dev->ip_netmask, field);
+		break;
+	case 4:
+		strncpy(dev->hostname, field, SYS_NMLN - 1);
+		dev->hostname[SYS_NMLN - 1] = '\0';
+		memcpy(dev->reqhostname, dev->hostname, SYS_NMLN);
+		break;
+	case 5:
+		dev->name = field;
+		break;
+	case 6:
+		dev->caps = parse_proto(field);
+		break;
+	case 7:
+		parse_addr(&dev->ip_nameserver[0], field);
+		break;
+	case 8:
+		parse_addr(&dev->ip_nameserver[1], field);
+		break;
+	case 9:
+		/* NTP server - ignore */
+		break;
+	}
+}
+
+/*
+ * Parse one device specification into dev.  An optional "ip=" or
+ * "nfsaddrs=" prefix is skipped.  Without any colon the rest is a
+ * protocol keyword (when a prefix was present) or an interface name;
+ * otherwise it is a colon-separated field list that is split in place,
+ * so dev->name may end up pointing into ip.
+ *
+ * Returns 1 when dev names a single interface.  Returns 0 when the
+ * name is missing, empty or "all"; in that case add_all_devices() has
+ * been run with dev as the template.
+ */
+static int parse_device(struct netdev *dev, char *ip)
+{
+	char *next;
+	int has_prefix = 0;
+	int opt;
+
+	dprintf("IP-Config: parse_device: \"%s\"\n", ip);
+
+	if (strncmp(ip, "ip=", 3) == 0) {
+		ip += 3;
+		has_prefix = 1;
+	} else if (strncmp(ip, "nfsaddrs=", 9) == 0) {
+		ip += 9;
+		has_prefix = 1;	/* Not sure about this...? */
+	}
+
+	if (strchr(ip, ':') == NULL) {
+		/* Only one option, e.g. "ip=dhcp", or an interface name */
+		if (has_prefix) {
+			dev->caps = parse_proto(ip);
+			bringup_first = 1;
+		} else {
+			dev->name = ip;
+		}
+	} else {
+		opt = 0;
+		while (ip != NULL && *ip != '\0') {
+			/* Terminate the current field at its colon, if any. */
+			next = strchr(ip, ':');
+			if (next != NULL)
+				*next++ = '\0';
+
+			if (*ip != '\0') {
+				dprintf("IP-Config: opt #%d: '%s'\n", opt, ip);
+				parse_device_field(dev, opt, ip);
+			}
+
+			ip = next;
+			opt++;
+		}
+	}
+
+	if (dev->name != NULL && dev->name[0] != '\0' &&
+	    strcmp(dev->name, "all") != 0)
+		return 1;
+
+	add_all_devices(dev);
+	bringup_first = 1;
+	return 0;
+}
+
+/*
+ * Bring the interface up.  On success a device with autoconfiguration
+ * capabilities is queued via add_one_dev(); a device without any is
+ * completed immediately with protocol "none".  Nothing happens when
+ * the interface cannot be brought up.
+ */
+static void bringup_device(struct netdev *dev)
+{
+	if (netdev_up(dev) != 0)
+		return;
+
+	if (dev->caps) {
+		add_one_dev(dev);
+		return;
+	}
+
+	dev->proto = PROTO_NONE;
+	complete_device(dev);
+}
+
+/*
+ * Copy the settings of template into dev and bring dev up.  Address
+ * fields are copied unless they equal INADDR_NONE, host names are
+ * copied when non-empty, and the capabilities of dev are restricted to
+ * those also present in the template.
+ */
+static void bringup_one_dev(struct netdev *template, struct netdev *dev)
+{
+	const uint32_t *from[] = {
+		&template->ip_addr,
+		&template->ip_server,
+		&template->ip_gateway,
+		&template->ip_netmask,
+		&template->ip_nameserver[0],
+		&template->ip_nameserver[1],
+	};
+	uint32_t *to[] = {
+		&dev->ip_addr,
+		&dev->ip_server,
+		&dev->ip_gateway,
+		&dev->ip_netmask,
+		&dev->ip_nameserver[0],
+		&dev->ip_nameserver[1],
+	};
+	size_t k;
+
+	for (k = 0; k < sizeof(from) / sizeof(from[0]); k++) {
+		if (*from[k] != INADDR_NONE)
+			*to[k] = *from[k];
+	}
+
+	if (template->hostname[0] != '\0')
+		strcpy(dev->hostname, template->hostname);
+	if (template->reqhostname[0] != '\0')
+		strcpy(dev->reqhostname, template->reqhostname);
+
+	dev->caps &= template->caps;
+
+	bringup_device(dev);
+}
+
+/*
+ * Allocate a zeroed device, parse info into it and initialise the
+ * interface, the bootp layer and the packet socket, in that order.
+ * On success the hardware address, MTU and enabled protocols are
+ * printed and the device is returned.  If any step fails, or if the
+ * spec did not name a single interface, the device is freed and NULL
+ * is returned.  Running out of memory is fatal (longjmp).
+ */
+static struct netdev *add_device(char *info)
+{
+	struct netdev *dev = calloc(1, sizeof(*dev));
+	const char *main_proto;
+	const char *rarp_proto;
+	int i;
+
+	if (dev == NULL) {
+		fprintf(stderr, "%s: out of memory\n", progname);
+		longjmp(abort_buf, 1);
+	}
+
+	dev->caps = default_caps;
+
+	/* The steps are order-dependent; stop at the first failure. */
+	if (parse_device(dev, info) == 0 ||
+	    netdev_init_if(dev) == -1 ||
+	    bootp_init_if(dev) == -1 ||
+	    packet_open(dev) == -1) {
+		free(dev);
+		return NULL;
+	}
+
+	printf("IP-Config: %s hardware address", dev->name);
+	for (i = 0; i < dev->hwlen; i++)
+		printf("%c%02x", i == 0 ? ' ' : ':', dev->hwaddr[i]);
+
+	if (dev->caps & CAP_DHCP)
+		main_proto = " DHCP";
+	else if (dev->caps & CAP_BOOTP)
+		main_proto = " BOOTP";
+	else
+		main_proto = "";
+	rarp_proto = (dev->caps & CAP_RARP) ? " RARP" : "";
+
+	printf(" mtu %d%s%s\n", dev->mtu, main_proto, rarp_proto);
+	return dev;
+}
+
+/*
+ * Try to bring up every suitable interface listed in /sys/class/net,
+ * using template for the settings.  An interface is suitable when its
+ * sysfs "flags" value shows it is not a loopback device and supports
+ * broadcast or point-to-point operation.
+ *
+ * Returns 0 if the sysfs directory cannot be opened, 1 otherwise.
+ */
+static int add_all_devices(struct netdev *template)
+{
+	DIR *d;
+	struct dirent *de;
+	struct netdev *dev;
+	char t[PATH_MAX], p[255];
+	char *name;
+	int i, fd;
+	unsigned long flags;
+
+	d = opendir(sysfs_class_net);
+	if (!d)
+		return 0;
+
+	while ((de = readdir(d)) != NULL) {
+		/* Skip "." and ".." and anything else starting with a dot */
+		if (de->d_name[0] == '.')
+			continue;
+		i = snprintf(t, PATH_MAX - 1, "%s/%s/flags", sysfs_class_net,
+			     de->d_name);
+		if (i < 0 || i >= PATH_MAX - 1)
+			continue;
+		t[i] = '\0';
+		fd = open(t, O_RDONLY);
+		if (fd < 0) {
+			perror(t);
+			continue;
+		}
+		i = read(fd, p, sizeof(p) - 1);
+		close(fd);
+		if (i < 0) {
+			perror(t);
+			continue;
+		}
+		p[i] = '\0';
+		flags = strtoul(p, NULL, 0);
+		/* Heuristic for if this is a reasonable boot interface.
+		   This is the same
+		   logic the in-kernel ipconfig uses... */
+		if (!(flags & IFF_LOOPBACK) &&
+		    (flags & (IFF_BROADCAST | IFF_POINTOPOINT))) {
+			dprintf("Trying to bring up %s\n", de->d_name);
+
+			/*
+			 * The device keeps a pointer to its name, and
+			 * de->d_name may be overwritten by the next
+			 * readdir() and is gone after closedir(), so
+			 * hand over a private copy.
+			 */
+			name = strdup(de->d_name);
+			if (!name)
+				continue;
+
+			dev = add_device(name);
+			if (!dev) {
+				/* Nothing refers to the copy any more. */
+				free(name);
+				continue;
+			}
+			bringup_one_dev(template, dev);
+		}
+	}
+	closedir(d);
+	return 1;
+}
+
+/*
+ * Count the queued devices that still have autoconfiguration
+ * capabilities.  If nothing is queued and nothing has been configured
+ * either, that is fatal: an error is printed and control returns to
+ * ipconfig_main() via longjmp().
+ */
+static int check_autoconfig(void)
+{
+	const struct state *s;
+	int queued = 0;
+	int automatic = 0;
+
+	for (s = slist; s != NULL; s = s->next) {
+		queued++;
+		if (s->dev->caps)
+			automatic++;
+	}
+
+	if (queued == 0 && configured == 0) {
+		fprintf(stderr, "%s: no devices to configure\n",
+			progname);
+		longjmp(abort_buf, 1);
+	}
+
+	return automatic;
+}
+
+/*
+ * Store id in vendor_class_identifier as a ready-made option: code 60,
+ * a one-byte length, then the id bytes without terminator.  An id of
+ * 255 bytes or more is fatal (error message plus longjmp).
+ */
+static void set_vendor_identifier(const char *id)
+{
+	const int id_len = strlen(id);
+
+	if (id_len >= 255) {
+		fprintf(stderr,
+			"%s: invalid vendor class identifier: "
+			"%s\n", progname, id);
+		longjmp(abort_buf, 1);
+	}
+
+	vendor_class_identifier[0] = 60;
+	vendor_class_identifier[1] = id_len;
+	memcpy(&vendor_class_identifier[2], id, id_len);
+	vendor_class_identifier_len = id_len + 2;
+}
+
+/* Stand-alone builds enter through main(), a weak alias of ipconfig_main(). */
+int main(int argc, char *argv[])
+	__attribute__ ((weak, alias("ipconfig_main")));
+
+/*
+ * Entry point: parse the options, bring up every device named with -d
+ * or as a non-option argument, and run the autoconfiguration loop if
+ * any queued device needs it.
+ *
+ * Returns 0 on success, the longjmp() value after a fatal error, or
+ * the result of loop().
+ */
+int ipconfig_main(int argc, char *argv[])
+{
+	struct netdev *dev;
+	int c, port;
+	int err = 0;
+
+	/* If progname is set we're invoked from another program */
+	if (!progname) {
+		struct timeval tv;
+
+		progname = argv[0];
+		gettimeofday(&tv, NULL);
+		srand48(tv.tv_usec ^ (tv.tv_sec << 24));
+	}
+
+	/* Fatal errors anywhere below come back here. */
+	if ((err = setjmp(abort_buf)))
+		return err;
+
+	/* Default vendor identifier */
+	set_vendor_identifier("Linux ipconfig");
+
+	while ((c = getopt(argc, argv, "c:d:i:onp:t:")) != EOF) {
+		switch (c) {
+		case 'c':
+			default_caps = parse_proto(optarg);
+			break;
+		case 'p':
+			port = atoi(optarg);
+			if (port <= 0 || port > USHRT_MAX) {
+				fprintf(stderr,
+					"%s: invalid port number %d\n",
+					progname, port);
+				longjmp(abort_buf, 1);
+			}
+			cfg_local_port = port;
+			cfg_remote_port = cfg_local_port - 1;
+			break;
+		case 't':
+			loop_timeout = atoi(optarg);
+			if (loop_timeout < 0) {
+				fprintf(stderr,
+					"%s: invalid timeout %d\n",
+					progname, loop_timeout);
+				longjmp(abort_buf, 1);
+			}
+			break;
+		case 'i':
+			set_vendor_identifier(optarg);
+			break;
+		case 'o':
+			bringup_first = 1;
+			break;
+		case 'n':
+			do_not_config = 1;
+			break;
+		case 'd':
+			dev = add_device(optarg);
+			if (dev)
+				bringup_device(dev);
+			break;
+		case '?':
+			fprintf(stderr, "%s: invalid option -%c\n",
+				progname, optopt);
+			longjmp(abort_buf, 1);
+		}
+	}
+
+	/* Remaining arguments are device specifications. */
+	for (c = optind; c < argc; c++) {
+		dev = add_device(argv[c]);
+		if (dev)
+			bringup_device(dev);
+	}
+
+	if (check_autoconfig()) {
+		if (cfg_local_port != LOCAL_PORT) {
+			printf("IP-Config: binding source port to %d, "
+			       "dest to %d\n",
+			       cfg_local_port, cfg_remote_port);
+		}
+		err = loop();
+	}
+
+	return err;
+}
diff --git a/usr/kinit/ipconfig/netdev.c b/usr/kinit/ipconfig/netdev.c
new file mode 100644
index 0000000..de87f96
--- /dev/null
+++ b/usr/kinit/ipconfig/netdev.c
@@ -0,0 +1,279 @@
+/*
+ * ioctl-based device configuration
+ */
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <netinet/in.h>
+#include <linux/route.h>
+#include <linux/sockios.h>
+
+#include "netdev.h"
+
+static int cfd = -1; /* AF_INET datagram socket used for all ioctls; opened on first netdev_init_if() */
+
+/*
+ * Copy the device name into ifr->ifr_name, truncating if necessary and
+ * always leaving the field NUL-terminated.
+ */
+static void copy_name(struct netdev *dev, struct ifreq *ifr)
+{
+	const size_t cap = sizeof(ifr->ifr_name);
+
+	strncpy(ifr->ifr_name, dev->name, cap);
+	ifr->ifr_name[cap - 1] = '\0';
+}
+
+/*
+ * Read the interface flags of dev into *flags.
+ * Returns 0 on success; on ioctl failure reports it with perror() and
+ * returns -1, leaving *flags untouched.
+ */
+int netdev_getflags(struct netdev *dev, short *flags)
+{
+	struct ifreq req;
+	int rc;
+
+	copy_name(dev, &req);
+
+	rc = ioctl(cfd, SIOCGIFFLAGS, &req);
+	if (rc == -1) {
+		perror("SIOCGIFFLAGS");
+		return -1;
+	}
+
+	*flags = req.ifr_flags;
+	return 0;
+}
+
+/*
+ * Place addr into ifr->ifr_addr as an AF_INET socket address and issue
+ * the "set" ioctl cmd on the control socket.  Returns the ioctl result.
+ */
+static int netdev_sif_addr(struct ifreq *ifr, int cmd, uint32_t addr)
+{
+	struct sockaddr_in sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sin_addr.s_addr = addr;
+	sa.sin_family = AF_INET;
+
+	memcpy(&ifr->ifr_addr, &sa, sizeof(sa));
+
+	return ioctl(cfd, cmd, ifr);
+}
+
+/*
+ * Apply the address, broadcast address and netmask of dev, in that
+ * order.  A value of INADDR_ANY is skipped.  The first failing ioctl
+ * is reported with perror() and ends the function with -1; 0 is
+ * returned when every attempted step succeeded.
+ */
+int netdev_setaddress(struct netdev *dev)
+{
+	const struct {
+		int cmd;
+		uint32_t addr;
+		const char *what;
+	} steps[] = {
+		{ SIOCSIFADDR,    dev->ip_addr,      "SIOCSIFADDR" },
+		{ SIOCSIFBRDADDR, dev->ip_broadcast, "SIOCSIFBRDADDR" },
+		{ SIOCSIFNETMASK, dev->ip_netmask,   "SIOCSIFNETMASK" },
+	};
+	struct ifreq req;
+	size_t k;
+
+	copy_name(dev, &req);
+
+	for (k = 0; k < sizeof(steps) / sizeof(steps[0]); k++) {
+		if (steps[k].addr == INADDR_ANY)
+			continue;
+		if (netdev_sif_addr(&req, steps[k].cmd, steps[k].addr) == -1) {
+			perror(steps[k].what);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Fill *saddr with an AF_INET socket address that carries ipaddr; all
+ * other fields are zero.
+ */
+static void set_s_addr(struct sockaddr *saddr, uint32_t ipaddr)
+{
+	struct sockaddr_in in4;
+
+	memset(&in4, 0, sizeof(in4));
+	in4.sin_family = AF_INET;
+	in4.sin_addr.s_addr = ipaddr;
+
+	memcpy(saddr, &in4, sizeof(in4));
+}
+
+/*
+ * Install the routes of dev.  If a route list is present every entry
+ * is added and the single gateway is ignored; otherwise a default
+ * route through ip_gateway is added when one is set.  A route that
+ * already exists (EEXIST) is not an error.
+ *
+ * Returns 0 on success, -1 after the first failing SIOCADDRT (which is
+ * reported with perror()).
+ */
+int netdev_setroutes(struct netdev *dev)
+{
+	struct rtentry rt;
+	struct route *cur;
+
+	if (dev->routes == NULL) {
+		if (dev->ip_gateway == INADDR_ANY)
+			return 0;
+
+		/* Default route via the configured gateway. */
+		memset(&rt, 0, sizeof(rt));
+
+		set_s_addr(&rt.rt_dst, INADDR_ANY);
+		set_s_addr(&rt.rt_gateway, dev->ip_gateway);
+		set_s_addr(&rt.rt_genmask, INADDR_ANY);
+		rt.rt_flags = RTF_UP | RTF_GATEWAY;
+
+		if (ioctl(cfd, SIOCADDRT, &rt) == -1 && errno != EEXIST) {
+			perror("SIOCADDRT");
+			return -1;
+		}
+		return 0;
+	}
+
+	/* RFC3442 demands:
+	   If the DHCP server returns both a Classless Static Routes option and
+	   a Router option, the DHCP client MUST ignore the Router option. */
+	for (cur = dev->routes; cur != NULL; cur = cur->next) {
+		memset(&rt, 0, sizeof(rt));
+
+		rt.rt_dev = dev->name;
+		set_s_addr(&rt.rt_dst, cur->subnet);
+		set_s_addr(&rt.rt_gateway, cur->gateway);
+		set_s_addr(&rt.rt_genmask, netdev_genmask(cur->netmask_width));
+
+		/* Only routes with a gateway get RTF_GATEWAY. */
+		rt.rt_flags = RTF_UP;
+		if (cur->gateway != 0)
+			rt.rt_flags |= RTF_GATEWAY;
+
+		if (ioctl(cfd, SIOCADDRT, &rt) == -1 && errno != EEXIST) {
+			perror("SIOCADDRT");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Set the MTU of the interface to dev->mtu.  Returns the result of the
+ * SIOCSIFMTU ioctl.
+ */
+int netdev_setmtu(struct netdev *dev)
+{
+	struct ifreq req;
+	int rc;
+
+	copy_name(dev, &req);
+	req.ifr_mtu = dev->mtu;
+
+	rc = ioctl(cfd, SIOCSIFMTU, &req);
+	return rc;
+}
+
+/*
+ * Issue the "get" ioctl cmd and store the IPv4 address found in
+ * ifr->ifr_addr into *ptr.  Returns 0 on success; on ioctl failure
+ * returns -1 and leaves *ptr untouched.
+ */
+static int netdev_gif_addr(struct ifreq *ifr, int cmd, uint32_t * ptr)
+{
+	const struct sockaddr_in *in4;
+
+	if (ioctl(cfd, cmd, ifr) == -1)
+		return -1;
+
+	in4 = (struct sockaddr_in *)&ifr->ifr_addr;
+	*ptr = in4->sin_addr.s_addr;
+
+	return 0;
+}
+
+/*
+ * Bring the interface up: read its flags, add IFF_UP and write them
+ * back.  Returns 0 on success; a failing ioctl is reported with
+ * perror() and yields -1.
+ */
+int netdev_up(struct netdev *dev)
+{
+	struct ifreq req;
+
+	copy_name(dev, &req);
+
+	if (ioctl(cfd, SIOCGIFFLAGS, &req) == -1) {
+		perror("SIOCGIFFLAGS");
+		return -1;
+	}
+
+	req.ifr_flags = req.ifr_flags | IFF_UP;
+
+	if (ioctl(cfd, SIOCSIFFLAGS, &req) == -1) {
+		perror("SIOCSIFFLAGS");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Take the interface down: read its flags, clear IFF_UP and write them
+ * back.  Returns 0 on success; a failing ioctl is reported with
+ * perror() and yields -1.
+ */
+int netdev_down(struct netdev *dev)
+{
+	struct ifreq req;
+
+	copy_name(dev, &req);
+
+	if (ioctl(cfd, SIOCGIFFLAGS, &req) == -1) {
+		perror("SIOCGIFFLAGS");
+		return -1;
+	}
+
+	req.ifr_flags = req.ifr_flags & ~IFF_UP;
+
+	if (ioctl(cfd, SIOCSIFFLAGS, &req) == -1) {
+		perror("SIOCSIFFLAGS");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Collect the basic properties of the interface named in dev: index,
+ * MTU, hardware type and hardware address.  The control socket is
+ * opened on first use.  Address, broadcast address and netmask are
+ * queried from the kernel only for fields holding INADDR_NONE; a
+ * failed query is reported and the field set to 0.
+ *
+ * Returns -1 if the socket, the index, the MTU or the hardware address
+ * cannot be obtained, or if the hardware type is not Ethernet, EUI-64
+ * or loopback.  Returns 0 otherwise, including when the address
+ * queries fail.
+ */
+int netdev_init_if(struct netdev *dev)
+{
+	struct ifreq req;
+
+	if (cfd == -1) {
+		cfd = socket(AF_INET, SOCK_DGRAM, 0);
+		if (cfd == -1) {
+			fprintf(stderr, "ipconfig: %s: socket(AF_INET): %s\n",
+				dev->name, strerror(errno));
+			return -1;
+		}
+	}
+
+	copy_name(dev, &req);
+
+	if (ioctl(cfd, SIOCGIFINDEX, &req) == -1) {
+		fprintf(stderr, "ipconfig: %s: SIOCGIFINDEX: %s\n",
+			dev->name, strerror(errno));
+		return -1;
+	}
+	dev->ifindex = req.ifr_ifindex;
+
+	if (ioctl(cfd, SIOCGIFMTU, &req) == -1) {
+		fprintf(stderr, "ipconfig: %s: SIOCGIFMTU: %s\n",
+			dev->name, strerror(errno));
+		return -1;
+	}
+	dev->mtu = req.ifr_mtu;
+
+	if (ioctl(cfd, SIOCGIFHWADDR, &req) == -1) {
+		fprintf(stderr, "ipconfig: %s: SIOCGIFHWADDR: %s\n",
+			dev->name, strerror(errno));
+		return -1;
+	}
+	dev->hwtype = req.ifr_hwaddr.sa_family;
+
+	/* The hardware address length follows from the hardware type. */
+	if (dev->hwtype == ARPHRD_ETHER)
+		dev->hwlen = 6;
+	else if (dev->hwtype == ARPHRD_EUI64)
+		dev->hwlen = 8;
+	else if (dev->hwtype == ARPHRD_LOOPBACK)
+		dev->hwlen = 0;
+	else {
+		dev->hwlen = 0;
+		return -1;
+	}
+
+	memcpy(dev->hwaddr, req.ifr_hwaddr.sa_data, dev->hwlen);
+	memset(dev->hwbrd, 0xff, dev->hwlen);
+
+	/*
+	 * Try to get the current interface information.
+	 */
+	if (dev->ip_addr == INADDR_NONE &&
+	    netdev_gif_addr(&req, SIOCGIFADDR, &dev->ip_addr) == -1) {
+		fprintf(stderr, "ipconfig: %s: SIOCGIFADDR: %s\n",
+			dev->name, strerror(errno));
+		/* Without an address the other two are not queried. */
+		dev->ip_addr = 0;
+		dev->ip_broadcast = 0;
+		dev->ip_netmask = 0;
+		return 0;
+	}
+
+	if (dev->ip_broadcast == INADDR_NONE &&
+	    netdev_gif_addr(&req, SIOCGIFBRDADDR, &dev->ip_broadcast) == -1) {
+		fprintf(stderr, "ipconfig: %s: SIOCGIFBRDADDR: %s\n",
+			dev->name, strerror(errno));
+		dev->ip_broadcast = 0;
+	}
+
+	if (dev->ip_netmask == INADDR_NONE &&
+	    netdev_gif_addr(&req, SIOCGIFNETMASK, &dev->ip_netmask) == -1) {
+		fprintf(stderr, "ipconfig: %s: SIOCGIFNETMASK: %s\n",
+			dev->name, strerror(errno));
+		dev->ip_netmask = 0;
+	}
+
+	return 0;
+}
diff --git a/usr/kinit/ipconfig/netdev.h b/usr/kinit/ipconfig/netdev.h
new file mode 100644
index 0000000..dbc80cd
--- /dev/null
+++ b/usr/kinit/ipconfig/netdev.h
@@ -0,0 +1,107 @@
+#ifndef IPCONFIG_NETDEV_H
+#define IPCONFIG_NETDEV_H
+
+#include <arpa/inet.h>
+#include <sys/utsname.h>
+#include <net/if.h>
+
+#define BPLEN 256 /* size of netdev.bootpath */
+#define FNLEN 128 /* from DHCP RFC 2131 */
+
+struct route { /* one entry of a device's route list (netdev.routes) */
+ uint32_t subnet; /* subnet */
+ uint32_t netmask_width; /* subnet mask width */
+ uint32_t gateway; /* gateway */
+ struct route *next; /* next entry, NULL at the end of the list */
+};
+
+struct netdev { /* per-interface state; add_device() in main.c allocates it zero-filled */
+ char *name; /* Device name */
+ unsigned int ifindex; /* interface index */
+ unsigned int hwtype; /* ARPHRD_xxx */
+ unsigned int hwlen; /* HW address length */
+ uint8_t hwaddr[16]; /* HW address */
+ uint8_t hwbrd[16]; /* Broadcast HW address */
+ unsigned int mtu; /* Device mtu */
+ unsigned int caps; /* Capabilities */
+ time_t open_time; /* NOTE(review): not set in the code visible here - confirm usage */
+
+ struct { /* BOOTP/DHCP info */
+ int fd;
+ uint32_t xid; /* presumably the transaction id - TODO confirm */
+ uint32_t gateway; /* BOOTP/DHCP gateway */
+ } bootp;
+
+ struct { /* RARP information */
+ int fd;
+ } rarp;
+
+ uint8_t proto; /* a protocol used (e.g. PROTO_DHCP) */
+ uint32_t ip_addr; /* my address */
+ uint32_t ip_broadcast; /* broadcast address */
+ uint32_t ip_server; /* server address */
+ uint32_t ip_netmask; /* my subnet mask */
+ uint32_t ip_gateway; /* my gateway */
+ uint32_t ip_nameserver[2]; /* two nameservers */
+ uint32_t serverid; /* dhcp serverid */
+ uint32_t dhcpleasetime; /* duration in seconds */
+ char reqhostname[SYS_NMLN]; /* requested hostname */
+ char hostname[SYS_NMLN]; /* hostname */
+ char dnsdomainname[SYS_NMLN]; /* dns domain name */
+ char nisdomainname[SYS_NMLN]; /* nis domain name */
+ char bootpath[BPLEN]; /* boot path */
+ char filename[FNLEN]; /* filename */
+ char *domainsearch; /* decoded, NULL or malloc-ed */
+ struct route *routes; /* decoded, NULL or malloc-ed list */
+ long uptime; /* when complete configuration */
+ int pkt_fd; /* packet socket for this interface */
+ struct netdev *next; /* next configured i/f */
+};
+
+extern struct netdev *ifaces; /* list of configured interfaces, defined in main.c */
+
+/*
+ * Device capabilities
+ *
+ * Bit flags combined in netdev.caps; they select which
+ * autoconfiguration protocols may be used for a device.
+ */
+#define CAP_BOOTP (1<<0)
+#define CAP_DHCP (1<<1)
+#define CAP_RARP (1<<2)
+
+/*
+ * Device states
+ *
+ * Values of the per-device state machine kept in main.c.
+ */
+#define DEVST_UP 0
+#define DEVST_BOOTP 1 /* waiting for a BOOTP reply */
+#define DEVST_DHCPDISC 2 /* waiting for a DHCP offer */
+#define DEVST_DHCPREQ 3 /* waiting for a DHCP ack */
+#define DEVST_COMPLETE 4 /* configuration finished */
+#define DEVST_ERROR 5 /* error; restarted on the next timeout */
+
+int netdev_getflags(struct netdev *dev, short *flags); /* read interface flags; 0 or -1 */
+int netdev_setaddress(struct netdev *dev); /* set address, broadcast and netmask; 0 or -1 */
+int netdev_setroutes(struct netdev *dev); /* add route list or default gateway; 0 or -1 */
+int netdev_up(struct netdev *dev); /* set IFF_UP; 0 or -1 */
+int netdev_down(struct netdev *dev); /* clear IFF_UP; 0 or -1 */
+int netdev_init_if(struct netdev *dev); /* query index, mtu and hw address; 0 or -1 */
+int netdev_setmtu(struct netdev *dev); /* apply dev->mtu; returns the ioctl result */
+
+/*
+ * Return 1 if the interface flags include IFF_RUNNING, 0 if they do
+ * not or if the flags cannot be read.
+ */
+static inline int netdev_running(struct netdev *dev)
+{
+	short flags;
+
+	if (netdev_getflags(dev, &flags) != 0)
+		return 0;
+
+	return (flags & IFF_RUNNING) ? 1 : 0;
+}
+
+/*
+ * Map a netmask width (prefix length) to a network mask in network
+ * byte order.
+ * Example: 24 -> "255.255.255.0" -> htonl(0xFFFFFF00)
+ *
+ * A width of 0 yields 0.  Widths of 32 and above yield the all-ones
+ * mask; larger values are clamped because shifting by a count derived
+ * from "32 - width" would otherwise be undefined.
+ */
+static inline uint32_t netdev_genmask(uint32_t netmask_width)
+{
+	if (netmask_width == 0)
+		return 0;
+	if (netmask_width >= 32)
+		return htonl(0xFFFFFFFFu);
+
+	return htonl(~((1u << (32 - netmask_width)) - 1));
+}
+
+#endif /* IPCONFIG_NETDEV_H */
diff --git a/usr/kinit/ipconfig/packet.c b/usr/kinit/ipconfig/packet.c
new file mode 100644
index 0000000..2e1487d
--- /dev/null
+++ b/usr/kinit/ipconfig/packet.c
@@ -0,0 +1,278 @@
+#include <errno.h>/*XXX*/
+/*
+ * Packet socket handling glue.
+ */
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <net/if_packet.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netpacket/packet.h>
+#include <asm/byteorder.h>
+#include <arpa/inet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+
+#include "ipconfig.h"
+#include "netdev.h"
+#include "packet.h"
+
+uint16_t cfg_local_port = LOCAL_PORT;
+uint16_t cfg_remote_port = REMOTE_PORT;
+
+/*
+ * Open a packet socket for IP traffic, enable broadcasting on it and
+ * bind it to the interface index of dev.
+ *
+ * On success the descriptor is stored in dev->pkt_fd and returned.
+ * On failure the failing step is reported with perror(), the socket
+ * (if any) is closed and -1 is returned.
+ */
+int packet_open(struct netdev *dev)
+{
+	struct sockaddr_ll addr;
+	const char *failed;
+	int on = 1;
+	int sock;
+
+	/* Get a PACKET socket for IP traffic. */
+	sock = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
+	if (sock == -1) {
+		perror("socket");
+		return -1;
+	}
+
+	/* We want to broadcast */
+	if (setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &on, sizeof(on)) == -1) {
+		failed = "SO_BROADCAST";
+		goto err_close;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sll_family = AF_PACKET;
+	addr.sll_ifindex = dev->ifindex;
+
+	if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+		failed = "bind";
+		goto err_close;
+	}
+
+	dev->pkt_fd = sock;
+	return sock;
+
+err_close:
+	perror(failed);
+	close(sock);
+	return -1;
+}
+
+/*
+ * Close the packet socket stored in dev->pkt_fd and reset the field
+ * to -1 so that it no longer names a descriptor.
+ */
+void packet_close(struct netdev *dev)
+{
+ close(dev->pkt_fd);
+ dev->pkt_fd = -1;
+}
+
+/*
+ * Compute the 16-bit one's-complement checksum over "len" 32-bit
+ * words starting at hdr; each iteration adds two 16-bit halves.
+ * The callers in this file pass an IP header and its ihl field.
+ */
+static unsigned int ip_checksum(uint16_t *hdr, int len)
+{
+	unsigned int sum = 0;
+	int words;
+
+	for (words = len; words != 0; words--) {
+		sum += hdr[0];
+		sum += hdr[1];
+		hdr += 2;
+	}
+
+	/* Fold the carries out of the upper half back in, twice */
+	sum = (sum >> 16) + (sum & 0xffff);
+	sum = (sum >> 16) + (sum & 0xffff);
+
+	return ~sum & 0xffff;
+}
+
+/*
+ * An IP header immediately followed by a UDP header.  Declared packed
+ * so that no padding is inserted between or after the two members.
+ */
+struct header {
+ struct iphdr ip;
+ struct udphdr udp;
+} __attribute__ ((packed, aligned(4)));
+
+/*
+ * Template for the IP+UDP header of outgoing packets: no IP options
+ * (ihl = 5), don't-fragment set, any source address, broadcast
+ * destination, default ports.  packet_send() fills in the lengths and
+ * the IP checksum before each transmission; the UDP checksum is left
+ * at 0.
+ */
+static struct header ipudp_hdrs = {
+ .ip = {
+ .ihl = 5,
+ .version = IPVERSION,
+ .frag_off = __constant_htons(IP_DF),
+ .ttl = 64,
+ .protocol = IPPROTO_UDP,
+ .saddr = INADDR_ANY,
+ .daddr = INADDR_BROADCAST,
+ },
+ .udp = {
+ .source = __constant_htons(LOCAL_PORT),
+ .dest = __constant_htons(REMOTE_PORT),
+ .len = 0,
+ .check = 0,
+ },
+};
+
+#ifdef DEBUG /* Only used with dprintf() */
+/*
+ * Wrap a raw 32-bit address in a struct in_addr and format it with
+ * inet_ntoa().  The result is whatever buffer inet_ntoa() returns, so
+ * it must be consumed before the next call.
+ */
+static char *ntoa(uint32_t addr)
+{
+ struct in_addr in = { addr };
+ return inet_ntoa(in);
+}
+#endif /* DEBUG */
+
+/*
+ * Send a packet.  The caller's data is listed in iov[1...iov_len-1].
+ * iov[0] is reserved: it is overwritten here to point at the IP+UDP
+ * header (ipudp_hdrs), whose ports, lengths and IP checksum are filled
+ * in for this packet.
+ *
+ * The packet is sent to the hardware broadcast address of dev.
+ * Returns the result of sendmsg().
+ */
+int packet_send(struct netdev *dev, struct iovec *iov, int iov_len)
+{
+	struct sockaddr_ll sll;
+	struct msghdr msg;
+	size_t halen;
+	int i, len = 0;
+
+	memset(&sll, 0, sizeof(sll));
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name = &sll;
+	msg.msg_namelen = sizeof(sll);
+	msg.msg_iov = iov;
+	msg.msg_iovlen = iov_len;
+
+	/*
+	 * Always take the ports from the configuration variables.  With
+	 * the defaults this matches the template; testing only the local
+	 * port would ignore a change made to the remote port alone.
+	 */
+	ipudp_hdrs.udp.source = htons(cfg_local_port);
+	ipudp_hdrs.udp.dest = htons(cfg_remote_port);
+
+	dprintf("\n udp src %d dst %d", ntohs(ipudp_hdrs.udp.source),
+		ntohs(ipudp_hdrs.udp.dest));
+
+	dprintf("\n ip src %s ", ntoa(ipudp_hdrs.ip.saddr));
+	dprintf("dst %s ", ntoa(ipudp_hdrs.ip.daddr));
+
+	/*
+	 * Glue in the ip+udp header iovec
+	 */
+	iov[0].iov_base = &ipudp_hdrs;
+	iov[0].iov_len = sizeof(struct header);
+
+	for (i = 0; i < iov_len; i++)
+		len += iov[i].iov_len;
+
+	/* Never copy more address bytes than sockaddr_ll can hold */
+	halen = dev->hwlen;
+	if (halen > sizeof(sll.sll_addr))
+		halen = sizeof(sll.sll_addr);
+
+	sll.sll_family = AF_PACKET;
+	sll.sll_protocol = htons(ETH_P_IP);
+	sll.sll_ifindex = dev->ifindex;
+	sll.sll_hatype = dev->hwtype;
+	sll.sll_pkttype = PACKET_BROADCAST;
+	sll.sll_halen = halen;
+	memcpy(sll.sll_addr, dev->hwbrd, halen);
+
+	/* The checksum field must be zero while the checksum is computed */
+	ipudp_hdrs.ip.tot_len = htons(len);
+	ipudp_hdrs.ip.check = 0;
+	ipudp_hdrs.ip.check = ip_checksum((uint16_t *) &ipudp_hdrs.ip,
+					  ipudp_hdrs.ip.ihl);
+
+	ipudp_hdrs.udp.len = htons(len - sizeof(struct iphdr));
+
+	dprintf("\n bytes %d\n", len);
+
+	return sendmsg(dev->pkt_fd, &msg, 0);
+}
+
+/*
+ * Consume the next packet queued on dev->pkt_fd by reading it into a
+ * throw-away buffer the size of an IP header.  The recvfrom() result
+ * is not examined.
+ */
+void packet_discard(struct netdev *dev)
+{
+ struct iphdr iph;
+ struct sockaddr_ll sll;
+ socklen_t sllen = sizeof(sll);
+
+ sll.sll_ifindex = dev->ifindex;
+
+ recvfrom(dev->pkt_fd, &iph, sizeof(iph), 0,
+ (struct sockaddr *)&sll, &sllen);
+}
+
+/*
+ * Receive a bootp packet.  The caller's buffers are iov[1...iov_len-1].
+ * iov[0] is reserved: it is overwritten here to point at a temporary
+ * buffer that receives the IP and UDP headers and is freed before
+ * returning.
+ *
+ * Returns:
+ * -1 = Error, try again later
+ *  0 = Discarded packet (non-DHCP/BOOTP traffic)
+ * >0 = Size of packet (bytes following the IP and UDP headers)
+ */
+int packet_recv(struct netdev *dev, struct iovec *iov, int iov_len)
+{
+	struct iphdr *ip, iph;
+	struct udphdr *udp;
+	struct msghdr msg = {
+		.msg_name	= NULL,
+		.msg_namelen	= 0,
+		.msg_iov	= iov,
+		.msg_iovlen	= iov_len,
+		.msg_control	= NULL,
+		.msg_controllen	= 0,
+		.msg_flags	= 0
+	};
+	int ret, iphl;
+	struct sockaddr_ll sll;
+	socklen_t sllen = sizeof(sll);
+
+	sll.sll_ifindex = dev->ifindex;
+	msg.msg_name = &sll;
+	msg.msg_namelen = sllen;
+
+	/* Peek at the IP header first to learn its real length */
+	ret = recvfrom(dev->pkt_fd, &iph, sizeof(struct iphdr),
+		       MSG_PEEK, (struct sockaddr *)&sll, &sllen);
+	if (ret == -1)
+		return -1;
+
+	/* A short packet leaves part of iph unset; do not inspect it */
+	if (ret < (int)sizeof(struct iphdr))
+		goto discard_pkt;
+
+	if (iph.ihl < 5 || iph.version != IPVERSION)
+		goto discard_pkt;
+
+	iphl = iph.ihl * 4;
+
+	ip = malloc(iphl + sizeof(struct udphdr));
+	if (!ip)
+		goto discard_pkt;
+
+	udp = (struct udphdr *)((char *)ip + iphl);
+
+	iov[0].iov_base = ip;
+	iov[0].iov_len = iphl + sizeof(struct udphdr);
+
+	ret = recvmsg(dev->pkt_fd, &msg, 0);
+	if (ret == -1)
+		goto free_pkt;
+
+	dprintf("<- bytes %d ", ret);
+
+	/* Both headers must have arrived in full before they are read */
+	if (ret < iphl + (int)sizeof(struct udphdr))
+		goto free_pkt;
+
+	/* The buffer and the udp pointer were sized from the peeked ihl */
+	if (ip->ihl != iph.ihl)
+		goto free_pkt;
+
+	if (ip_checksum((uint16_t *) ip, ip->ihl) != 0)
+		goto free_pkt;
+
+	dprintf("\n ip src %s ", ntoa(ip->saddr));
+	dprintf("dst %s ", ntoa(ip->daddr));
+
+	if (ntohs(ip->tot_len) > ret || ip->protocol != IPPROTO_UDP)
+		goto free_pkt;
+
+	ret -= 4 * ip->ihl;
+
+	dprintf("\n udp src %d dst %d ", ntohs(udp->source),
+		ntohs(udp->dest));
+
+	if (udp->source != htons(cfg_remote_port) ||
+	    udp->dest != htons(cfg_local_port))
+		goto free_pkt;
+
+	if (ntohs(udp->len) > ret)
+		goto free_pkt;
+
+	ret -= sizeof(struct udphdr);
+
+	free(ip);
+
+	return ret;
+
+free_pkt:
+	dprintf("freed\n");
+	free(ip);
+	return 0;
+
+discard_pkt:
+	dprintf("discarded\n");
+	packet_discard(dev);
+	return 0;
+}
diff --git a/usr/kinit/ipconfig/packet.h b/usr/kinit/ipconfig/packet.h
new file mode 100644
index 0000000..4367efe
--- /dev/null
+++ b/usr/kinit/ipconfig/packet.h
@@ -0,0 +1,12 @@
+#ifndef IPCONFIG_PACKET_H
+#define IPCONFIG_PACKET_H
+
+struct iovec;
+
+int packet_open(struct netdev *dev);
+void packet_close(struct netdev *dev);
+int packet_send(struct netdev *dev, struct iovec *iov, int iov_len);
+void packet_discard(struct netdev *dev);
+int packet_recv(struct netdev *dev, struct iovec *iov, int iov_len);
+
+#endif /* IPCONFIG_PACKET_H */
diff --git a/usr/kinit/kinit.c b/usr/kinit/kinit.c
new file mode 100644
index 0000000..28d2953
--- /dev/null
+++ b/usr/kinit/kinit.c
@@ -0,0 +1,331 @@
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <alloca.h>
+#include <limits.h>
+#include <ctype.h>
+#include <termios.h>
+
+#include "kinit.h"
+#include "ipconfig.h"
+#include "run-init.h"
+#include "resume.h"
+
+const char *progname = "kinit";
+int mnt_procfs;
+int mnt_sysfs;
+
+#ifdef DEBUG
+/*
+ * Debug helper: print argc and every argv[] entry, and flag the case
+ * where the vector is not terminated by NULL at argv[argc].
+ */
+void dump_args(int argc, char *argv[])
+{
+	int idx = 0;
+
+	printf(" argc == %d\n", argc);
+
+	while (idx < argc) {
+		printf(" argv[%d]: \"%s\"\n", idx, argv[idx]);
+		idx++;
+	}
+
+	if (argv[argc])
+		printf(" argv[%d]: \"%s\" (SHOULD BE NULL)\n",
+		       argc, argv[argc]);
+}
+#endif /* DEBUG */
+
+
+/*
+ * Run the built-in ipconfig with every "ip=" and "nfsaddrs=" option
+ * found in argv[1...argc-1].
+ *
+ * Returns the result of ipconfig_main(), or 0 without running it when
+ * no such option is present.
+ */
+static int do_ipconfig(int argc, char *argv[])
+{
+	/* Fixed leading arguments: program name, "-i" and its value */
+	enum { FIXED_ARGS = 3 };
+	int i, a = 0;
+	/* FIXED_ARGS + at most (argc - 1) options + terminating NULL */
+	char **args = alloca((argc + FIXED_ARGS) * sizeof(char *));
+
+	if (!args)
+		return -1;
+
+	args[a++] = (char *)"IP-Config";
+	args[a++] = (char *)"-i";
+	args[a++] = (char *)"Linux kinit";
+
+	dprintf("Running ipconfig\n");
+
+	for (i = 1; i < argc; i++) {
+		if (strncmp(argv[i], "ip=", 3) == 0 ||
+		    strncmp(argv[i], "nfsaddrs=", 9) == 0) {
+			args[a++] = argv[i];
+		}
+	}
+
+	/*
+	 * Only the fixed prefix is present: nothing was requested.  The
+	 * previous "a > 1" test was always true once the prefix grew to
+	 * three entries.
+	 */
+	if (a == FIXED_ARGS)
+		return 0;
+
+	args[a] = NULL;
+	dump_args(a, args);
+	return ipconfig_main(a, args);
+}
+
+/*
+ * Split the NULL-terminated list of command line strings cmdlines[]
+ * into whitespace-separated words, then append the NULL-terminated
+ * explicit arguments args[].
+ *
+ * With cmdv == NULL nothing is written; the call only counts.  With a
+ * non-NULL cmdv, cmdv[0] is set to argv0, every word is NUL-terminated
+ * in place (the cmdlines strings are modified) and its start stored in
+ * cmdv[], and the vector is terminated with NULL.  At most cmdcmax
+ * entries (argv0 included) are produced, so cmdv needs room for
+ * cmdcmax + 1 pointers.
+ *
+ * Returns the number of entries, counting argv0.
+ */
+static int split_cmdline(int cmdcmax, char *cmdv[], char *argv0,
+			 char *cmdlines[], char *args[])
+{
+	int was_space;
+	char c, *p;
+	int vmax = cmdcmax;
+	int v = 1;
+	int space;
+
+	if (cmdv)
+		cmdv[0] = argv0;
+
+	/* First, add the parsable command lines */
+
+	while (*cmdlines) {
+		p = *cmdlines++;
+		was_space = 1;
+		while (v < vmax) {
+			c = *p;
+			/* <ctype.h> requires an unsigned char value or EOF */
+			space = isspace((unsigned char)c);
+			if ((space || !c) && !was_space) {
+				/* End of a word: terminate and count it */
+				if (cmdv)
+					*p = '\0';
+				v++;
+			} else if (was_space) {
+				/*
+				 * Candidate start of the next word; simply
+				 * overwritten while whitespace continues.
+				 */
+				if (cmdv)
+					cmdv[v] = p;
+			}
+
+			if (!c)
+				break;
+
+			was_space = space;
+			p++;
+		}
+	}
+
+	/* Second, add the explicit command line arguments */
+
+	while (*args && v < vmax) {
+		if (cmdv)
+			cmdv[v] = *args;
+		v++;
+		args++;
+	}
+
+	if (cmdv)
+		cmdv[v] = NULL;
+
+	return v;
+}
+
+/*
+ * Make sure a kernel pseudo-filesystem is available.
+ *
+ * If "check" can be stat()ed nothing is done and 0 is returned.
+ * Otherwise the directory "fsname" is created (the mkdir() result is
+ * ignored) and a filesystem of type "fstype" is mounted on it:
+ * 1 is returned on success, -1 after an error message on failure.
+ */
+static int mount_sys_fs(const char *check, const char *fsname,
+			const char *fstype)
+{
+	struct stat sb;
+	int present = (stat(check, &sb) == 0);
+
+	if (!present) {
+		mkdir(fsname, 0555);
+
+		if (mount("none", fsname, fstype, 0, NULL) != -1)
+			return 1;
+
+		fprintf(stderr, "%s: could not mount %s as %s\n",
+			progname, fsname, fstype);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Ensure that "path" exists and is a directory, creating it with mode
+ * 0755 when it is missing.  Any other outcome is fatal: a message is
+ * printed and the process exits with status 1.
+ */
+static void check_path(const char *path)
+{
+	struct stat sb;
+
+	if (stat(path, &sb) != -1) {
+		if (S_ISDIR(sb.st_mode))
+			return;
+
+		fprintf(stderr, "%s: '%s' not a directory\n", progname, path);
+		exit(1);
+	}
+
+	/* stat() failed: only "does not exist" can be repaired */
+	if (errno != ENOENT) {
+		perror("stat");
+		exit(1);
+	}
+
+	if (mkdir(path, 0755) == -1) {
+		perror("mkdir");
+		exit(1);
+	}
+}
+
+/*
+ * Locate an executable init below "root".
+ *
+ * The working directory is changed to root so each candidate can be
+ * tested relative to it (its leading '/' is skipped).  The user
+ * supplied path, when absolute and executable, wins; otherwise the
+ * built-in list is tried in order.  The working directory is set back
+ * to "/" before returning.
+ *
+ * Returns the chosen path (with its leading '/'), or NULL if nothing
+ * executable was found.  Exits with status 1 if root cannot be entered.
+ */
+static const char *find_init(const char *root, const char *user)
+{
+	const char *candidates[] = {
+		"/sbin/init", "/bin/init", "/etc/init", "/bin/sh", NULL
+	};
+	const char *found;
+	int i;
+
+	if (chdir(root)) {
+		perror("chdir");
+		exit(1);
+	}
+
+	if (user)
+		dprintf("Checking for init: %s\n", user);
+
+	if (user && user[0] == '/' && access(user + 1, X_OK) == 0) {
+		found = user;
+	} else {
+		for (i = 0; candidates[i]; i++) {
+			dprintf("Checking for init: %s\n", candidates[i]);
+			if (access(candidates[i] + 1, X_OK) == 0)
+				break;
+		}
+		/* NULL when the loop ran off the end of the list */
+		found = candidates[i];
+	}
+	chdir("/");
+	return found;
+}
+
+/* This is the argc and argv we pass to init */
+const char *init_path;
+int init_argc;
+char **init_argv;
+
+extern ssize_t readfile(const char *, char **);
+
+/*
+ * kinit entry point: set up the console, mount /proc and /sys if
+ * needed, build the effective kernel command line, then attempt
+ * resume, network configuration and root mounting before handing
+ * control to the real init through run_init().
+ *
+ * Only returns on failure: 1 for setup errors, 2 when init cannot be
+ * found or run_init() comes back.
+ */
+int main(int argc, char *argv[])
+{
+ char **cmdv, **args;
+ char *cmdlines[3];
+ int i;
+ const char *errmsg;
+ int ret = 0;
+ int cmdc;
+ int fd;
+ struct timeval now;
+
+ /* Seed the pseudo-random generator from the current time */
+ gettimeofday(&now, NULL);
+ srand48(now.tv_usec ^ (now.tv_sec << 24));
+
+ /* Default parameters for anything init-like we execute */
+ init_argc = argc;
+ init_argv = alloca((argc+1)*sizeof(char *));
+ memcpy(init_argv, argv, (argc+1)*sizeof(char *));
+
+ /* Attach stdin, stdout and stderr to the console if it can be opened */
+ if ((fd = open("/dev/console", O_RDWR)) != -1) {
+ dup2(fd, STDIN_FILENO);
+ dup2(fd, STDOUT_FILENO);
+ dup2(fd, STDERR_FILENO);
+
+ if (fd > STDERR_FILENO)
+ close(fd);
+ }
+
+ /* mount_sys_fs() returns -1 only when the mount itself failed */
+ mnt_procfs = mount_sys_fs("/proc/cmdline", "/proc", "proc") >= 0;
+ if (!mnt_procfs) {
+ ret = 1;
+ goto bail;
+ }
+
+ mnt_sysfs = mount_sys_fs("/sys/bus", "/sys", "sysfs") >= 0;
+ if (!mnt_sysfs) {
+ ret = 1;
+ goto bail;
+ }
+
+ /* Construct the effective kernel command line. The
+ effective kernel command line consists of /arch.cmd, if
+ it exists, /proc/cmdline, plus any arguments after an --
+ argument on the proper command line, in that order. */
+
+ /* /arch.cmd is optional: fall back to an empty string */
+ ret = readfile("/arch.cmd", &cmdlines[0]);
+ if (ret < 0)
+ cmdlines[0] = "";
+
+ ret = readfile("/proc/cmdline", &cmdlines[1]);
+ if (ret < 0) {
+ fprintf(stderr, "%s: cannot read /proc/cmdline\n", progname);
+ ret = 1;
+ goto bail;
+ }
+
+ cmdlines[2] = NULL;
+
+ /* Find an -- argument, and if so append to the command line */
+ for (i = 1; i < argc; i++) {
+ if (!strcmp(argv[i], "--")) {
+ i++;
+ break;
+ }
+ }
+ args = &argv[i]; /* Points either to first argument past -- or
+ to the final NULL */
+
+ /* Count the number of arguments */
+ cmdc = split_cmdline(INT_MAX, NULL, argv[0], cmdlines, args);
+
+ /* Actually generate the cmdline array */
+ cmdv = (char **)alloca((cmdc+1)*sizeof(char *));
+ if (split_cmdline(cmdc, cmdv, argv[0], cmdlines, args) != cmdc) {
+ ret = 1;
+ goto bail;
+ }
+
+ /* Debugging... */
+ dump_args(cmdc, cmdv);
+
+ /* Resume from suspend-to-disk, if appropriate */
+ /* If successful, does not return */
+ do_resume(cmdc, cmdv);
+
+ /* Initialize networking, if applicable */
+ do_ipconfig(cmdc, cmdv);
+
+ check_path("/root");
+ do_mounts(cmdc, cmdv);
+
+ /* Unmount /proc and /sys; clear the flags so bail does not repeat it */
+ if (mnt_procfs) {
+ umount2("/proc", 0);
+ mnt_procfs = 0;
+ }
+
+ if (mnt_sysfs) {
+ umount2("/sys", 0);
+ mnt_sysfs = 0;
+ }
+
+ init_path = find_init("/root", get_arg(cmdc, cmdv, "init="));
+ if (!init_path) {
+ fprintf(stderr, "%s: init not found!\n", progname);
+ ret = 2;
+ goto bail;
+ }
+
+ /* argv[0] for init is the last component of its path */
+ init_argv[0] = strrchr(init_path, '/') + 1;
+
+ errmsg = run_init("/root", "/dev/console",
+ get_arg(cmdc, cmdv, "drop_capabilities="), false,
+ false, init_path, init_argv);
+
+ /* If run_init returned, something went bad */
+ fprintf(stderr, "%s: %s: %s\n", progname, errmsg, strerror(errno));
+ ret = 2;
+ goto bail;
+
+bail:
+ if (mnt_procfs)
+ umount2("/proc", 0);
+
+ if (mnt_sysfs)
+ umount2("/sys", 0);
+
+ /*
+ * If we get here, something bad probably happened, and the kernel
+ * will most likely panic. Drain console output so the user can
+ * figure out what happened.
+ */
+ tcdrain(2);
+ tcdrain(1);
+
+ return ret;
+}
diff --git a/usr/kinit/kinit.h b/usr/kinit/kinit.h
new file mode 100644
index 0000000..ee006f4
--- /dev/null
+++ b/usr/kinit/kinit.h
@@ -0,0 +1,70 @@
+/*
+ * kinit/kinit.h
+ */
+
+#ifndef KINIT_H
+#define KINIT_H
+
+#include <stddef.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+int do_mounts(int argc, char *argv[]);
+int mount_nfs_root(int argc, char *argv[], int flags);
+int ramdisk_load(int argc, char *argv[]);
+void md_run(int argc, char *argv[]);
+const char *bdevname(dev_t dev);
+
+extern int mnt_procfs;
+extern int mnt_sysfs;
+
+extern int init_argc;
+extern char **init_argv;
+extern const char *progname;
+
+char *get_arg(int argc, char *argv[], const char *name);
+int get_flag(int argc, char *argv[], const char *name);
+
+int getintfile(const char *path, long *val);
+
+ssize_t readfile(const char *path, char **pptr);
+ssize_t freadfile(FILE *f, char **pptr);
+
+/*
+ * min()/max() macros that also do strict type-checking: the
+ * "unnecessary" pointer comparison (&_x == &_y) compares pointers to
+ * the two argument types, so the compiler can complain when x and y
+ * differ in type.  Each argument is evaluated exactly once because it
+ * is first copied into a local.
+ * From the Linux kernel.
+ */
+#define min(x, y) ({ \
+ typeof(x) _x = (x); \
+ typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x < _y ? _x : _y; })
+
+#define max(x, y) ({ \
+ typeof(x) _x = (x); \
+ typeof(y) _y = (y); \
+ (void) (&_x == &_y); \
+ _x > _y ? _x : _y; })
+
+
+/*
+ * Debug output: dprintf() is printf() in DEBUG builds and expands to
+ * a no-op otherwise, so its arguments are not evaluated then.
+ */
+#ifdef DEBUG
+# define dprintf printf
+#else
+# define dprintf(...) ((void)0)
+#endif
+
+/*
+ * dump_args() is a real function in DEBUG builds; otherwise it is an
+ * empty inline stub so callers need no #ifdef of their own.
+ */
+#ifdef DEBUG
+void dump_args(int argc, char *argv[]);
+#else
+static inline void dump_args(int argc, char *argv[])
+{
+ (void)argc;
+ (void)argv;
+}
+#endif
+
+int drop_capabilities(const char *caps);
+
+#endif /* KINIT_H */
diff --git a/usr/kinit/name_to_dev.c b/usr/kinit/name_to_dev.c
new file mode 100644
index 0000000..c57b7ce
--- /dev/null
+++ b/usr/kinit/name_to_dev.c
@@ -0,0 +1,276 @@
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <alloca.h>
+#include <inttypes.h>
+
+#include "do_mounts.h"
+#include "kinit.h"
+
+#define BUF_SZ 65536
+
+/*
+ * Find the dev_t for a disk name as it appears under /sys/block, with
+ * an optional partition number: e.g. ("hda", 0) for the whole disk or
+ * ("hdb", 2) for its second partition.
+ *
+ * Returns the device number, or 0 if the sysfs files cannot be read or
+ * parsed, or if part is not below the disk's "range" value.
+ */
+static dev_t try_name(char *name, int part)
+{
+ char path[BUF_SZ];
+ char buf[BUF_SZ];
+ int range;
+ unsigned int major_num, minor_num;
+ dev_t res;
+ char *s;
+ int len;
+ int fd;
+
+ /* read device number from /sys/block/.../dev */
+ snprintf(path, sizeof(path), "/sys/block/%s/dev", name);
+ fd = open(path, 0, 0);
+ if (fd < 0)
+ goto fail;
+ len = read(fd, buf, BUF_SZ);
+ close(fd);
+
+ /* Expect one complete "major:minor\n" line that did not fill buf */
+ if (len <= 0 || len == BUF_SZ || buf[len - 1] != '\n')
+ goto fail;
+ buf[len - 1] = '\0';
+ major_num = strtoul(buf, &s, 10);
+ if (*s != ':')
+ goto fail;
+ minor_num = strtoul(s + 1, &s, 10);
+ if (*s)
+ goto fail;
+ res = makedev(major_num, minor_num);
+
+ /* if it's there and we are not looking for a partition - that's it */
+ if (!part)
+ return res;
+
+ /* otherwise read range from .../range */
+ snprintf(path, sizeof(path), "/sys/block/%s/range", name);
+ fd = open(path, 0, 0);
+ if (fd < 0)
+ goto fail;
+ len = read(fd, buf, 32);
+ close(fd);
+ if (len <= 0 || len == 32 || buf[len - 1] != '\n')
+ goto fail;
+ buf[len - 1] = '\0';
+ range = strtoul(buf, &s, 10);
+ if (*s)
+ goto fail;
+
+ /* if partition is within range - we got it */
+ if (part < range) {
+ dprintf("kinit: try_name %s,%d = %s\n", name, part,
+ bdevname(res + part));
+ /* the partition's number is the disk's number plus part */
+ return res + part;
+ }
+
+fail:
+ return (dev_t) 0;
+}
+
+/*
+ * Find dev_t for a block device based on the provided GPT partlabel.
+ * The partlabel to block device mapping is found by scanning all
+ * the entries in /sys/dev/block/, opening the uevent file and picking
+ * the device where the PARTNAME= entry matches partlabel.
+ *
+ * Returns the device number of the first match, or 0 if the directory
+ * cannot be opened or no entry matches.
+ */
+static dev_t partlabel_to_dev_t(const char *plabel)
+{
+	char path[BUF_SZ];
+	char line[BUF_SZ];
+	DIR *dir;
+	FILE *fp;
+	struct dirent *dent;
+	int match_label, have_major, have_minor;
+	int major_num, minor_num;
+
+	dir = opendir("/sys/dev/block");
+	if (!dir) {
+		dprintf("%s: error %i (%s) opening /sys/dev/block\n",
+			__func__, errno, strerror(errno));
+		return (dev_t) 0;
+	}
+
+	while ((dent = readdir(dir)) != NULL) {
+		/* Skip ".", ".." and any other dot entry */
+		if (dent->d_name[0] == '.')
+			continue;
+		snprintf(path, sizeof(path), "/sys/dev/block/%s/uevent",
+			 dent->d_name);
+
+		fp = fopen(path, "r");
+		if (fp == NULL) {
+			dprintf("kinit %s: error %i (%s) opening %s",
+				__func__, errno, strerror(errno), path);
+			continue;
+		}
+
+		major_num = 0;
+		minor_num = 0;
+		have_major = 0;
+		have_minor = 0;
+		match_label = 0;
+
+		/*
+		 * fgets() returns NULL at end of file and on a read error
+		 * alike, so the loop ends in both cases.
+		 */
+		while (fgets(line, sizeof(line), fp) != NULL) {
+			if (!strncmp(line, "MAJOR=", 6)) {
+				major_num = atoi(line + 6);
+				have_major = 1;
+			} else if (!strncmp(line, "MINOR=", 6)) {
+				minor_num = atoi(line + 6);
+				have_minor = 1;
+			} else if (!strncmp(line, "PARTNAME=", 9)) {
+				line[strcspn(line, "\n")] = 0;
+				if (!strcmp(line + 9, plabel))
+					match_label = 1;
+			}
+
+			/*
+			 * Test that the keys were seen, not that the values
+			 * are non-zero: 0 is a valid minor number.
+			 */
+			if (match_label && have_major && have_minor) {
+				fclose(fp);
+				closedir(dir);
+				return makedev(major_num, minor_num);
+			}
+		}
+		fclose(fp);
+	}
+	closedir(dir);
+
+	return (dev_t) 0;
+}
+
+/*
+ * Convert a name into device number. We accept the following variants:
+ *
+ * 1) device number in hexadecimal represents itself
+ * 2) device number in major:minor decimal represents itself
+ * 3) /dev/nfs represents Root_NFS
+ * 4) /dev/<disk_name> represents the device number of disk
+ * 5) /dev/<disk_name><decimal> represents the device number
+ *    of partition - device number of disk plus the partition number
+ * 6) /dev/<disk_name>p<decimal> - same as the above, that form is
+ *    used when disk name of partitioned disk ends on a digit.
+ * 7) an actual block device node in the initramfs filesystem
+ * 8) PARTLABEL=<name> with name being the GPT partition label.
+ *
+ * If name does not fall into the categories above, we return 0.
+ * Driverfs is used to check if something is a disk name - it has
+ * all known disks under bus/block/devices. If the disk name
+ * contains slashes, name of driverfs node has them replaced with
+ * dots. try_name() does the actual checks, assuming that driverfs
+ * is mounted on rootfs /sys.
+ */
+
+static inline dev_t name_to_dev_t_real(const char *name)
+{
+	char *p;
+	dev_t res = 0;
+	char *s;
+	int part;
+	struct stat st;
+	int len;
+	const char *devname;
+	char *dname = NULL;	/* writable "/dev/<name>" copy, if one was made */
+	char *cptr, *e1, *e2;
+	int major_num, minor_num;
+
+	/* Are we a multi root line? */
+	if (strchr(name, ','))
+		return Root_MULTI;
+
+	if (!strncmp(name, "PARTLABEL=", 10))
+		return partlabel_to_dev_t(name + 10);
+
+	if (name[0] == '/') {
+		devname = name;
+	} else {
+		dname = alloca(strlen(name) + 6);
+		sprintf(dname, "/dev/%s", name);
+		devname = dname;
+	}
+
+	/* Variant 7: an existing block device node */
+	if (!stat(devname, &st) && S_ISBLK(st.st_mode))
+		return st.st_rdev;
+
+	if (!strncmp(name, "/dev/", 5)) {
+		name += 5;
+	} else if (dname) {
+		/*
+		 * The numeric forms apply to bare names only.  They are
+		 * parsed in the private copy: for a path that starts with
+		 * '/' but not "/dev/", devname is the caller's string, and
+		 * devname + 5 could point past its end.
+		 */
+		char *num = dname + 5;
+
+		cptr = strchr(num, ':');
+		if (cptr && cptr[1] != '\0') {
+			/* Colon-separated decimal device number */
+			*cptr = '\0';
+			major_num = strtoul(num, &e1, 10);
+			minor_num = strtoul(cptr + 1, &e2, 10);
+			if (!*e1 && !*e2)
+				return makedev(major_num, minor_num);
+			*cptr = ':';
+		} else {
+			/* Hexadecimal device number */
+			res = (dev_t) strtoul(name, &p, 16);
+			if (!*p)
+				return res;
+		}
+	}
+
+	if (!strcmp(name, "nfs"))
+		return Root_NFS;
+
+	if (!strcmp(name, "ram")) /* /dev/ram - historic alias for /dev/ram0 */
+		return Root_RAM0;
+
+	if (!strncmp(name, "mtd", 3))
+		return Root_MTD;
+
+	/* Work on a private copy with '/' replaced by '!' */
+	len = strlen(name);
+	s = alloca(len + 1);
+	memcpy(s, name, len + 1);
+
+	for (p = s; *p; p++)
+		if (*p == '/')
+			*p = '!';
+	res = try_name(s, 0);
+	if (res)
+		return res;
+
+	/* Split off a trailing decimal partition number, if any */
+	while (p > s && isdigit(p[-1]))
+		p--;
+	if (p == s || !*p || *p == '0')
+		goto fail;
+	part = strtoul(p, NULL, 10);
+	*p = '\0';
+	res = try_name(s, part);
+	if (res)
+		return res;
+
+	/* Variant 6: <disk ending in a digit>p<partition> */
+	if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
+		goto fail;
+	p[-1] = '\0';
+	res = try_name(s, part);
+	return res;
+
+fail:
+	return (dev_t) 0;
+}
+
+/*
+ * Public entry point: resolve name with name_to_dev_t_real() and, in
+ * DEBUG builds, log the result.
+ */
+dev_t name_to_dev_t(const char *name)
+{
+ dev_t dev = name_to_dev_t_real(name);
+
+ dprintf("kinit: name_to_dev_t(%s) = %s\n", name, bdevname(dev));
+ return dev;
+}
+
+#ifdef TEST_NAMETODEV /* Standalone test */
+
+/* Standalone test driver: resolve every command line argument in turn */
+int main(int argc, char *argv[])
+{
+ int i;
+
+ for (i = 1; i < argc; i++)
+ name_to_dev_t(argv[i]);
+
+ return 0;
+}
+
+#endif
diff --git a/usr/kinit/nfsmount/Kbuild b/usr/kinit/nfsmount/Kbuild
new file mode 100644
index 0000000..5f34950
--- /dev/null
+++ b/usr/kinit/nfsmount/Kbuild
@@ -0,0 +1,31 @@
+#
+# kbuild file for nfsmount
+#
+
+static-y := static/nfsmount
+#FIXME - build is broken static-y := dummypmap
+shared-y := shared/nfsmount
+
+objs := main.o mount.o portmap.o dummypmap.o sunrpc.o
+
+# Create built-in.o with all .o files (used by kinit)
+lib-y := $(objs)
+
+# .o files used for executables
+static/nfsmount-y := $(objs)
+shared/nfsmount-y := $(objs)
+
+# dummypmap uses a single .o file (rename src file?)
+dummypmap-y := dummypmap_test.o
+
+# TODO - do we want a stripped version
+# TODO - do we want the static.g + shared.g directories?
+
+clean-dirs := static shared
+
+# Install binary
+ifdef KLIBCSHAREDFLAGS
+install-y := $(shared-y)
+else
+install-y := $(static-y)
+endif
diff --git a/usr/kinit/nfsmount/README.locking b/usr/kinit/nfsmount/README.locking
new file mode 100644
index 0000000..bf2e8e7
--- /dev/null
+++ b/usr/kinit/nfsmount/README.locking
@@ -0,0 +1,26 @@
+I have implemented portmap spoofing in klibc nfsmount (released as
+klibc-0.144). This is basically a vestigial portmap daemon which gets
+launched before the mount() call and then just records any
+transactions it gets to a file and sends back an affirmative reply.
+
+There are two ways to use it (this belongs in a README file, but it's
+too late at night right now):
+
+a) Set a fixed portnumber in /proc/sys/nfs/nlm_tcpport and
+/proc/sys/nfs/nlm_udpport before calling nfsmount; once the portmapper
+starts, feed that fixed portnumber to pmap_set(8). In this case the
+pmap_file can be /dev/null.
+
+b) Allow the kernel to bind to any port and use the file produced by
+nfsroot to feed to pmap_set (it should be directly compatible); this
+means the file needs to be transferred to a place where the "real
+root" can find it before run-init.
+
+In either case, it is imperative that the real portmapper is launched
+before any program actually tries to create locks!
+
+To use it:
+
+ # We need the loopback device to be up before we do this!
+ ipconfig 127.0.0.1:::::lo:none
+ nfsroot -p pmap_file -o lock server:/pathname /realpath
diff --git a/usr/kinit/nfsmount/dummypmap.c b/usr/kinit/nfsmount/dummypmap.c
new file mode 100644
index 0000000..07210c5
--- /dev/null
+++ b/usr/kinit/nfsmount/dummypmap.c
@@ -0,0 +1,281 @@
+/*
+ * Enough portmapper functionality that mount doesn't hang trying
+ * to start lockd. Enables nfsroot with locking functionality.
+ *
+ * Note: the kernel will only speak to the local portmapper
+ * using RPC over UDP.
+ */
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "dummypmap.h"
+#include "sunrpc.h"
+
+extern const char *progname;
+
+/*
+ * Arguments of a portmap call.  The fields are kept as received;
+ * dummy_portmap() converts them with ntohl() before printing them.
+ */
+struct portmap_args {
+ uint32_t program;
+ uint32_t version;
+ uint32_t proto;
+ uint32_t port;
+};
+
+/* An RPC call header followed by portmap arguments */
+struct portmap_call {
+ struct rpc_call rpc;
+ struct portmap_args args;
+};
+
+/* An RPC reply header followed by the single result word */
+struct portmap_reply {
+ struct rpc_reply rpc;
+ uint32_t port;
+};
+
+/*
+ * Create a UDP socket bound to 127.0.0.1 on RPC_PMAP_PORT.
+ * Returns the socket, or -1 with errno describing the failure of
+ * socket() or bind().
+ */
+static int bind_portmap(void)
+{
+	struct sockaddr_in addr;
+	int saved_errno;
+	int fd;
+
+	fd = socket(PF_INET, SOCK_DGRAM, 0);
+	if (fd < 0)
+		return -1;
+
+	memset(&addr, 0, sizeof addr);
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(RPC_PMAP_PORT);
+	addr.sin_addr.s_addr = htonl(0x7f000001);	/* 127.0.0.1 */
+
+	if (bind(fd, (struct sockaddr *)&addr, sizeof addr) >= 0)
+		return fd;
+
+	/* Preserve the bind() error across close() */
+	saved_errno = errno;
+	close(fd);
+	errno = saved_errno;
+	return -1;
+}
+
+/*
+ * Name of an IP protocol number given in network byte order:
+ * "tcp", "udp", or NULL for anything else.
+ */
+static const char *protoname(uint32_t proto)
+{
+	uint32_t host_proto = ntohl(proto);
+
+	if (host_proto == IPPROTO_TCP)
+		return "tcp";
+
+	if (host_proto == IPPROTO_UDP)
+		return "udp";
+
+	return NULL;
+}
+
+/*
+ * Return the address just past an auth block, i.e. its body start
+ * plus the length recorded in it, for the AUTH_NULL and AUTH_UNIX
+ * flavors.  Returns NULL for any other flavor.  The length is taken
+ * from the block as-is; bounds checking is left to the caller.
+ */
+static void *get_auth(struct rpc_auth *auth)
+{
+	uint32_t flavor = ntohl(auth->flavor);
+
+	if (flavor != AUTH_NULL && flavor != AUTH_UNIX)
+		return NULL;
+
+	return (char *)&auth->body + ntohl(auth->len);
+}
+
+/*
+ * Validate an AUTH_UNIX credential: it must be well formed and carry
+ * uid 0 and gid 0.  Returns 0 if acceptable, -1 otherwise.
+ *
+ * Body layout as walked here, in 32-bit words: timestamp, node name
+ * length, node name (padded to a word boundary), uid, gid, then the
+ * remaining gids, which are not examined.
+ */
+static int check_unix_cred(struct rpc_auth *cred)
+{
+	uint32_t quad_len;	/* body length in 32-bit words */
+	uint32_t quad_name_len;	/* node name length in 32-bit words */
+	uint32_t *pos;
+
+	quad_len = (ntohl(cred->len) + 3) >> 2;
+	if (quad_len < 6)
+		/* Malformed creds */
+		return -1;
+
+	pos = cred->body;
+
+	/* Skip timestamp */
+	pos++;
+
+	/* Skip node name: only localhost can succeed. */
+	quad_name_len = (ntohl(*pos++) + 3) >> 2;
+
+	/*
+	 * Two words are consumed so far and three more must follow the
+	 * name.  Compare word counts rather than pointers: the name
+	 * length comes from the packet, and adding it to a pointer
+	 * before checking it could overflow the pointer.
+	 */
+	if (quad_name_len > quad_len - 5)
+		/* Malformed creds */
+		return -1;
+	pos += quad_name_len;
+
+	/* uid must be 0 */
+	if (*pos++ != 0)
+		return -1;
+
+	/* gid must be 0 */
+	if (*pos++ != 0)
+		return -1;
+
+	/* Remaining gids are not checked */
+	return 0;
+}
+
+/*
+ * Accept or reject a call's credential by flavor: AUTH_NULL is always
+ * accepted, AUTH_UNIX is handed to check_unix_cred(), anything else
+ * is rejected.  Returns 0 to accept, -1 to reject.
+ */
+static int check_cred(struct rpc_auth *cred)
+{
+	uint32_t flavor = ntohl(cred->flavor);
+
+	if (flavor == AUTH_NULL)
+		return 0;
+
+	if (flavor == AUTH_UNIX)
+		return check_unix_cred(cred);
+
+	return -1;
+}
+
+/* Accept only an AUTH_NULL verifier: returns 0 if so, -1 otherwise */
+static int check_vrf(struct rpc_auth *vrf)
+{
+ return (vrf->flavor == htonl(AUTH_NULL)) ? 0 : -1;
+}
+
+#define MAX_UDP_PACKET 65536
+
+/*
+ * Serve portmap requests on sock forever.
+ *
+ * Every well-formed RPC call gets a reply.  PMAP_PROC_SET requests for
+ * TCP or UDP are answered with success and, when portmap_file is not
+ * NULL, recorded in it as "program version proto port".
+ * PMAP_PROC_UNSET is answered with success; the other known
+ * procedures get a reply with a zero result.
+ *
+ * Only returns, with -1, when recvfrom() fails with an error other
+ * than EINTR.
+ */
+static int dummy_portmap(int sock, FILE *portmap_file)
+{
+	enum { PAYLOAD_SIZE = MAX_UDP_PACKET + offsetof(struct rpc_header, udp) };
+	struct sockaddr_in sin;
+	int pktlen;
+	socklen_t addrlen;	/* the type recvfrom() and sendto() expect */
+	union {
+		struct rpc_call rpc;
+		/* Max UDP packet size + unused TCP fragment size */
+		char payload[PAYLOAD_SIZE];
+	} pkt;
+	struct rpc_call *rpc = &pkt.rpc;
+	struct rpc_auth *cred;
+	struct rpc_auth *vrf;
+	struct portmap_args *args;
+	struct portmap_reply rply;
+
+	for (;;) {
+		addrlen = sizeof sin;
+		pktlen = recvfrom(sock, &rpc->hdr.udp, MAX_UDP_PACKET,
+				  0, (struct sockaddr *)&sin, &addrlen);
+
+		if (pktlen < 0) {
+			if (errno == EINTR)
+				continue;
+
+			return -1;
+		}
+
+		/* +4 to skip the TCP fragment header */
+		if (pktlen + 4 < sizeof(struct portmap_call))
+			continue;	/* Bad packet */
+
+		if (rpc->hdr.udp.msg_type != htonl(RPC_CALL))
+			continue;	/* Bad packet */
+
+		memset(&rply, 0, sizeof rply);
+
+		rply.rpc.hdr.udp.xid = rpc->hdr.udp.xid;
+		rply.rpc.hdr.udp.msg_type = htonl(RPC_REPLY);
+
+		cred = (struct rpc_auth *) &rpc->cred_flavor;
+		if (rpc->rpc_vers != htonl(2)) {
+			rply.rpc.reply_state = htonl(REPLY_DENIED);
+			/* state <- RPC_MISMATCH == 0 */
+		} else if (rpc->program != htonl(PORTMAP_PROGRAM)) {
+			rply.rpc.reply_state = htonl(PROG_UNAVAIL);
+		} else if (rpc->prog_vers != htonl(2)) {
+			rply.rpc.reply_state = htonl(PROG_MISMATCH);
+		} else if (!(vrf = get_auth(cred)) ||
+			   (char *)vrf > ((char *)&rpc->hdr.udp + pktlen - 8 -
+					  sizeof(*args)) ||
+			   !(args = get_auth(vrf)) ||
+			   (char *)args > ((char *)&rpc->hdr.udp + pktlen -
+					   sizeof(*args)) ||
+			   check_cred(cred) || check_vrf(vrf)) {
+			/* Can't deal with credentials data; the kernel
+			   won't send them */
+			rply.rpc.reply_state = htonl(SYSTEM_ERR);
+		} else {
+			switch (ntohl(rpc->proc)) {
+			case PMAP_PROC_NULL:
+				break;
+			case PMAP_PROC_SET:
+				if (args->proto == htonl(IPPROTO_TCP) ||
+				    args->proto == htonl(IPPROTO_UDP)) {
+					if (portmap_file) {
+						fprintf(portmap_file,
+							"%u %u %s %u\n",
+							ntohl(args->program),
+							ntohl(args->version),
+							protoname(args->proto),
+							ntohl(args->port));
+						/*
+						 * This loop has no normal
+						 * exit, so nothing else
+						 * would flush the record.
+						 */
+						fflush(portmap_file);
+					}
+					rply.port = htonl(1);	/* TRUE = success */
+				}
+				break;
+			case PMAP_PROC_UNSET:
+				rply.port = htonl(1);	/* TRUE = success */
+				break;
+			case PMAP_PROC_GETPORT:
+				break;
+			case PMAP_PROC_DUMP:
+				break;
+			default:
+				rply.rpc.reply_state = htonl(PROC_UNAVAIL);
+				break;
+			}
+		}
+
+		/* -4: the reply is sent without the TCP fragment header */
+		sendto(sock, &rply.rpc.hdr.udp, sizeof rply - 4, 0,
+		       (struct sockaddr *)&sin, addrlen);
+	}
+}
+
+/*
+ * Start the portmap spoofer in a child process, recording the
+ * registrations it receives in "file".
+ *
+ * Returns the child's pid on success; 0 if the portmap port could not
+ * be bound because of EINVAL or EADDRINUSE (assumed to mean that no
+ * spoofer is needed); -1 after an error message on any other failure.
+ */
+pid_t start_dummy_portmap(const char *file)
+{
+	FILE *portmap_filep;
+	int sock;
+	pid_t spoof_portmap;
+
+	portmap_filep = fopen(file, "w");
+	if (!portmap_filep) {
+		fprintf(stderr, "%s: cannot write portmap file: %s\n",
+			progname, file);
+		return -1;
+	}
+
+	sock = bind_portmap();
+	if (sock == -1) {
+		/* Save errno: fclose() may overwrite it */
+		int bind_errno = errno;
+
+		/* The file is not needed on either path below */
+		fclose(portmap_filep);
+
+		if (bind_errno == EINVAL || bind_errno == EADDRINUSE)
+			return 0;	/* Assume not needed */
+
+		fprintf(stderr, "%s: portmap spoofing failed\n", progname);
+		return -1;
+	}
+
+	spoof_portmap = fork();
+	if (spoof_portmap == -1) {
+		fclose(portmap_filep);
+		close(sock);
+		fprintf(stderr, "%s: cannot fork\n", progname);
+		return -1;
+	} else if (spoof_portmap == 0) {
+		/* Child process */
+		dummy_portmap(sock, portmap_filep);
+		_exit(255);	/* Error */
+	} else {
+		/*
+		 * Parent process: the child holds its own copies of the
+		 * socket and the stream, so release ours.  Nothing has
+		 * been written to the stream yet.
+		 */
+		close(sock);
+		fclose(portmap_filep);
+		return spoof_portmap;
+	}
+}
diff --git a/usr/kinit/nfsmount/dummypmap.h b/usr/kinit/nfsmount/dummypmap.h
new file mode 100644
index 0000000..37650bf
--- /dev/null
+++ b/usr/kinit/nfsmount/dummypmap.h
@@ -0,0 +1,11 @@
+/*
+ * Functions for the portmap spoofer
+ */
+
+#ifndef NFSMOUNT_DUMMYPORTMAP_H
+#define NFSMOUNT_DUMMYPORTMAP_H
+
+#include <unistd.h>
+pid_t start_dummy_portmap(const char *file);
+
+#endif /* NFSMOUNT_DUMMYPORTMAP_H */
diff --git a/usr/kinit/nfsmount/dummypmap_test.c b/usr/kinit/nfsmount/dummypmap_test.c
new file mode 100644
index 0000000..d81a141
--- /dev/null
+++ b/usr/kinit/nfsmount/dummypmap_test.c
@@ -0,0 +1,2 @@
+#define TEST
+#include "dummypmap.c"
diff --git a/usr/kinit/nfsmount/main.c b/usr/kinit/nfsmount/main.c
new file mode 100644
index 0000000..66969f4
--- /dev/null
+++ b/usr/kinit/nfsmount/main.c
@@ -0,0 +1,288 @@
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <arpa/inet.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <klibc/sysconfig.h> /* For _KLIBC_NO_MMU */
+
+#include <linux/nfs_mount.h>
+
+#include "nfsmount.h"
+#include "sunrpc.h"
+#include "dummypmap.h"
+
+const char *progname;
+static jmp_buf abort_buf;
+ /*
+  * Default mount parameters handed to nfs_mount(); parse_opts() may
+  * change the flags and the integer fields listed in int_opts.
+  */
+ static struct nfs_mount_data mount_data = {
+ .version = NFS_MOUNT_VERSION,
+ .flags = NFS_MOUNT_NONLM | NFS_MOUNT_VER3 | NFS_MOUNT_TCP,
+ .rsize = 0, /* Server's choice */
+ .wsize = 0, /* Server's choice */
+ .timeo = 0, /* Kernel client's default */
+ .retrans = 3,
+ .acregmin = 3,
+ .acregmax = 60,
+ .acdirmin = 30,
+ .acdirmax = 60,
+ .namlen = NAME_MAX,
+ };
+
+int nfs_port;
+int nfs_version;
+ /*
+  * Options of the form "name=value".  parse_opts() looks the name up
+  * here and stores the parsed integer through val.  The table ends at
+  * the entry whose name is NULL.
+  */
+ static struct int_opts {
+ char *name;
+ int *val;
+ } int_opts[] = {
+ {"port", &nfs_port},
+ {"nfsvers", &nfs_version},
+ {"vers", &nfs_version},
+ {"rsize", &mount_data.rsize},
+ {"wsize", &mount_data.wsize},
+ {"timeo", &mount_data.timeo},
+ {"retrans", &mount_data.retrans},
+ {"acregmin", &mount_data.acregmin},
+ {"acregmax", &mount_data.acregmax},
+ {"acdirmin", &mount_data.acdirmin},
+ {"acdirmax", &mount_data.acdirmax},
+ {NULL, NULL}
+ };
+ /*
+  * Options given without "=value".  For a matching name parse_opts()
+  * computes mount_data.flags = (flags & and_mask) | or_mask, so each
+  * entry either sets or clears one flag bit.  The table ends at the
+  * entry whose name is NULL.
+  */
+ static struct bool_opts {
+ char *name;
+ int and_mask;
+ int or_mask;
+ } bool_opts[] = {
+ {"soft", ~NFS_MOUNT_SOFT, NFS_MOUNT_SOFT},
+ {"hard", ~NFS_MOUNT_SOFT, 0},
+ {"intr", ~NFS_MOUNT_INTR, NFS_MOUNT_INTR},
+ {"nointr", ~NFS_MOUNT_INTR, 0},
+ {"posix", ~NFS_MOUNT_POSIX, NFS_MOUNT_POSIX},
+ {"noposix", ~NFS_MOUNT_POSIX, 0},
+ {"cto", ~NFS_MOUNT_NOCTO, 0},
+ {"nocto", ~NFS_MOUNT_NOCTO, NFS_MOUNT_NOCTO},
+ {"ac", ~NFS_MOUNT_NOAC, 0},
+ {"noac", ~NFS_MOUNT_NOAC, NFS_MOUNT_NOAC},
+ {"lock", ~NFS_MOUNT_NONLM, 0},
+ {"nolock", ~NFS_MOUNT_NONLM, NFS_MOUNT_NONLM},
+ {"acl", ~NFS_MOUNT_NOACL, 0},
+ {"noacl", ~NFS_MOUNT_NOACL, NFS_MOUNT_NOACL},
+ {"v2", ~NFS_MOUNT_VER3, 0},
+ {"v3", ~NFS_MOUNT_VER3, NFS_MOUNT_VER3},
+ {"udp", ~NFS_MOUNT_TCP, 0},
+ {"tcp", ~NFS_MOUNT_TCP, NFS_MOUNT_TCP},
+ {"broken_suid", ~NFS_MOUNT_BROKEN_SUID, NFS_MOUNT_BROKEN_SUID},
+ {"ro", ~NFS_MOUNT_KLIBC_RONLY, NFS_MOUNT_KLIBC_RONLY},
+ {"rw", ~NFS_MOUNT_KLIBC_RONLY, 0},
+ {NULL, 0, 0}
+ };
+
+ /*
+  * Parse VAL as a non-negative integer option value.  The base is
+  * auto-detected by strtoul() (decimal, 0x hex or leading-0 octal).
+  *
+  * CTX names the option for the error message.  On an empty string,
+  * trailing garbage, or a value that does not fit in an int, print an
+  * error and longjmp() to abort_buf instead of returning.
+  */
+ static int parse_int(const char *val, const char *ctx)
+ {
+ 	char *end;
+ 	unsigned long num;
+
+ 	errno = 0;
+ 	num = strtoul(val, &end, 0);
+ 	/* Reject out-of-range input instead of silently wrapping it */
+ 	if (*val == '\0' || *end != '\0' || errno == ERANGE || num > INT_MAX) {
+ 		fprintf(stderr, "%s: invalid value for %s\n", val, ctx);
+ 		longjmp(abort_buf, 1);
+ 	}
+ 	return (int)num;
+ }
+
+ /*
+  * Parse the comma-separated option string OPTS in place (separators and
+  * '=' signs are overwritten with NUL bytes).
+  *
+  * "name=value" tokens are looked up in int_opts, bare tokens in
+  * bool_opts; empty tokens are skipped.  An unknown option or a bad NFS
+  * version prints an error and longjmp()s to abort_buf.
+  */
+ static void parse_opts(char *opts)
+ {
+ 	char *token;
+
+ 	for (token = strsep(&opts, ","); token != NULL;
+ 	     token = strsep(&opts, ",")) {
+ 		char *value;
+
+ 		if (*token == '\0')
+ 			continue;
+
+ 		value = strchr(token, '=');
+ 		if (value == NULL) {
+ 			/* Flag option: find it and apply its masks */
+ 			struct bool_opts *flag;
+
+ 			for (flag = bool_opts; flag->name; flag++) {
+ 				if (strcmp(flag->name, token) == 0)
+ 					break;
+ 			}
+ 			if (!flag->name) {
+ 				fprintf(stderr, "%s: bad option '%s'\n",
+ 					progname, token);
+ 				longjmp(abort_buf, 1);
+ 			}
+ 			mount_data.flags &= flag->and_mask;
+ 			mount_data.flags |= flag->or_mask;
+ 		} else {
+ 			/* Integer option: split at '=' and store the value */
+ 			struct int_opts *num;
+
+ 			*value++ = '\0';
+ 			for (num = int_opts; num->name; num++) {
+ 				if (strcmp(num->name, token) == 0)
+ 					break;
+ 			}
+ 			if (!num->name) {
+ 				fprintf(stderr, "%s: bad option '%s'\n",
+ 					progname, token);
+ 				longjmp(abort_buf, 1);
+ 			}
+ 			*(num->val) = parse_int(value, num->name);
+ 		}
+ 	}
+
+ 	/* If new-style options "nfsvers=" or "vers=" are passed, override
+ 	   old "v2" and "v3" options */
+ 	switch (nfs_version) {
+ 	case 0:
+ 		/* No explicit version requested */
+ 		break;
+ 	case 2:
+ 		mount_data.flags &= ~NFS_MOUNT_VER3;
+ 		break;
+ 	case 3:
+ 		mount_data.flags |= NFS_MOUNT_VER3;
+ 		break;
+ 	default:
+ 		fprintf(stderr, "%s: bad NFS version '%d'\n",
+ 			progname, nfs_version);
+ 		longjmp(abort_buf, 1);
+ 	}
+ }
+
+ /*
+  * Convert the dotted-quad string IP with inet_aton() and return the
+  * resulting s_addr value.  On failure print an error and longjmp() to
+  * abort_buf.
+  */
+ static uint32_t parse_addr(const char *ip)
+ {
+ 	struct in_addr parsed;
+ 	int ok = inet_aton(ip, &parsed);
+
+ 	if (!ok) {
+ 		fprintf(stderr, "%s: can't parse IP address '%s'\n",
+ 			progname, ip);
+ 		longjmp(abort_buf, 1);
+ 	}
+
+ 	return parsed.s_addr;
+ }
+
+ /*
+  * Verify that PATH exists and is a directory; otherwise report the
+  * problem and longjmp() to abort_buf.
+  */
+ static void check_path(const char *path)
+ {
+ 	struct stat info;
+
+ 	if (stat(path, &info) == -1) {
+ 		perror("stat");
+ 		longjmp(abort_buf, 1);
+ 	}
+
+ 	if (S_ISDIR(info.st_mode))
+ 		return;
+
+ 	fprintf(stderr, "%s: '%s' not a directory\n", progname, path);
+ 	longjmp(abort_buf, 1);
+ }
+
+ int main(int argc, char *argv[])
+ 	__attribute__ ((weak, alias("nfsmount_main")));
+
+ /*
+  * nfsmount [-o options] [-p portmap_file] server:/path [mountpoint]
+  *
+  * "server" must be an address inet_aton() accepts; the mount point
+  * defaults to "/nfs_root".  With -p (and an MMU) a dummy portmapper is
+  * run for the duration of the mount and torn down afterwards.
+  *
+  * Returns 0 on success and 1 on any failure, including errors reported
+  * by the helpers through longjmp(abort_buf, 1).
+  */
+ int nfsmount_main(int argc, char *argv[])
+ {
+ 	uint32_t server;
+ 	/* volatile: modified after setjmp() and read after longjmp() */
+ 	char *volatile rem_name = NULL;
+ 	char *rem_path;
+ 	char *hostname;
+ 	char *path;
+ 	int c;
+ 	const char *portmap_file;
+ 	pid_t spoof_portmap;
+ 	int ret;
+
+ 	/*
+ 	 * Every longjmp() in this file passes 1.  Release the name copy
+ 	 * here, since parse_addr() and check_path() can bail out after
+ 	 * it has been allocated.
+ 	 */
+ 	if (setjmp(abort_buf) != 0) {
+ 		free(rem_name);
+ 		return 1;
+ 	}
+
+ 	/* Set these here to avoid longjmp warning */
+ 	portmap_file = NULL;
+ 	spoof_portmap = 0;
+ 	server = 0;
+
+ 	/* If progname is set we're invoked from another program */
+ 	if (!progname) {
+ 		struct timeval now;
+ 		progname = argv[0];
+ 		gettimeofday(&now, NULL);
+ 		srand48(now.tv_usec ^ (now.tv_sec << 24));
+ 	}
+
+ 	while ((c = getopt(argc, argv, "o:p:")) != EOF) {
+ 		switch (c) {
+ 		case 'o':
+ 			parse_opts(optarg);
+ 			break;
+ 		case 'p':
+ 			portmap_file = optarg;
+ 			break;
+ 		case '?':
+ 			fprintf(stderr, "%s: invalid option -%c\n",
+ 				progname, optopt);
+ 			return 1;
+ 		}
+ 	}
+
+ 	if (optind == argc) {
+ 		fprintf(stderr, "%s: need a path\n", progname);
+ 		return 1;
+ 	}
+
+ 	hostname = rem_path = argv[optind];
+
+ 	/* Keep an unsplit "server:/path" copy to pass to nfs_mount() */
+ 	rem_name = strdup(rem_path);
+ 	if (rem_name == NULL) {
+ 		perror("strdup");
+ 		return 1;
+ 	}
+
+ 	rem_path = strchr(rem_path, ':');
+ 	if (rem_path == NULL) {
+ 		fprintf(stderr, "%s: need a server\n", progname);
+ 		free(rem_name);
+ 		return 1;
+ 	}
+
+ 	/* Split argv[optind] in place into hostname and remote path */
+ 	*rem_path++ = '\0';
+
+ 	if (*rem_path != '/') {
+ 		fprintf(stderr, "%s: need a path\n", progname);
+ 		free(rem_name);
+ 		return 1;
+ 	}
+
+ 	server = parse_addr(hostname);
+
+ 	if (optind <= argc - 2)
+ 		path = argv[optind + 1];
+ 	else
+ 		path = "/nfs_root";
+
+ 	check_path(path);
+
+ #if !_KLIBC_NO_MMU
+ 	/* Note: uClinux can't fork(), so the spoof portmapper is not
+ 	   available on uClinux. */
+ 	if (portmap_file)
+ 		spoof_portmap = start_dummy_portmap(portmap_file);
+
+ 	if (spoof_portmap == -1) {
+ 		free(rem_name);
+ 		return 1;
+ 	}
+ #endif
+
+ 	ret = 0;
+ 	if (nfs_mount(rem_name, hostname, server, rem_path, path,
+ 		      &mount_data) != 0)
+ 		ret = 1;
+
+ 	/* If we set up the spoofer, tear it down now */
+ 	if (spoof_portmap) {
+ 		kill(spoof_portmap, SIGTERM);
+ 		while (waitpid(spoof_portmap, NULL, 0) == -1
+ 		       && errno == EINTR)
+ 			;
+ 	}
+
+ 	free(rem_name);
+
+ 	return ret;
+ }
diff --git a/usr/kinit/nfsmount/mount.c b/usr/kinit/nfsmount/mount.c
new file mode 100644
index 0000000..ae48354
--- /dev/null
+++ b/usr/kinit/nfsmount/mount.c
@@ -0,0 +1,347 @@
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <linux/nfs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "nfsmount.h"
+#include "sunrpc.h"
+
+static uint32_t mount_port;
+
+ /*
+  * MOUNT request: the RPC call header followed by the path as a
+  * length-prefixed byte string.  mount_call() allocates the trailing
+  * path bytes rounded up with pad_len().
+  */
+ struct mount_call {
+ 	struct rpc_call rpc;
+ 	uint32_t path_len;
+ 	char path[];	/* C99 flexible array member */
+ };
+
+/*
+ * The following structure is the NFS v3 on-the-wire file handle,
+ * as defined in rfc1813.
+ * This differs from the structure used by the kernel,
+ * defined in <linux/nfh3.h>: rfc has a long in network order,
+ * kernel has a short in native order.
+ * Both kernel and rfc use the name nfs_fh; kernel name is
+ * visible to user apps in some versions of libc.
+ * Use different name to avoid clashes.
+ */
+#define NFS_MAXFHSIZE_WIRE 64
+struct nfs_fh_wire {
+ uint32_t size;
+ char data[NFS_MAXFHSIZE_WIRE];
+} __attribute__ ((packed, aligned(4)));
+ /* Reply buffer for MOUNT calls: RPC reply header, status word, file handle */
+ struct mount_reply {
+ struct rpc_reply reply;
+ uint32_t status;
+ struct nfs_fh_wire fh;
+ } __attribute__ ((packed, aligned(4)));
+ /* Shortest reply mount_call() accepts for MNT: header plus status, no handle */
+ #define MNT_REPLY_MINSIZE (sizeof(struct rpc_reply) + sizeof(uint32_t))
+
+ /*
+  * Fill in nfs_port and mount_port if they are still zero.
+  *
+  * Each port is requested from the server's portmapper for the NFS
+  * version and transport selected in data->flags.  If the lookup yields
+  * 0 the well-known default is used, except that a missing NFS port
+  * over TCP is an error.
+  *
+  * Returns 0 on success, -1 if NFS over TCP is not available.
+  */
+ static int get_ports(uint32_t server, const struct nfs_mount_data *data)
+ {
+ 	const int use_v3 = (data->flags & NFS_MOUNT_VER3) != 0;
+ 	const uint32_t nfs_ver = use_v3 ? NFS3_VERSION : NFS2_VERSION;
+ 	const uint32_t mount_ver = use_v3 ? NFS_MNT3_VERSION : NFS_MNT_VERSION;
+ 	uint32_t proto = IPPROTO_UDP;
+
+ 	if (data->flags & NFS_MOUNT_TCP)
+ 		proto = IPPROTO_TCP;
+
+ 	if (nfs_port == 0)
+ 		nfs_port = portmap(server, NFS_PROGRAM, nfs_ver, proto);
+
+ 	if (nfs_port == 0) {
+ 		if (proto == IPPROTO_TCP) {
+ 			struct in_addr addr = { server };
+ 			fprintf(stderr, "NFS over TCP not available from %s\n",
+ 				inet_ntoa(addr));
+ 			return -1;
+ 		}
+ 		nfs_port = NFS_PORT;
+ 	}
+
+ 	if (mount_port == 0)
+ 		mount_port = portmap(server, NFS_MNT_PROGRAM, mount_ver, proto);
+
+ 	if (mount_port == 0)
+ 		mount_port = MOUNT_PORT;
+
+ 	return 0;
+ }
+
+ /* Round LEN up to the next multiple of 4 */
+ static inline int pad_len(int len)
+ {
+ 	int bumped = len + 3;
+
+ 	return bumped - (bumped & 3);
+ }
+ /*
+  * Debug helper: print the server address, remote path and the mount
+  * parameters in DATA, plus the current mount_port and nfs_port.
+  * The body is empty unless DEBUG is defined.
+  */
+ static inline void dump_params(uint32_t server,
+ const char *path,
+ const struct nfs_mount_data *data)
+ {
+ (void)server; /* the parameters are otherwise unused without DEBUG */
+ (void)path;
+ (void)data;
+
+ #ifdef DEBUG
+ struct in_addr addr = { server };
+
+ printf("NFS params:\n");
+ printf(" server = %s, path = \"%s\", ", inet_ntoa(addr), path);
+ printf("version = %d, proto = %s\n",
+ data->flags & NFS_MOUNT_VER3 ? 3 : 2,
+ (data->flags & NFS_MOUNT_TCP) ? "tcp" : "udp");
+ printf(" mount_port = %d, nfs_port = %d, flags = %08x\n",
+ mount_port, nfs_port, data->flags);
+ printf(" rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
+ data->rsize, data->wsize, data->timeo, data->retrans);
+ printf(" acreg (min, max) = (%d, %d), acdir (min, max) = (%d, %d)\n",
+ data->acregmin, data->acregmax, data->acdirmin, data->acdirmax);
+ printf(" soft = %d, intr = %d, posix = %d, nocto = %d, noac = %d\n",
+ (data->flags & NFS_MOUNT_SOFT) != 0,
+ (data->flags & NFS_MOUNT_INTR) != 0,
+ (data->flags & NFS_MOUNT_POSIX) != 0,
+ (data->flags & NFS_MOUNT_NOCTO) != 0,
+ (data->flags & NFS_MOUNT_NOAC) != 0);
+ #endif
+ }
+
+ /*
+  * Debug helper: hex-dump the LEN bytes of the file handle at DATA,
+  * 16 bytes per row, each row prefixed with its starting offset.
+  * The body is empty unless DEBUG is defined.
+  */
+ static inline void dump_fh(const char *data, int len)
+ {
+ 	(void)data;
+ 	(void)len;
+
+ #ifdef DEBUG
+ 	int i;
+
+ 	/* Report the real length: v3 handles are not NFS2_FHSIZE bytes */
+ 	printf("Root file handle: %d bytes\n", len);
+
+ 	for (i = 0; i < len; i++) {
+ 		if (i % 16 == 0)
+ 			printf(" %4d: ", i);
+ 		printf("%02x ", data[i] & 0xff);
+ 		/* End the row when it is full or the data runs out */
+ 		if (i % 16 == 15 || i == len - 1)
+ 			printf("\n");
+ 	}
+ #endif
+ }
+
+static struct mount_reply mnt_reply;
+
+ /*
+  * Send MOUNT-protocol procedure PROC (protocol version VERSION) for
+  * PATH through CLNT.  The reply is received into the file-static
+  * mnt_reply.
+  *
+  * For MNTPROC_MNT the reply must contain at least the status word and
+  * the status must be zero; for other procedures only the RPC exchange
+  * itself is checked.
+  *
+  * Returns 0 on success, -1 on failure.
+  */
+ static int mount_call(uint32_t proc, uint32_t version,
+ 		      const char *path, struct client *clnt)
+ {
+ 	struct mount_call *mnt_call;
+ 	size_t path_len, call_len;
+ 	struct rpc rpc;
+ 	int ret = -1;
+
+ 	path_len = strlen(path);
+ 	call_len = sizeof(*mnt_call) + pad_len(path_len);
+
+ 	mnt_call = malloc(call_len);
+ 	if (mnt_call == NULL) {
+ 		perror("malloc");
+ 		goto done;
+ 	}
+
+ 	/*
+ 	 * Clear the whole request, not just the fixed header, so the pad
+ 	 * bytes after the path go out as zeros rather than heap garbage.
+ 	 */
+ 	memset(mnt_call, 0, call_len);
+
+ 	mnt_call->rpc.program = htonl(NFS_MNT_PROGRAM);
+ 	mnt_call->rpc.prog_vers = htonl(version);
+ 	mnt_call->rpc.proc = htonl(proc);
+ 	mnt_call->path_len = htonl(path_len);
+ 	memcpy(mnt_call->path, path, path_len);
+
+ 	rpc.call = (struct rpc_call *)mnt_call;
+ 	rpc.call_len = call_len;
+ 	rpc.reply = (struct rpc_reply *)&mnt_reply;
+ 	rpc.reply_len = sizeof(mnt_reply);
+
+ 	if (rpc_call(clnt, &rpc) < 0)
+ 		goto done;
+
+ 	if (proc == MNTPROC_MNT) {
+ 		if (rpc.reply_len < MNT_REPLY_MINSIZE) {
+ 			fprintf(stderr, "incomplete reply: %zu < %zu\n",
+ 				rpc.reply_len, MNT_REPLY_MINSIZE);
+ 			goto done;
+ 		}
+
+ 		if (mnt_reply.status != 0) {
+ 			fprintf(stderr,
+ 				"mount call failed - server replied: %s.\n",
+ 				strerror(ntohl(mnt_reply.status)));
+ 			goto done;
+ 		}
+ 	}
+
+ 	ret = 0;
+
+ done:
+ 	free(mnt_call);	/* free(NULL) is a no-op */
+
+ 	return ret;
+ }
+
+ /*
+  * Perform the version-2 style MNT call for PATH and, on success, copy
+  * the returned handle into both data->root and data->old_root.
+  * Returns the result of mount_call().
+  */
+ static int mount_v2(const char *path,
+ 		    struct nfs_mount_data *data, struct client *clnt)
+ {
+ 	int status = mount_call(MNTPROC_MNT, NFS_MNT_VERSION, path, clnt);
+
+ 	if (status != 0)
+ 		return status;
+
+ 	dump_fh((const char *)&mnt_reply.fh, NFS2_FHSIZE);
+
+ 	data->root.size = NFS_FHSIZE;
+ 	memcpy(data->root.data, &mnt_reply.fh, NFS_FHSIZE);
+ 	memcpy(data->old_root.data, &mnt_reply.fh, NFS_FHSIZE);
+
+ 	return status;
+ }
+ /* Send MNTPROC_UMNT for PATH with NFS_MNT_VERSION; returns mount_call()'s result */
+ static inline int umount_v2(const char *path, struct client *clnt)
+ {
+ return mount_call(MNTPROC_UMNT, NFS_MNT_VERSION, path, clnt);
+ }
+
+ /*
+  * Perform the version-3 MNT call for PATH and, on success, copy the
+  * variable-length handle from the reply into data->root.
+  *
+  * The handle length comes from the server, so it is checked against
+  * both the wire buffer and the destination before it is used.
+  *
+  * Returns 0 on success, -1 on failure.
+  */
+ static int mount_v3(const char *path,
+ 		    struct nfs_mount_data *data, struct client *clnt)
+ {
+ 	size_t fhsize;
+ 	int ret = mount_call(MNTPROC_MNT, NFS_MNT3_VERSION, path, clnt);
+
+ 	if (ret != 0)
+ 		return ret;
+
+ 	fhsize = ntohl(mnt_reply.fh.size);
+ 	if (fhsize > sizeof(mnt_reply.fh.data) ||
+ 	    fhsize > sizeof(data->root.data)) {
+ 		fprintf(stderr, "mount call failed - bad file handle size %zu\n",
+ 			fhsize);
+ 		return -1;
+ 	}
+
+ 	dump_fh(mnt_reply.fh.data, fhsize);
+
+ 	memset(data->old_root.data, 0, NFS_FHSIZE);
+ 	memset(&data->root, 0, sizeof(data->root));
+ 	data->root.size = fhsize;
+ 	memcpy(data->root.data, mnt_reply.fh.data, fhsize);
+ 	data->flags |= NFS_MOUNT_VER3;
+
+ 	return 0;
+ }
+ /* Send MNTPROC_UMNT for PATH with NFS_MNT3_VERSION; returns mount_call()'s result */
+ static inline int umount_v3(const char *path, struct client *clnt)
+ {
+ return mount_call(MNTPROC_UMNT, NFS_MNT3_VERSION, path, clnt);
+ }
+
+ /*
+  * Mount an NFS export.
+  *
+  * pathname - source name passed to mount(2)
+  * hostname - server name copied into data->hostname
+  * server   - server IPv4 address, used as sin_addr.s_addr unchanged
+  * rem_path - exported path sent in the MNT/UMNT calls
+  * path     - local mount point
+  * data     - mount parameters; the root handle, server address,
+  *            hostname and socket fd are filled in here
+  *
+  * Returns 0 on success, -1 on failure.  If the MNT call succeeded but
+  * a later step failed, a UMNT call is sent before returning.
+  */
+ int nfs_mount(const char *pathname, const char *hostname,
+ 	      uint32_t server, const char *rem_path, const char *path,
+ 	      struct nfs_mount_data *data)
+ {
+ 	struct client *clnt = NULL;
+ 	struct sockaddr_in addr;
+ 	char mounted = 0;
+ 	int sock = -1;
+ 	int ret = 0;
+ 	int mountflags;
+
+ 	if (get_ports(server, data) != 0)
+ 		goto bail;
+
+ 	dump_params(server, rem_path, data);
+
+ 	if (data->flags & NFS_MOUNT_TCP)
+ 		clnt = tcp_client(server, mount_port, CLI_RESVPORT);
+ 	else
+ 		clnt = udp_client(server, mount_port, CLI_RESVPORT);
+
+ 	if (clnt == NULL)
+ 		goto bail;
+
+ 	if (data->flags & NFS_MOUNT_VER3)
+ 		ret = mount_v3(rem_path, data, clnt);
+ 	else
+ 		ret = mount_v2(rem_path, data, clnt);
+
+ 	if (ret == -1)
+ 		goto bail;
+ 	mounted = 1;
+
+ 	if (data->flags & NFS_MOUNT_TCP)
+ 		sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
+ 	else
+ 		sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+
+ 	if (sock == -1) {
+ 		perror("socket");
+ 		goto bail;
+ 	}
+
+ 	if (bindresvport(sock, 0) == -1) {
+ 		perror("bindresvport");
+ 		goto bail;
+ 	}
+
+ 	/* Clear first so no uninitialized padding is copied into data */
+ 	memset(&addr, 0, sizeof(addr));
+ 	addr.sin_family = AF_INET;
+ 	addr.sin_addr.s_addr = server;
+ 	addr.sin_port = htons(nfs_port);
+ 	memcpy(&data->addr, &addr, sizeof(data->addr));
+
+ 	/* strncpy() does not terminate when the source fills the buffer */
+ 	strncpy(data->hostname, hostname, sizeof(data->hostname) - 1);
+ 	data->hostname[sizeof(data->hostname) - 1] = '\0';
+
+ 	data->fd = sock;
+
+ 	/* The klibc-private read-only bit becomes MS_RDONLY and is masked off */
+ 	mountflags = (data->flags & NFS_MOUNT_KLIBC_RONLY) ? MS_RDONLY : 0;
+ 	data->flags = data->flags & NFS_MOUNT_FLAGMASK;
+ 	ret = mount(pathname, path, "nfs", mountflags, data);
+
+ 	if (ret == -1) {
+ 		if (errno == ENODEV) {
+ 			fprintf(stderr,
+ 				"mount: the kernel lacks NFS v%d support\n",
+ 				(data->flags & NFS_MOUNT_VER3) ? 3 : 2);
+ 		} else {
+ 			perror("mount");
+ 		}
+ 		goto bail;
+ 	}
+
+ 	dprintf("Mounted %s on %s\n", pathname, path);
+
+ 	goto done;
+
+ bail:
+ 	if (mounted) {
+ 		if (data->flags & NFS_MOUNT_VER3)
+ 			umount_v3(rem_path, clnt);
+ 		else
+ 			umount_v2(rem_path, clnt);
+ 	}
+
+ 	ret = -1;
+
+ done:
+ 	if (clnt)
+ 		client_free(clnt);
+
+ 	if (sock != -1)
+ 		close(sock);
+
+ 	return ret;
+ }
diff --git a/usr/kinit/nfsmount/nfsmount.h b/usr/kinit/nfsmount/nfsmount.h
new file mode 100644
index 0000000..7b28ded
--- /dev/null
+++ b/usr/kinit/nfsmount/nfsmount.h
@@ -0,0 +1,34 @@
+#ifndef NFSMOUNT_NFSMOUNT_H
+#define NFSMOUNT_NFSMOUNT_H
+
+#include <linux/nfs_mount.h>
+
+extern int nfs_port;
+
+extern int nfsmount_main(int argc, char *argv[]);
+int nfs_mount(const char *rem_name, const char *hostname,
+ uint32_t server, const char *rem_path,
+ const char *path, struct nfs_mount_data *data);
+
+enum nfs_proto {
+ v2 = 2,
+ v3,
+};
+
+/* masked with NFS_MOUNT_FLAGMASK before mount() call */
+#define NFS_MOUNT_KLIBC_RONLY 0x00010000U
+
+#ifdef DEBUG
+# define dprintf printf
+#else
+# define dprintf(...) ((void)0)
+#endif
+
+#ifndef MNTPROC_MNT
+#define MNTPROC_MNT 1
+#endif
+#ifndef MNTPROC_UMNT
+#define MNTPROC_UMNT 3
+#endif
+
+#endif /* NFSMOUNT_NFSMOUNT_H */
diff --git a/usr/kinit/nfsmount/portmap.c b/usr/kinit/nfsmount/portmap.c
new file mode 100644
index 0000000..0a3e2d0
--- /dev/null
+++ b/usr/kinit/nfsmount/portmap.c
@@ -0,0 +1,73 @@
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <asm/byteorder.h> /* __constant_hton* */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "nfsmount.h"
+#include "sunrpc.h"
+ /* Portmapper request: the RPC call header followed by four argument words */
+ struct portmap_call {
+ struct rpc_call rpc;
+ uint32_t program;
+ uint32_t version;
+ uint32_t proto;
+ uint32_t port;
+ };
+ /* Portmapper reply: the RPC reply header followed by the port number */
+ struct portmap_reply {
+ struct rpc_reply rpc;
+ uint32_t port;
+ };
+ /* Request template preset for PMAP_PROC_GETPORT; portmap() fills in program, version and proto */
+ static struct portmap_call call = {
+ .rpc = {
+ .program = __constant_htonl(RPC_PMAP_PROGRAM),
+ .prog_vers = __constant_htonl(RPC_PMAP_VERSION),
+ .proc = __constant_htonl(PMAP_PROC_GETPORT),
+ }
+ };
+ /*
+  * Ask the portmapper on SERVER (port RPC_PMAP_PORT) which port serves
+  * PROGRAM/VERSION over PROTO.  The connection is tried over TCP first
+  * and over UDP if the TCP client cannot be created.
+  *
+  * Returns the port in host byte order, or 0 if no client could be
+  * created, the RPC failed or the reply was too short.
+  */
+ uint32_t portmap(uint32_t server, uint32_t program, uint32_t version, uint32_t proto)
+ {
+ struct portmap_reply reply;
+ struct client *clnt;
+ struct rpc rpc;
+ uint32_t port = 0;
+
+ clnt = tcp_client(server, RPC_PMAP_PORT, 0);
+ if (clnt == NULL) {
+ clnt = udp_client(server, RPC_PMAP_PORT, 0);
+ if (clnt == NULL)
+ goto bail;
+ }
+
+ call.program = htonl(program); /* the file-static request is reused on every call */
+ call.version = htonl(version);
+ call.proto = htonl(proto);
+
+ rpc.call = (struct rpc_call *)&call;
+ rpc.call_len = sizeof(call);
+ rpc.reply = (struct rpc_reply *)&reply;
+ rpc.reply_len = sizeof(reply);
+
+ if (rpc_call(clnt, &rpc) < 0)
+ goto bail;
+
+ if (rpc.reply_len < sizeof(reply)) {
+ fprintf(stderr, "incomplete reply: %zu < %zu\n",
+ rpc.reply_len, sizeof(reply));
+ goto bail;
+ }
+
+ port = ntohl(reply.port);
+
+ bail: /* also reached on success; port is still 0 on every failure path */
+ dprintf("Port for %d/%d[%s]: %d\n", program, version,
+ proto == IPPROTO_TCP ? "tcp" : "udp", port);
+
+ if (clnt)
+ client_free(clnt);
+
+ return port;
+ }
diff --git a/usr/kinit/nfsmount/sunrpc.c b/usr/kinit/nfsmount/sunrpc.c
new file mode 100644
index 0000000..0a7fcf5
--- /dev/null
+++ b/usr/kinit/nfsmount/sunrpc.c
@@ -0,0 +1,252 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <poll.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "nfsmount.h"
+#include "sunrpc.h"
+
+/*
+ * The magic offset is needed here because RPC over TCP includes a
+ * field that RPC over UDP doesn't. Luvverly.
+ */
+static int rpc_do_reply(struct client *clnt, struct rpc *rpc, size_t off)
+{
+ int ret;
+
+ if ((ret = read(clnt->sock,
+ ((char *)rpc->reply) + off,
+ rpc->reply_len - off)) == -1) {
+ perror("read");
+ goto bail;
+ } else if (ret < sizeof(struct rpc_reply) - off) {
+ fprintf(stderr, "short read: %d < %zu\n", ret,
+ sizeof(struct rpc_reply) - off);
+ goto bail;
+ }
+ rpc->reply_len = ret + off;
+
+ if ((!off && !(ntohl(rpc->reply->hdr.frag_hdr) & LAST_FRAG)) ||
+ rpc->reply->hdr.udp.xid != rpc->call->hdr.udp.xid ||
+ rpc->reply->hdr.udp.msg_type != htonl(RPC_REPLY)) {
+ fprintf(stderr, "bad reply\n");
+ goto bail;
+ }
+
+ if (ntohl(rpc->reply->state) != REPLY_OK) {
+ fprintf(stderr, "rpc failed: %d\n", ntohl(rpc->reply->state));
+ goto bail;
+ }
+
+ ret = 0;
+ goto done;
+
+bail:
+ ret = -1;
+done:
+ return ret;
+}
+
+ /*
+  * Fill in the common header fields of rpc->call: a fragment header
+  * marking a single last fragment of call_len - 4 bytes, a random xid,
+  * the RPC_CALL message type and RPC version 2.  CLNT is unused.
+  */
+ static void rpc_header(struct client *clnt, struct rpc *rpc)
+ {
+ 	struct rpc_call *request = rpc->call;
+
+ 	(void)clnt;
+
+ 	request->hdr.frag_hdr = htonl(LAST_FRAG | (rpc->call_len - 4));
+ 	request->hdr.udp.xid = lrand48();
+ 	request->hdr.udp.msg_type = htonl(RPC_CALL);
+ 	request->rpc_vers = htonl(2);
+ }
+
+ /*
+  * Send the call in RPC over CLNT's socket with one write() and read
+  * the reply with rpc_do_reply() at offset 0.
+  * Returns 0 on success, -1 on a failed or short write or a bad reply.
+  */
+ static int rpc_call_tcp(struct client *clnt, struct rpc *rpc)
+ {
+ 	int written;
+
+ 	rpc_header(clnt, rpc);
+
+ 	written = write(clnt->sock, rpc->call, rpc->call_len);
+ 	if (written == -1) {
+ 		perror("write");
+ 		return -1;
+ 	}
+ 	if (written < rpc->call_len) {
+ 		fprintf(stderr, "short write: %d < %zu\n", written,
+ 			rpc->call_len);
+ 		return -1;
+ 	}
+
+ 	return rpc_do_reply(clnt, rpc, 0);
+ }
+ /*
+  * Send the call in RPC over CLNT's UDP socket and wait for a valid
+  * reply, retransmitting on timeout.
+  *
+  * The request is written without the leading fragment header.  The
+  * outer loop (re)sends it; the inner loop polls for replies.  A poll
+  * timeout goes back to resend, a rejected reply keeps polling without
+  * resending.  Both loops share the counter i, so timeouts and rejected
+  * replies together are bounded by MAX_TRIES.
+  *
+  * Returns 0 on success, -1 on a write/poll error or when the tries are
+  * exhausted.
+  */
+ static int rpc_call_udp(struct client *clnt, struct rpc *rpc)
+ {
+ #define NR_FDS 1
+ #define TIMEOUT_MS 3000
+ #define MAX_TRIES 100
+ #define UDP_HDR_OFF (sizeof(struct rpc_header) - sizeof(struct rpc_udp_header))
+ struct pollfd fds[NR_FDS];
+ int ret = -1;
+ int i;
+
+ rpc_header(clnt, rpc);
+
+ fds[0].fd = clnt->sock;
+ fds[0].events = POLLRDNORM;
+
+ rpc->call_len -= UDP_HDR_OFF; /* modifies the caller's struct rpc */
+
+ for (i = 0; i < MAX_TRIES; i++) {
+ int timeout_ms = TIMEOUT_MS + (lrand48() % (TIMEOUT_MS / 2)); /* base timeout plus random jitter */
+ if ((ret = write(clnt->sock,
+ ((char *)rpc->call) + UDP_HDR_OFF,
+ rpc->call_len)) == -1) {
+ perror("write");
+ goto bail;
+ } else if (ret < rpc->call_len) {
+ fprintf(stderr, "short write: %d < %zu\n", ret,
+ rpc->call_len);
+ goto bail;
+ }
+ for (; i < MAX_TRIES; i++) {
+ if ((ret = poll(fds, NR_FDS, timeout_ms)) == -1) {
+ perror("poll");
+ goto bail;
+ }
+ if (ret == 0) {
+ dprintf("Timeout #%d\n", i + 1);
+ break; /* back to the outer loop to retransmit */
+ }
+ if ((ret = rpc_do_reply(clnt, rpc, UDP_HDR_OFF)) == 0) { /* NOTE(review): rpc_do_reply() overwrites rpc->reply_len even when it rejects a reply - confirm later reads still get a large enough buffer */
+ goto done;
+ } else {
+ dprintf("Failed on try #%d - retrying\n",
+ i + 1);
+ }
+ }
+ }
+
+ bail: /* also reached by falling out of the loop after MAX_TRIES */
+ ret = -1;
+
+ done:
+ return ret;
+ }
+
+ /*
+  * Create an RPC client connected over TCP to SERVER (an s_addr value)
+  * on PORT (host byte order).  With CLI_RESVPORT in FLAGS the socket is
+  * first bound with bindresvport().
+  *
+  * Returns a client to be released with client_free(), or NULL on
+  * failure; nothing stays allocated or open in the failure case.
+  */
+ struct client *tcp_client(uint32_t server, uint16_t port, uint32_t flags)
+ {
+ 	struct client *clnt;
+ 	struct sockaddr_in addr;
+ 	int sock = -1;
+
+ 	clnt = malloc(sizeof(*clnt));
+ 	if (clnt == NULL) {
+ 		perror("malloc");
+ 		goto bail;
+ 	}
+
+ 	memset(clnt, 0, sizeof(*clnt));
+
+ 	if ((sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) {
+ 		perror("socket");
+ 		goto bail;
+ 	}
+
+ 	if ((flags & CLI_RESVPORT) && bindresvport(sock, 0) == -1) {
+ 		perror("bindresvport");
+ 		goto bail;
+ 	}
+
+ 	memset(&addr, 0, sizeof(addr));
+ 	addr.sin_family = AF_INET;
+ 	addr.sin_port = htons(port);
+ 	addr.sin_addr.s_addr = server;
+
+ 	if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+ 		perror("connect");
+ 		goto bail;
+ 	}
+
+ 	clnt->sock = sock;
+ 	clnt->call_stub = rpc_call_tcp;
+ 	return clnt;
+
+ bail:
+ 	/* Don't leak the descriptor when bind or connect failed */
+ 	if (sock != -1)
+ 		close(sock);
+ 	free(clnt);
+ 	return NULL;
+ }
+
+ /*
+  * Create an RPC client whose UDP socket is connect()ed to SERVER (an
+  * s_addr value) on PORT (host byte order).  With CLI_RESVPORT in FLAGS
+  * the socket is first bound with bindresvport(); otherwise the local
+  * address is left for the kernel to choose.
+  *
+  * Returns a client to be released with client_free(), or NULL on
+  * failure; nothing stays allocated or open in the failure case.
+  */
+ struct client *udp_client(uint32_t server, uint16_t port, uint32_t flags)
+ {
+ 	struct client *clnt;
+ 	struct sockaddr_in addr;
+ 	int sock = -1;
+
+ 	clnt = malloc(sizeof(*clnt));
+ 	if (clnt == NULL) {
+ 		perror("malloc");
+ 		goto bail;
+ 	}
+
+ 	memset(clnt, 0, sizeof(*clnt));
+
+ 	if ((sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
+ 		perror("socket");
+ 		goto bail;
+ 	}
+
+ 	if ((flags & CLI_RESVPORT) && bindresvport(sock, 0) == -1) {
+ 		perror("bindresvport");
+ 		goto bail;
+ 	}
+
+ 	memset(&addr, 0, sizeof(addr));
+ 	addr.sin_family = AF_INET;
+ 	addr.sin_port = htons(port);
+ 	addr.sin_addr.s_addr = server;
+
+ 	if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
+ 		perror("connect");
+ 		goto bail;
+ 	}
+
+ 	clnt->sock = sock;
+ 	clnt->call_stub = rpc_call_udp;
+ 	return clnt;
+
+ bail:
+ 	/* Don't leak the descriptor when bind or connect failed */
+ 	if (sock != -1)
+ 		close(sock);
+ 	free(clnt);
+ 	return NULL;
+ }
+ /* Close the client's socket (unless it is -1) and free the client; C must not be NULL */
+ void client_free(struct client *c)
+ {
+ if (c->sock != -1)
+ close(c->sock);
+ free(c);
+ }
+ /* Perform one RPC through the client's transport-specific call_stub and return its result */
+ int rpc_call(struct client *client, struct rpc *rpc)
+ {
+ return client->call_stub(client, rpc);
+ }
diff --git a/usr/kinit/nfsmount/sunrpc.h b/usr/kinit/nfsmount/sunrpc.h
new file mode 100644
index 0000000..1bcfeea
--- /dev/null
+++ b/usr/kinit/nfsmount/sunrpc.h
@@ -0,0 +1,110 @@
+/*
+ * open-coded SunRPC structures
+ */
+#ifndef NFSMOUNT_SUNRPC_H
+#define NFSMOUNT_SUNRPC_H
+
+#include <sys/types.h>
+#include <inttypes.h>
+
+#define SUNRPC_PORT 111
+#define MOUNT_PORT 627
+
+#define RPC_CALL 0
+#define RPC_REPLY 1
+
+#define PORTMAP_PROGRAM 100000
+#define NLM_PROGRAM 100021
+
+#define RPC_PMAP_PROGRAM 100000
+#define RPC_PMAP_VERSION 2
+#define RPC_PMAP_PORT 111
+
+#define PMAP_PROC_NULL 0
+#define PMAP_PROC_SET 1
+#define PMAP_PROC_UNSET 2
+#define PMAP_PROC_GETPORT 3
+#define PMAP_PROC_DUMP 4
+
+#define LAST_FRAG 0x80000000
+
+#define REPLY_OK 0
+#define REPLY_DENIED 1
+
+#define SUCCESS 0
+#define PROG_UNAVAIL 1
+#define PROG_MISMATCH 2
+#define PROC_UNAVAIL 3
+#define GARBAGE_ARGS 4
+#define SYSTEM_ERR 5
+
+enum {
+ AUTH_NULL,
+ AUTH_UNIX,
+};
+
+struct rpc_auth {
+ uint32_t flavor;
+ uint32_t len;
+ uint32_t body[];
+};
+
+struct rpc_udp_header {
+ uint32_t xid;
+ uint32_t msg_type;
+};
+
+struct rpc_header {
+ uint32_t frag_hdr;
+ struct rpc_udp_header udp;
+};
+
+struct rpc_call {
+ struct rpc_header hdr;
+ uint32_t rpc_vers;
+
+ uint32_t program;
+ uint32_t prog_vers;
+ uint32_t proc;
+ uint32_t cred_flavor;
+
+ uint32_t cred_len;
+ uint32_t vrf_flavor;
+ uint32_t vrf_len;
+};
+
+struct rpc_reply {
+ struct rpc_header hdr;
+ uint32_t reply_state;
+ uint32_t vrf_flavor;
+ uint32_t vrf_len;
+ uint32_t state;
+};
+
+struct rpc {
+ struct rpc_call *call;
+ size_t call_len;
+ struct rpc_reply *reply;
+ size_t reply_len;
+};
+
+struct client;
+
+typedef int (*call_stub) (struct client *, struct rpc *);
+
+struct client {
+ int sock;
+ call_stub call_stub;
+};
+
+#define CLI_RESVPORT 00000001
+
+struct client *tcp_client(uint32_t server, uint16_t port, uint32_t flags);
+struct client *udp_client(uint32_t server, uint16_t port, uint32_t flags);
+void client_free(struct client *client);
+
+int rpc_call(struct client *client, struct rpc *rpc);
+
+uint32_t portmap(uint32_t server, uint32_t program, uint32_t version, uint32_t proto);
+
+#endif /* NFSMOUNT_SUNRPC_H */
diff --git a/usr/kinit/nfsroot.c b/usr/kinit/nfsroot.c
new file mode 100644
index 0000000..3b80773
--- /dev/null
+++ b/usr/kinit/nfsroot.c
@@ -0,0 +1,111 @@
+#include <arpa/inet.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "kinit.h"
+#include "netdev.h"
+#include "nfsmount.h"
+
+ /*
+  * Replace the first "%s" in PATH (a buffer of LEN bytes) with the
+  * dotted-quad form of CLIENT, in place, truncating to LEN - 1
+  * characters.  A path without "%s" is left untouched.
+  *
+  * PATH comes from the command line or the boot server, so it is never
+  * used as a printf format: other '%' sequences are copied verbatim.
+  *
+  * Exits the process if a substitution is needed but CLIENT is
+  * INADDR_NONE.  Returns PATH.
+  */
+ static char *sub_client(__u32 client, char *path, size_t len)
+ {
+ 	struct in_addr addr = { client };
+ 	char buf[len];
+ 	char *marker = strstr(path, "%s");
+
+ 	if (marker != NULL) {
+ 		if (client == INADDR_NONE) {
+ 			fprintf(stderr, "Root-NFS: no client address\n");
+ 			exit(1);
+ 		}
+
+ 		/* prefix + address + whatever follows the "%s" marker */
+ 		snprintf(buf, len, "%.*s%s%s", (int)(marker - path), path,
+ 			 inet_ntoa(addr), marker + 2);
+ 		strcpy(path, buf);
+ 	}
+
+ 	return path;
+ }
+
+#define NFS_ARGC 6
+#define MOUNT_POINT "/root"
+
+ /*
+  * Mount the NFS root on MOUNT_POINT by building an argument vector and
+  * calling nfsmount_main().
+  *
+  * The remote path comes from the "nfsroot=" argument, else from the
+  * bootpath of the first interface with a server address, else the
+  * default "/tftpboot/%s".  Text after the first ',' is passed on as
+  * "-o" options, a path without ':' gets the server address prepended,
+  * and "%s" is replaced with the client address.
+  *
+  * FLAGS is currently ignored.  Returns 0 on success, -1 if the mount
+  * failed; exits the process when a needed server or client address is
+  * unknown.
+  */
+ int mount_nfs_root(int argc, char *argv[], int flags)
+ {
+ 	(void)flags;	/* FIXME - don't ignore this */
+
+ 	struct in_addr addr = { INADDR_NONE };
+ 	__u32 client = INADDR_NONE;
+ 	const int len = 1024;
+ 	struct netdev *dev;
+ 	char *mtpt = MOUNT_POINT;
+ 	char *path = NULL;
+ 	char *dev_bootpath = NULL;
+ 	char root[len];
+ 	char *x, *opts;
+ 	int ret = 0;
+ 	int a = 1;
+ 	char *nfs_argv[NFS_ARGC + 1] = { "NFS-Mount" };
+
+ 	for (dev = ifaces; dev; dev = dev->next) {
+ 		if (dev->ip_server != INADDR_NONE &&
+ 		    dev->ip_server != INADDR_ANY) {
+ 			addr.s_addr = dev->ip_server;
+ 			client = dev->ip_addr;
+ 			dev_bootpath = dev->bootpath;
+ 			break;
+ 		}
+ 		if (dev->ip_addr != INADDR_NONE && dev->ip_addr != INADDR_ANY)
+ 			client = dev->ip_addr;
+ 	}
+
+ 	/*
+ 	 * if the "nfsroot" option is set then it overrides
+ 	 * bootpath supplied by the boot server.
+ 	 */
+ 	if ((path = get_arg(argc, argv, "nfsroot=")) == NULL) {
+ 		if ((path = dev_bootpath) == NULL || path[0] == '\0')
+ 			/* no path - set a default */
+ 			path = (char *)"/tftpboot/%s";
+ 	} else if (dev_bootpath && dev_bootpath[0] != '\0')
+ 		fprintf(stderr,
+ 			"nfsroot=%s overrides boot server bootpath %s\n",
+ 			path, dev_bootpath);
+
+ 	if ((opts = strchr(path, ',')) != NULL) {
+ 		*opts++ = '\0';
+ 		nfs_argv[a++] = (char *)"-o";
+ 		nfs_argv[a++] = opts;
+ 	}
+
+ 	if ((x = strchr(path, ':')) == NULL) {
+ 		if (addr.s_addr == INADDR_NONE) {
+ 			fprintf(stderr, "Root-NFS: no server defined\n");
+ 			exit(1);
+ 		}
+
+ 		snprintf(root, len, "%s:%s", inet_ntoa(addr), path);
+ 	} else {
+ 		/* Bounded copy: the path may be longer than root[] */
+ 		snprintf(root, len, "%s", path);
+ 	}
+
+ 	nfs_argv[a++] = sub_client(client, root, len);
+
+ 	dprintf("NFS-Root: mounting %s on %s with options \"%s\"\n",
+ 		nfs_argv[a-1], mtpt, opts ? opts : "");
+
+ 	nfs_argv[a++] = mtpt;
+ 	nfs_argv[a] = NULL;
+ 	assert(a <= NFS_ARGC);
+
+ 	dump_args(a, nfs_argv);
+
+ 	if ((ret = nfsmount_main(a, nfs_argv)) != 0)
+ 		ret = -1;
+
+ 	return ret;
+ }
diff --git a/usr/kinit/ramdisk_load.c b/usr/kinit/ramdisk_load.c
new file mode 100644
index 0000000..e3e15d8
--- /dev/null
+++ b/usr/kinit/ramdisk_load.c
@@ -0,0 +1,281 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <linux/fs.h>
+#include <linux/cdrom.h>
+#include <linux/fd.h>
+
+#include "kinit.h"
+#include "do_mounts.h"
+#include "fstype.h"
+#include "zlib.h"
+
+#define BUF_SZ 65536
+
+ /*
+  * Block until the user presses Enter.  Also returns when stdin hits
+  * end-of-file or an error, since getchar() would then return EOF
+  * forever.
+  */
+ static void wait_for_key(void)
+ {
+ 	int c;
+
+ 	do {
+ 		c = getchar();
+ 	} while (c != '\n' && c != EOF);
+ }
+
+ /*
+  * Ask the user to swap media: try to eject (or reset) the device open
+  * on RFD, close it, prompt for disk number DISK and wait for Enter.
+  * Returns a new read-only descriptor for DEVPATH, or open()'s error
+  * return.
+  */
+ static int change_disk(const char *devpath, int rfd, int disk)
+ {
+ 	/* Try to eject and/or quiesce the device */
+ 	sync();
+ 	if (ioctl(rfd, FDEJECT, 0) != 0) {
+ 		if (errno != ENOTTY) {
+ 			/* Non-ejectable floppy */
+ 			ioctl(rfd, FDRESET, (void *)FD_RESET_IF_NEEDED);
+ 		} else {
+ 			/* Not a floppy */
+ 			ioctl(rfd, CDROMEJECT, 0);
+ 		}
+ 	}
+ 	close(rfd);
+
+ 	fprintf(stderr,
+ 		"\nPlease insert disk %d for ramdisk and press Enter...", disk);
+ 	wait_for_key();
+
+ 	return open(devpath, O_RDONLY);
+ }
+
+ #ifdef CONFIG_KLIBC_ZLIB
+ /* Also used in initrd.c */
+ /*
+  * Inflate the compressed image found on DEVPATH at byte offset
+  * RAMDISK_START and write the output to WFD.  When the input device is
+  * exhausted the user is asked for the next disk via change_disk().
+  *
+  * Returns 0 once inflate() reports the end of the stream, -1 on any
+  * error.  The input descriptor is closed on every path.
+  */
+ int load_ramdisk_compressed(const char *devpath, FILE * wfd,
+ 			    off_t ramdisk_start)
+ {
+ 	int rfd = -1;
+ 	unsigned long long ramdisk_size, ramdisk_left;
+ 	int disk = 1;
+ 	ssize_t bytes;
+ 	int rv;
+ 	unsigned char in_buf[BUF_SZ], out_buf[BUF_SZ];
+ 	z_stream zs;
+
+ 	zs.zalloc = Z_NULL;	/* Use malloc() */
+ 	zs.zfree = Z_NULL;	/* Use free() */
+ 	zs.next_in = Z_NULL;	/* No data read yet */
+ 	zs.avail_in = 0;
+ 	zs.next_out = out_buf;
+ 	zs.avail_out = BUF_SZ;
+
+ 	if (inflateInit2(&zs, 32 + 15) != Z_OK)
+ 		goto err1;
+
+ 	rfd = open(devpath, O_RDONLY);
+ 	if (rfd < 0)
+ 		goto err2;
+
+ 	/* Set to the size of the medium, or "infinite" */
+ 	if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size))
+ 		ramdisk_size = ~0ULL;
+
+ 	do {
+ 		/* Purge the output preferentially over reading new
+ 		   input, so we don't end up overrunning the input by
+ 		   accident and demanding a new disk which doesn't
+ 		   exist... */
+ 		if (zs.avail_out == 0) {
+ 			_fwrite(out_buf, BUF_SZ, wfd);
+ 			zs.next_out = out_buf;
+ 			zs.avail_out = BUF_SZ;
+ 		} else if (zs.avail_in == 0) {
+ 			if (ramdisk_start >= ramdisk_size) {
+ 				/* change_disk() closes the old descriptor */
+ 				rfd = change_disk(devpath, rfd, ++disk);
+ 				if (rfd < 0)
+ 					goto err2;
+
+ 				if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size))
+ 					ramdisk_size = ~0ULL;
+ 				ramdisk_start = 0;
+ 				dprintf("New size = %llu\n", ramdisk_size);
+ 			}
+ 			do {
+ 				ramdisk_left = ramdisk_size - ramdisk_start;
+ 				bytes = min(ramdisk_left,
+ 					    (unsigned long long)BUF_SZ);
+ 				bytes = pread(rfd, in_buf, bytes,
+ 					      ramdisk_start);
+ 			} while (bytes == -1 && errno == EINTR);
+ 			if (bytes <= 0)
+ 				goto err2;
+ 			ramdisk_start += bytes;
+ 			zs.next_in = in_buf;
+ 			zs.avail_in = bytes;
+
+ 			/* Print dots if we're reading from a real block device */
+ 			if (ramdisk_size != ~0ULL)
+ 				putc('.', stderr);
+ 		}
+ 		rv = inflate(&zs, Z_SYNC_FLUSH);
+ 	} while (rv == Z_OK || rv == Z_BUF_ERROR);
+
+ 	dprintf("kinit: inflate returned %d\n", rv);
+
+ 	if (rv != Z_STREAM_END)
+ 		goto err2;
+
+ 	/* Write the last */
+ 	_fwrite(out_buf, BUF_SZ - zs.avail_out, wfd);
+ 	dprintf("kinit: writing %d bytes\n", BUF_SZ - zs.avail_out);
+
+ 	inflateEnd(&zs);
+ 	close(rfd);
+ 	return 0;
+
+ err2:
+ 	inflateEnd(&zs);
+ 	/* rfd is negative here if open() or change_disk() failed */
+ 	if (rfd >= 0)
+ 		close(rfd);
+ err1:
+ 	return -1;
+ }
+ #else
+ /* Built without zlib: compressed images cannot be loaded; always fails */
+ int load_ramdisk_compressed(const char *devpath, FILE * wfd,
+ 			    off_t ramdisk_start)
+ {
+ 	fprintf(stderr, "Compressed ramdisk not supported\n");
+ 	return -1;
+ }
+ #endif
+
+ /*
+  * Copy FSSIZE bytes of an uncompressed image from DEVPATH, starting at
+  * byte offset RAMDISK_START, to WFD.  When the input device is
+  * exhausted the user is asked for the next disk via change_disk().
+  *
+  * Returns 0 when all FSSIZE bytes were copied, 1 if the input ended
+  * early, -1 if the device (or a replacement disk) could not be opened.
+  * The input descriptor is closed on every path.
+  */
+ static int
+ load_ramdisk_raw(const char *devpath, FILE * wfd, off_t ramdisk_start,
+ 		 unsigned long long fssize)
+ {
+ 	unsigned long long ramdisk_size, ramdisk_left;
+ 	int disk = 1;
+ 	ssize_t bytes;
+ 	unsigned char buf[BUF_SZ];
+ 	int rfd;
+
+ 	rfd = open(devpath, O_RDONLY);
+ 	if (rfd < 0)
+ 		return -1;
+
+ 	/* Set to the size of the medium, or "infinite" */
+ 	if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size))
+ 		ramdisk_size = ~0ULL;
+
+ 	dprintf("start: %llu size: %llu fssize: %llu\n",
+ 		ramdisk_start, ramdisk_size, fssize);
+
+ 	while (fssize) {
+
+ 		if (ramdisk_start >= ramdisk_size) {
+ 			/* change_disk() closes the old descriptor */
+ 			rfd = change_disk(devpath, rfd, ++disk);
+ 			if (rfd < 0)
+ 				return -1;
+
+ 			if (ioctl(rfd, BLKGETSIZE64, &ramdisk_size))
+ 				ramdisk_size = ~0ULL;
+ 			ramdisk_start = 0;
+ 		}
+
+ 		do {
+ 			ramdisk_left =
+ 			    min(ramdisk_size - ramdisk_start, fssize);
+ 			bytes = min(ramdisk_left, (unsigned long long)BUF_SZ);
+ 			bytes = pread(rfd, buf, bytes, ramdisk_start);
+ 		} while (bytes == -1 && errno == EINTR);
+ 		if (bytes <= 0)
+ 			break;
+ 		_fwrite(buf, bytes, wfd);
+
+ 		ramdisk_start += bytes;
+ 		fssize -= bytes;
+
+ 		/* Print dots if we're reading from a real block device */
+ 		if (ramdisk_size != ~0ULL)
+ 			putc('.', stderr);
+ 	}
+
+ 	close(rfd);
+
+ 	return !!fssize;
+ }
+
+ /*
+  * Load a ramdisk image into /dev/ram0.
+  *
+  * Recognized arguments: prompt_ramdisk=, ramdisk_blocksize= (default
+  * 512), ramdisk_start= (in blocks) and ramdisk_device= (default
+  * "/dev/fd0").  The image type is detected with identify_fs(): a
+  * "gzip" image is inflated, anything else with a known size is copied
+  * raw.
+  *
+  * Returns 1 on success and 0 on failure.
+  */
+ int ramdisk_load(int argc, char *argv[])
+ {
+ 	const char *arg_prompt_ramdisk = get_arg(argc, argv, "prompt_ramdisk=");
+ 	const char *arg_ramdisk_blocksize =
+ 	    get_arg(argc, argv, "ramdisk_blocksize=");
+ 	const char *arg_ramdisk_start = get_arg(argc, argv, "ramdisk_start=");
+ 	const char *arg_ramdisk_device = get_arg(argc, argv, "ramdisk_device=");
+
+ 	int prompt_ramdisk = arg_prompt_ramdisk ? atoi(arg_prompt_ramdisk) : 0;
+ 	int ramdisk_blocksize =
+ 	    arg_ramdisk_blocksize ? atoi(arg_ramdisk_blocksize) : 512;
+ 	off_t ramdisk_start =
+ 	    arg_ramdisk_start
+ 	    ? strtoumax(arg_ramdisk_start, NULL, 10) * ramdisk_blocksize : 0;
+ 	const char *ramdisk_device =
+ 	    arg_ramdisk_device ? arg_ramdisk_device : "/dev/fd0";
+
+ 	dev_t ramdisk_dev;
+ 	int rfd;
+ 	FILE *wfd;
+ 	const char *fstype;
+ 	unsigned long long fssize;
+ 	int is_gzip = 0;
+ 	int err;
+
+ 	if (prompt_ramdisk) {
+ 		fprintf(stderr,
+ 			"Please insert disk for ramdisk and press Enter...");
+ 		wait_for_key();
+ 	}
+
+ 	ramdisk_dev = name_to_dev_t(ramdisk_device);
+ 	if (!ramdisk_dev) {
+ 		fprintf(stderr,
+ 			"Failure loading ramdisk: unknown device: %s\n",
+ 			ramdisk_device);
+ 		return 0;
+ 	}
+
+ 	create_dev("/dev/rddev", ramdisk_dev);
+ 	create_dev("/dev/ram0", Root_RAM0);
+ 	rfd = open("/dev/rddev", O_RDONLY);
+ 	wfd = fopen("/dev/ram0", "w");
+
+ 	if (rfd < 0 || !wfd) {
+ 		/* Report first: the cleanup calls may change errno */
+ 		perror("Could not open ramdisk device");
+ 		/* Release whichever of the two did open */
+ 		if (rfd >= 0)
+ 			close(rfd);
+ 		if (wfd)
+ 			fclose(wfd);
+ 		return 0;
+ 	}
+
+ 	/* Check filesystem type */
+ 	if (identify_fs(rfd, &fstype, &fssize, ramdisk_start) ||
+ 	    (fssize == 0 && !(is_gzip = !strcmp(fstype, "gzip")))) {
+ 		fprintf(stderr,
+ 			"Failure loading ramdisk: unknown filesystem type\n");
+ 		close(rfd);
+ 		fclose(wfd);
+ 		return 0;
+ 	}
+
+ 	dprintf("kinit: ramdisk is %s, size %llu\n", fstype, fssize);
+
+ 	fprintf(stderr, "Loading ramdisk (%s) ...", is_gzip ? "gzip" : "raw");
+
+ 	/* The loaders reopen the device by path themselves */
+ 	close(rfd);
+
+ 	if (is_gzip)
+ 		err = load_ramdisk_compressed("/dev/rddev", wfd, ramdisk_start);
+ 	else
+ 		err = load_ramdisk_raw("/dev/rddev", wfd,
+ 				       ramdisk_start, fssize);
+
+ 	fclose(wfd);
+
+ 	putc('\n', stderr);
+
+ 	if (err) {
+ 		perror("Failure loading ramdisk");
+ 		return 0;
+ 	}
+
+ 	return 1;
+ }
diff --git a/usr/kinit/readfile.c b/usr/kinit/readfile.c
new file mode 100644
index 0000000..7a16b4a
--- /dev/null
+++ b/usr/kinit/readfile.c
@@ -0,0 +1,86 @@
+/*
+ * Read the entire contents of a file into malloc'd storage. This
+ * is mostly useful for things like /proc files where we can't just
+ * fstat() to get the length and then mmap().
+ *
+ * Returns the number of bytes read, or -1 on error.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+#include "kinit.h"
+
+/*
+ * Read everything that remains on stream f into a malloc'd buffer and
+ * hand the buffer back through *pp.  The data is NUL-terminated.
+ *
+ * Returns the number of bytes stored (terminator not counted), or -1 on
+ * a read or allocation failure, in which case *pp is not written.  On
+ * success errno is put back to the value it had on entry.
+ */
+ssize_t freadfile(FILE *f, char **pp)
+{
+	const int saved_errno = errno;
+	size_t capacity = BUFSIZ;	/* Current allocation; a guess as good as any */
+	size_t avail = capacity;	/* Unused bytes at the tail */
+	size_t used = 0;		/* Bytes filled so far */
+	size_t got;
+	char *data, *resized;
+
+	data = malloc(capacity);
+	if (!data)
+		return -1;
+
+	for (;;) {
+		errno = 0;
+
+		/* Fill the free tail until it is full or a read comes back empty */
+		while (avail) {
+			got = _fread(data + used, avail, f);
+			if (!got)
+				break;
+			used += got;
+			avail -= got;
+		}
+
+		/* EINTR and EAGAIN left in errno are not counted as failures */
+		if (errno && errno != EINTR && errno != EAGAIN) {
+			free(data);
+			return -1;
+		}
+
+		if (!avail) {
+			/* Buffer is full: double it and keep reading */
+			avail += capacity;
+			capacity += capacity;
+			resized = realloc(data, capacity);
+			if (!resized) {
+				/* out of memory error */
+				free(data);
+				return -1;
+			}
+			data = resized;
+			continue;
+		}
+
+		/* Room left over, so the stream ran dry without an error */
+
+		/* Give back the unused tail if realloc() cooperates */
+		resized = realloc(data, used + 1);
+		if (resized)
+			data = resized;
+
+		/* Null-terminate result for good measure */
+		data[used] = '\0';
+
+		*pp = data;
+		errno = saved_errno;
+		return used;
+	}
+}
+
+/*
+ * Open filename for reading and load its entire contents with
+ * freadfile().  Returns whatever freadfile() returns, or -1 if the file
+ * cannot be opened.
+ */
+ssize_t readfile(const char *filename, char **pp)
+{
+	ssize_t nbytes;
+	FILE *stream;
+
+	stream = fopen(filename, "r");
+	if (stream == NULL)
+		return -1;
+
+	nbytes = freadfile(stream, pp);
+	fclose(stream);
+
+	return nbytes;
+}
diff --git a/usr/kinit/resume/Kbuild b/usr/kinit/resume/Kbuild
new file mode 100644
index 0000000..c804a85
--- /dev/null
+++ b/usr/kinit/resume/Kbuild
@@ -0,0 +1,34 @@
+#
+# Kbuild file for resume
+#
+
+static-y := static/resume
+shared-y := shared/resume
+
+# Object files common to both executables and the library
+objs := resume.o resumelib.o
+
+# TODO - do we want a stripped version?
+# TODO - do we want the static.g + shared.g directories?
+
+# Create lib.a with all object files (used by kinit)
+lib-y := $(objs)
+
+# Additional include paths (the parent kinit directory)
+KLIBCCFLAGS += -I$(srctree)/$(src)/..
+
+# .o files and library used to build the executables
+static/resume-y := $(objs)
+static/resume-lib := ../lib.a
+shared/resume-y := $(objs)
+shared/resume-lib := ../lib.a
+
+# Cleaning
+clean-dirs := static shared
+
+# Install the shared binary when KLIBCSHAREDFLAGS is defined, else the static one
+ifdef KLIBCSHAREDFLAGS
+install-y := $(shared-y)
+else
+install-y := $(static-y)
+endif
diff --git a/usr/kinit/resume/resume.c b/usr/kinit/resume/resume.c
new file mode 100644
index 0000000..2138078
--- /dev/null
+++ b/usr/kinit/resume/resume.c
@@ -0,0 +1,25 @@
+/*
+ * Handle resume from suspend-to-disk
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "resume.h"
+
+/* Name the program was invoked as (argv[0]); set by main() */
+char *progname;
+
+/* Print the command-line synopsis on stderr and exit with status 1. */
+static __noreturn usage(void)
+{
+	fprintf(stderr,
+		"Usage: %s /dev/<resumedevice> "
+		"[offset]\n",
+		progname);
+	exit(1);
+}
+
+/*
+ * Stand-alone entry point: takes a resume device and an optional offset
+ * (parsed by strtoull() with base 0), then returns the result of resume().
+ * Any other argument count prints the usage message and exits.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned long long offset = 0ULL;
+
+	progname = argv[0];
+
+	if (argc != 2 && argc != 3)
+		usage();
+
+	if (argc == 3)
+		offset = strtoull(argv[2], NULL, 0);
+
+	return resume(argv[1], offset);
+}
diff --git a/usr/kinit/resume/resume.h b/usr/kinit/resume/resume.h
new file mode 100644
index 0000000..5fb929f
--- /dev/null
+++ b/usr/kinit/resume/resume.h
@@ -0,0 +1,7 @@
+#ifndef RESUME_H
+#define RESUME_H
+
+/*
+ * Resume driven by kinit-style arguments: the device comes from
+ * "resume=" (or the compiled-in default), the offset from
+ * "resume_offset=".  Returns 0 when no device is configured or
+ * "noresume" is given; otherwise returns the result of resume().
+ */
+int do_resume(int argc, char *argv[]);
+
+/*
+ * Ask the kernel to resume from resume_file at resume_offset by writing
+ * to /sys/power/resume_offset and /sys/power/resume.  If control comes
+ * back to the caller, the return value is -1.
+ */
+int resume(const char *resume_file, unsigned long long resume_offset);
+
+#endif /* RESUME_H */
diff --git a/usr/kinit/resume/resumelib.c b/usr/kinit/resume/resumelib.c
new file mode 100644
index 0000000..03e596a
--- /dev/null
+++ b/usr/kinit/resume/resumelib.c
@@ -0,0 +1,106 @@
+/*
+ * Handle resume from suspend-to-disk
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+
+#include "kinit.h"
+#include "do_mounts.h"
+#include "resume.h"
+
+#ifndef CONFIG_PM_STD_PARTITION
+# define CONFIG_PM_STD_PARTITION ""
+#endif
+
+/*
+ * Decide from the argument vector whether a resume should be attempted.
+ *
+ * The device is taken from "resume=", falling back to
+ * CONFIG_PM_STD_PARTITION; the offset from "resume_offset=" (0 when
+ * absent).  Returns 0 without attempting anything when no device is
+ * configured or when the "noresume" flag is present; otherwise returns
+ * the result of resume().
+ */
+int do_resume(int argc, char *argv[])
+{
+	const char *device = get_arg(argc, argv, "resume=");
+	const char *offset_arg;
+	unsigned long long offset = 0ULL;
+
+	if (!device)
+		device = CONFIG_PM_STD_PARTITION;
+
+	/* No resume device specified */
+	if (device[0] == '\0')
+		return 0;
+
+	offset_arg = get_arg(argc, argv, "resume_offset=");
+	if (offset_arg)
+		offset = strtoull(offset_arg, NULL, 0);
+
+	/*
+	 * Noresume requested.
+	 * FIXME: we should either consider reverting the device back to
+	 * ordinary swap, or (better) put that code into swapon.
+	 */
+	if (get_flag(argc, argv, "noresume"))
+		return 0;
+
+	return resume(device, offset);
+}
+
+/*
+ * Try to resume from the suspend image on resume_file.
+ *
+ * The offset is written to /sys/power/resume_offset, then the device's
+ * "major:minor" pair to /sys/power/resume.  Every path that returns to
+ * the caller, including a completed second write, reports -1 and logs
+ * that a normal boot follows.
+ */
+int resume(const char *resume_file, unsigned long long resume_offset)
+{
+	const dev_t dev = name_to_dev_t(resume_file);
+	char text[64];
+	int fd = -1;
+	int n;
+
+	if (major(dev) == 0) {
+		fprintf(stderr, "Invalid resume device: %s\n", resume_file);
+		goto out;
+	}
+
+	/* First the offset of the image within the device */
+	fd = open("/sys/power/resume_offset", O_WRONLY);
+	if (fd < 0)
+		goto bad_offset;
+
+	n = snprintf(text, sizeof text, "%llu", resume_offset);
+	if (n >= sizeof text)	/* This should never happen */
+		goto bad_offset;
+
+	if (write(fd, text, n) != n)
+		goto bad_offset;
+
+	close(fd);
+
+	/* Then the device itself, which triggers the resume attempt */
+	fd = open("/sys/power/resume", O_WRONLY);
+	if (fd < 0)
+		goto bad_resume;
+
+	n = snprintf(text, sizeof text, "%u:%u", major(dev), minor(dev));
+	if (n >= sizeof text)	/* This should never happen */
+		goto bad_resume;
+
+	dprintf("kinit: trying to resume from %s\n", resume_file);
+
+	if (write(fd, text, n) != n)
+		goto bad_resume;
+
+	/* Okay, what are we still doing alive... */
+	goto out;
+
+bad_offset:
+	fprintf(stderr, "Cannot write /sys/power/resume_offset "
+		"(no software suspend kernel support, or old kernel version?)\n");
+	goto out;
+
+bad_resume:
+	fprintf(stderr, "Cannot write /sys/power/resume "
+		"(no software suspend kernel support?)\n");
+
+out:
+	if (fd >= 0)
+		close(fd);
+	dprintf("kinit: No resume image, doing normal boot...\n");
+	return -1;
+}
diff --git a/usr/kinit/run-init/Kbuild b/usr/kinit/run-init/Kbuild
new file mode 100644
index 0000000..eeff906
--- /dev/null
+++ b/usr/kinit/run-init/Kbuild
@@ -0,0 +1,38 @@
+#
+# Kbuild file for run-init
+#
+
+static-y := static/run-init
+shared-y := shared/run-init
+
+# Object files common to both executables and the library
+objs := run-init.o runinitlib.o
+
+# TODO - do we want a stripped version?
+# TODO - do we want the static.g + shared.g directories?
+
+# Create lib.a with all object files (used by kinit)
+lib-y := $(objs)
+
+# Force run-init to not have an executable stack (to keep the
+# READ_IMPLIES_EXEC personality(2) flag from getting set and passed to init).
+EXTRA_KLIBCLDFLAGS += -z noexecstack
+
+# Additional include paths (the parent kinit directory)
+KLIBCCFLAGS += -I$(srctree)/$(src)/..
+
+# .o files and library used to build the executables
+static/run-init-y := $(objs)
+static/run-init-lib := ../lib.a
+shared/run-init-y := $(objs)
+shared/run-init-lib := ../lib.a
+
+# Cleaning
+clean-dirs := static shared
+
+# Install the shared binary when KLIBCSHAREDFLAGS is defined, else the static one
+ifdef KLIBCSHAREDFLAGS
+install-y := $(shared-y)
+else
+install-y := $(static-y)
+endif
diff --git a/usr/kinit/run-init/run-init.c b/usr/kinit/run-init/run-init.c
new file mode 100644
index 0000000..6a4ad3e
--- /dev/null
+++ b/usr/kinit/run-init/run-init.c
@@ -0,0 +1,114 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2004-2006 H. Peter Anvin - All Rights Reserved
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall
+ * be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Usage: exec run-init [-d caps] [-c /dev/console] [-n] [-p] /real-root /sbin/init "$@"
+ *
+ * This program should be called as the last thing in a shell script
+ * acting as /init in an initramfs; it does the following:
+ *
+ * 1. Delete all files in the initramfs;
+ * 2. Remounts /real-root onto the root filesystem;
+ * 3. Chroots;
+ * 4. Drops comma-separated list of capabilities;
+ * 5. Opens /dev/console;
+ * 6. Spawns the specified init program (with arguments.)
+ *
+ * With the -p option, it skips step 1 in order to allow the initramfs to
+ * be persisted into the running system.
+ *
+ * With the -n option, it skips steps 1, 2 and 6 and can be used to check
+ * whether the given root and init are likely to work.
+ */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include "run-init.h"
+
+/* Name the program was invoked as (argv[0]); set by main() */
+static const char *program;
+
+/* Print the command-line synopsis on stderr and exit with status 1. */
+static void __attribute__ ((noreturn)) usage(void)
+{
+	fprintf(stderr,
+		"Usage: exec %s [-d caps] [-c consoledev] [-n] [-p] "
+		"/real-root /sbin/init [args]\n",
+		program);
+	exit(1);
+}
+
+/*
+ * Parse the options (-c console, -d caps, -n dry run, -p persist
+ * initramfs), require a real-root and an init path after them, and hand
+ * everything to run_init().  init's argument vector starts at the init
+ * path itself.
+ *
+ * Exits with 1 after printing the failure reported by run_init(), or
+ * with 0 when run_init() returns NULL (dry run succeeded).
+ */
+int main(int argc, char *argv[])
+{
+	/* Command-line options and defaults */
+	const char *console = "/dev/console";
+	const char *drop_caps = NULL;
+	bool dry_run = false;
+	bool persist_initramfs = false;
+	const char *failure;
+	int opt;
+
+	/* Parse the command line */
+	program = argv[0];
+
+	while ((opt = getopt(argc, argv, "c:d:pn")) != -1) {
+		switch (opt) {
+		case 'c':
+			console = optarg;
+			break;
+		case 'd':
+			drop_caps = optarg;
+			break;
+		case 'n':
+			dry_run = true;
+			break;
+		case 'p':
+			persist_initramfs = true;
+			break;
+		default:
+			usage();
+		}
+	}
+
+	/* Both the real root and the init path are mandatory */
+	if (argc - optind < 2)
+		usage();
+
+	failure = run_init(argv[optind], console, drop_caps, dry_run,
+			   persist_initramfs, argv[optind + 1],
+			   &argv[optind + 1]);
+
+	if (!failure)
+		return 0;	/* Must have been a dry run */
+
+	fprintf(stderr, "%s: %s: %s\n", program, failure, strerror(errno));
+	return 1;
+}
diff --git a/usr/kinit/run-init/run-init.h b/usr/kinit/run-init/run-init.h
new file mode 100644
index 0000000..5240ce7
--- /dev/null
+++ b/usr/kinit/run-init/run-init.h
@@ -0,0 +1,38 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2004-2006 H. Peter Anvin - All Rights Reserved
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall
+ * be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ----------------------------------------------------------------------- */
+
+#ifndef RUN_INIT_H
+#define RUN_INIT_H
+
+#include <stdbool.h>
+
+/*
+ * Switch from the initramfs to realroot and execute init with the
+ * initargs vector, using console for stdin/stdout/stderr.
+ *
+ * drop_caps is passed to drop_capabilities().  With persist_initramfs set,
+ * the initramfs contents are not deleted.  With dry_run set, nothing is
+ * deleted, moved or executed; init is only checked for being an
+ * executable regular file.
+ *
+ * Returns a string naming what failed, or NULL after a successful dry
+ * run.  Without dry_run the function returns only on failure.
+ */
+const char *run_init(const char *realroot, const char *console,
+ const char *drop_caps, bool dry_run,
+ bool persist_initramfs, const char *init,
+ char **initargs);
+
+#endif
diff --git a/usr/kinit/run-init/runinitlib.c b/usr/kinit/run-init/runinitlib.c
new file mode 100644
index 0000000..1c2e56a
--- /dev/null
+++ b/usr/kinit/run-init/runinitlib.c
@@ -0,0 +1,232 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2004-2006 H. Peter Anvin - All Rights Reserved
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall
+ * be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * run_init(realroot, consoledev, drop_caps, dry_run, persist_initramfs,
+ *          init, initargs)
+ *
+ * This function should be called as the last thing in kinit, from the
+ * initramfs; it does the following:
+ *
+ * - Delete all files in the initramfs;
+ * - Remounts /real-root onto the root filesystem;
+ * - Chroots;
+ * - Drops comma-separated list of capabilities;
+ * - Opens /dev/console;
+ * - Spawns the specified init program (with arguments.)
+ *
+ * On failure, returns a human-readable error message.
+ */
+
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include "run-init.h"
+#include "capabilities.h"
+
+/* Make it possible to compile on glibc by including constants that the
+ always-behind shipped glibc headers may not include. Classic example
+ of why the lack of ABI headers screws us up. */
+#ifndef TMPFS_MAGIC
+# define TMPFS_MAGIC 0x01021994
+#endif
+#ifndef RAMFS_MAGIC
+# define RAMFS_MAGIC 0x858458f6
+#endif
+#ifndef MS_MOVE
+# define MS_MOVE 8192
+#endif
+
+static int nuke(const char *what);
+
+/*
+ * Remove the entry `name` found in directory `dir`.
+ *
+ * len is strlen(dir) as computed by the caller; me is the device number
+ * of the directory being wiped.  An entry that lives on a different
+ * device is left alone and reported as success.  Returns 0 on success,
+ * ENOENT if the entry cannot be lstat()ed, or the result of nuke().
+ */
+static int nuke_dirent(int len, const char *dir, const char *name, dev_t me)
+{
+	const int size = len + strlen(name) + 2;	/* dir + '/' + name + NUL */
+	char full[size];
+	struct stat sb;
+	int written;
+
+	written = snprintf(full, size, "%s/%s", dir, name);
+	assert(written < size);
+
+	if (lstat(full, &sb) != 0)
+		return ENOENT;	/* Return 0 since already gone? */
+
+	/* DO NOT recurse down mount points!!!!! */
+	if (sb.st_dev != me)
+		return 0;
+
+	return nuke(full);
+}
+
+/*
+ * Wipe the contents of a directory, but not the directory itself.
+ *
+ * Returns 0 on success, ENOTDIR if `what` is not a directory, or an
+ * errno-style code from the first step that failed.  A directory that
+ * cannot be opened because of EACCES is reported as success.
+ */
+static int nuke_dir(const char *what)
+{
+	const int what_len = strlen(what);
+	struct stat dir_st;
+	struct dirent *entry;
+	DIR *handle;
+	int rc = 0;
+
+	if (lstat(what, &dir_st) != 0)
+		return errno;
+
+	if (!S_ISDIR(dir_st.st_mode))
+		return ENOTDIR;
+
+	handle = opendir(what);
+	if (!handle) {
+		/* EACCES means we can't read it. Might be empty and removable;
+		   if not, the rmdir() in nuke() will trigger an error. */
+		if (errno == EACCES)
+			return 0;
+		return errno;
+	}
+
+	/* Stop at the first entry that cannot be removed */
+	while (rc == 0 && (entry = readdir(handle)) != NULL) {
+		const char *name = entry->d_name;
+
+		/* Skip . and .. */
+		if (!strcmp(name, ".") || !strcmp(name, ".."))
+			continue;
+
+		rc = nuke_dirent(what_len, what, name, dir_st.st_dev);
+	}
+
+	closedir(handle);
+
+	return rc;
+}
+
+/*
+ * Remove `what`, whatever it is: unlink() it, and if that reports EISDIR
+ * empty the directory with nuke_dir() and rmdir() it.
+ *
+ * Returns 0 on success; otherwise returns an errno-style code and also
+ * stores it in errno.
+ */
+static int nuke(const char *what)
+{
+	int status = 0;
+
+	if (unlink(what) < 0) {
+		status = errno;
+		if (status == EISDIR) {
+			/* It's a directory. */
+			status = nuke_dir(what);
+			if (status == 0 && rmdir(what) != 0)
+				status = errno;
+		}
+	}
+
+	if (status != 0)
+		errno = status;
+
+	return status;
+}
+
+/*
+ * Switch from the initramfs to the real root and start init.
+ *
+ * realroot          directory the new root filesystem is mounted on
+ * console           device opened read/write for stdin, stdout and stderr
+ * drop_caps         passed to drop_capabilities()
+ * dry_run           perform the checks only: nothing is deleted, moved,
+ *                   redirected or executed
+ * persist_initramfs keep the initramfs contents instead of deleting them
+ * init, initargs    program to execute and its argument vector
+ *
+ * Returns a string naming what failed, or NULL after a successful dry
+ * run.  Without dry_run the function returns only on failure.
+ */
+const char *run_init(const char *realroot, const char *console,
+		     const char *drop_caps, bool dry_run,
+		     bool persist_initramfs, const char *init, char **initargs)
+{
+	struct stat rst, cst, ist;
+	struct statfs sfs;
+	int confd;
+
+	/* First, change to the new root directory */
+	if (chdir(realroot))
+		return "chdir to new root";
+
+	/* This is a potentially highly destructive program.  Take some
+	   extra precautions. */
+
+	/* Make sure the current directory is not on the same filesystem
+	   as the root directory */
+	if (stat("/", &rst) || stat(".", &cst))
+		return "stat";
+
+	if (rst.st_dev == cst.st_dev)
+		return "current directory on the same filesystem as the root";
+
+	/* Make sure we're on a ramfs */
+	if (statfs("/", &sfs))
+		return "statfs /";
+	if (sfs.f_type != RAMFS_MAGIC && sfs.f_type != TMPFS_MAGIC)
+		return "rootfs not a ramfs or tmpfs";
+
+	/* Okay, I think we should be safe... */
+
+	if (!dry_run) {
+		if (!persist_initramfs) {
+			/* Delete rootfs contents */
+			if (nuke_dir("/"))
+				return "nuking initramfs contents";
+		}
+
+		/* Overmount the root */
+		if (mount(".", "/", NULL, MS_MOVE, NULL))
+			return "overmounting root";
+	}
+
+	/* chroot, chdir */
+	if (chroot(".") || chdir("/"))
+		return "chroot";
+
+	/* Drop capabilities */
+	if (drop_capabilities(drop_caps) < 0)
+		return "dropping capabilities";
+
+	/* Open /dev/console */
+	if ((confd = open(console, O_RDWR)) < 0)
+		return "opening console";
+	if (!dry_run) {
+		dup2(confd, 0);
+		dup2(confd, 1);
+		dup2(confd, 2);
+	}
+	/*
+	 * If a standard descriptor was closed on entry, open() may have
+	 * returned 0, 1 or 2.  Closing it after the dup2() calls would take
+	 * away a stream init is about to inherit, so only close a descriptor
+	 * that is not one of them.  A dry run redirects nothing and always
+	 * releases the descriptor.
+	 */
+	if (dry_run || confd > 2)
+		close(confd);
+
+	if (!dry_run) {
+		/* Spawn init */
+		execv(init, initargs);
+		return init;	/* Failed to spawn init */
+	} else {
+		/* Only check that init exists and looks executable */
+		if (stat(init, &ist))
+			return init;
+		if (!S_ISREG(ist.st_mode) || !(ist.st_mode & S_IXUGO)) {
+			errno = EACCES;
+			return init;
+		}
+		return NULL;	/* Success */
+	}
+}
diff --git a/usr/kinit/xpio.c b/usr/kinit/xpio.c
new file mode 100644
index 0000000..42a9844
--- /dev/null
+++ b/usr/kinit/xpio.c
@@ -0,0 +1,51 @@
+/*
+ * Looping versions of pread() and pwrite()
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "xpio.h"
+
+/*
+ * pread() that keeps going until count bytes have been read.
+ *
+ * Short reads are continued and a call interrupted by a signal (EINTR)
+ * is retried.  The loop stops at end-of-file or on any other error.
+ *
+ * Returns the number of bytes read if any were read; otherwise 0 at
+ * end-of-file (or when count is 0), or -1 on error with errno set by
+ * pread().
+ */
+ssize_t xpread(int fd, void *buf, size_t count, off_t offset)
+{
+	ssize_t ctr = 0;
+	ssize_t rv = 0;
+	char *bp = buf;
+
+	while (count) {
+		rv = pread(fd, bp, count, offset);
+
+		if (rv < 0) {
+			/*
+			 * Interrupted before any data moved: retry.  The -1
+			 * must never reach the bookkeeping below, or the
+			 * pointer, offset and counters would all be skewed.
+			 */
+			if (errno == EINTR)
+				continue;
+			break;
+		}
+
+		if (rv == 0)
+			break;	/* End of file */
+
+		bp += rv;
+		count -= rv;
+		offset += rv;
+		ctr += rv;
+	}
+
+	return ctr ? ctr : rv;
+}
+
+/*
+ * pwrite() that keeps going until count bytes have been written.
+ *
+ * Short writes are continued and a call interrupted by a signal (EINTR)
+ * is retried.  The loop stops when pwrite() writes nothing or fails with
+ * any other error.
+ *
+ * Returns the number of bytes written if any were written; otherwise 0
+ * (nothing written, or count is 0) or -1 on error with errno set by
+ * pwrite().
+ */
+ssize_t xpwrite(int fd, void *buf, size_t count, off_t offset)
+{
+	ssize_t ctr = 0;
+	ssize_t rv = 0;
+	char *bp = buf;
+
+	while (count) {
+		rv = pwrite(fd, bp, count, offset);
+
+		if (rv < 0) {
+			/*
+			 * Interrupted before any data moved: retry.  The -1
+			 * must never reach the bookkeeping below, or the
+			 * pointer, offset and counters would all be skewed.
+			 */
+			if (errno == EINTR)
+				continue;
+			break;
+		}
+
+		if (rv == 0)
+			break;	/* No progress; avoid spinning forever */
+
+		bp += rv;
+		count -= rv;
+		offset += rv;
+		ctr += rv;
+	}
+
+	return ctr ? ctr : rv;
+}
diff --git a/usr/kinit/xpio.h b/usr/kinit/xpio.h
new file mode 100644
index 0000000..0596a32
--- /dev/null
+++ b/usr/kinit/xpio.h
@@ -0,0 +1,11 @@
+/*
+ * kinit/xpio.h
+ */
+
+#ifndef KINIT_XPIO_H
+#define KINIT_XPIO_H
+
+#include <sys/types.h>	/* ssize_t, size_t, off_t */
+
+/*
+ * Looping versions of pread() and pwrite(): keep transferring at
+ * increasing offsets until count bytes are done, a call transfers
+ * nothing, or a call fails.  Both return the total number of bytes
+ * transferred, or the result of the last underlying call (0 or -1) when
+ * nothing was transferred.
+ */
+ssize_t xpread(int fd, void *buf, size_t count, off_t offset);
+ssize_t xpwrite(int fd, void *buf, size_t count, off_t offset);
+
+#endif /* KINIT_XPIO_H */