diff options
Diffstat (limited to 'sourceextraction.c')
-rw-r--r-- | sourceextraction.c | 716 |
1 files changed, 716 insertions, 0 deletions
diff --git a/sourceextraction.c b/sourceextraction.c new file mode 100644 index 0000000..d28458a --- /dev/null +++ b/sourceextraction.c @@ -0,0 +1,716 @@ +/* This file is part of "reprepro" + * Copyright (C) 2008 Bernhard R. Link + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1301 USA + */ +#include <config.h> + +#include <errno.h> +#include <assert.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#ifdef HAVE_LIBARCHIVE +#include <archive.h> +#include <archive_entry.h> +#if ARCHIVE_VERSION_NUMBER < 3000000 +#define archive_read_free archive_read_finish +#endif +#endif + +#include "error.h" +#include "filecntl.h" +#include "chunks.h" +#include "uncompression.h" +#include "sourceextraction.h" + +struct sourceextraction { + bool failed, completed; + int difffile, tarfile, debiantarfile; + enum compression diffcompression, tarcompression, debiancompression; + /*@null@*/ char **section_p, **priority_p; +}; + +struct sourceextraction *sourceextraction_init(char **section_p, char **priority_p) { + struct sourceextraction *n; + + n = zNEW(struct sourceextraction); + if (FAILEDTOALLOC(n)) + return n; + n->difffile = -1; + n->tarfile = -1; + n->debiantarfile = -1; + n->section_p = section_p; + n->priority_p = priority_p; + return n; +} + +void sourceextraction_abort(struct sourceextraction *e) { + free(e); +} + +/* with must be a string constant, no pointer! */ +#define endswith(name, len, with) (len >= sizeof(with) && memcmp(name+(len+1-sizeof(with)), with, sizeof(with)-1) == 0) + +/* register a file part of this source */ +void sourceextraction_setpart(struct sourceextraction *e, int i, const char *basefilename) { + size_t bl = strlen(basefilename); + enum compression c; + + if (e->failed) + return; + + c = compression_by_suffix(basefilename, &bl); + + if (endswith(basefilename, bl, ".dsc")) + return; + else if (endswith(basefilename, bl, ".asc")) + return; + else if (endswith(basefilename, bl, ".diff")) { + e->difffile = i; + e->diffcompression = c; + return; + } else if (endswith(basefilename, bl, ".debian.tar")) { + e->debiantarfile = i; + e->debiancompression = c; + return; + } else if (endswith(basefilename, bl, ".tar")) { + e->tarfile = i; + e->tarcompression = c; + return; + } else { + // TODO: errormessage + e->failed = true; + } +} + +/* return the next needed file */ +bool sourceextraction_needs(struct sourceextraction *e, int *ofs_p) { + if (e->failed || e->completed) + return false; + if (e->difffile >= 0) { + if (!uncompression_supported(e->diffcompression)) + // TODO: errormessage + return false; + *ofs_p = e->difffile; + return true; + } else if (e->debiantarfile >= 0) { +#ifdef HAVE_LIBARCHIVE + if (!uncompression_supported(e->debiancompression)) + return false; + *ofs_p = e->debiantarfile; + return true; +#else + return false; +#endif + } else if (e->tarfile >= 0) { +#ifdef HAVE_LIBARCHIVE + if (!uncompression_supported(e->tarcompression)) + return false; + *ofs_p = e->tarfile; + return true; +#else + return false; +#endif + } else + return false; +} + +static retvalue parsediff(struct compressedfile *f, /*@null@*/char **section_p, /*@null@*/char **priority_p, bool *found_p) { + size_t destlength, lines_in, lines_out; + const char *p, *s; char *garbage; +#define BUFSIZE 4096 + char buffer[BUFSIZE]; + int bytes_read, used = 0, filled = 0; + + auto inline bool u_getline(void); + inline bool u_getline(void) { + do { + if (filled - used > 0) { + char *n; + + p = buffer + used; + n = memchr(p, '\n', filled - used); + if (n != NULL) { + used += 1 + (n - p); + *n = '\0'; + while (--n >= p && *n == '\r') + *n = '\0'; + return true; + } + } else { assert (filled == used); + filled = 0; + used = 0; + } + if (filled == BUFSIZE) { + if (used == 0) + /* overlong line */ + return false; + memmove(buffer, buffer + used, filled - used); + filled -= used; + used = 0; + } + bytes_read = uncompress_read(f, buffer + filled, + BUFSIZE - filled); + if (bytes_read <= 0) + return false; + filled += bytes_read; + } while (true); + } + auto inline char u_overlinegetchar(void); + inline char u_overlinegetchar(void) { + const char *n; + char ch; + + if (filled - used > 0) { + ch = buffer[used]; + } else { assert (filled == used); + used = 0; + bytes_read = uncompress_read(f, buffer, BUFSIZE); + if (bytes_read <= 0) { + filled = 0; + return '\0'; + } + filled = bytes_read; + ch = buffer[0]; + } + if (ch == '\n') + return '\0'; + + /* over rest of the line */ + n = memchr(buffer + used, '\n', filled - used); + if (n != NULL) { + used = 1 + (n - buffer); + return ch; + } + used = 0; + filled = 0; + /* need to read more to get to the end of the line */ + do { /* these lines can be long */ + bytes_read = uncompress_read(f, buffer, BUFSIZE); + if (bytes_read <= 0) + return false; + n = memchr(buffer, '\n', bytes_read); + } while (n == NULL); + used = 1 + (n - buffer); + filled = bytes_read; + return ch; + } + + /* we are assuming the exact format dpkg-source generates here... */ + + if (!u_getline()) { + /* empty or strange file */ + *found_p = false; + return RET_OK; + } + if (memcmp(p, "diff ", 4) == 0) { + /* one exception is allowing diff lines, + * as diff -ru adds them ... */ + if (!u_getline()) { + /* strange file */ + *found_p = false; + return RET_OK; + } + } + if (unlikely(memcmp(p, "--- ", 4) != 0)) + return RET_NOTHING; + if (!u_getline()) + /* so short a file? */ + return RET_NOTHING; + if (unlikely(memcmp(p, "+++ ", 4) != 0)) + return RET_NOTHING; + p += 4; + s = strchr(p, '/'); + if (unlikely(s == NULL)) + return RET_NOTHING; + s++; + /* another exception to allow diff output directly: + * +++ lines might have garbage after a tab... */ + garbage = strchr(s, '\t'); + if (garbage != NULL) + *garbage = '\0'; + destlength = s - p; + /* ignore all files that are not x/debian/control */ + while (strcmp(s, "debian/control") != 0) { + if (unlikely(interrupted())) + return RET_ERROR_INTERRUPTED; + if (!u_getline()) + return RET_NOTHING; + while (memcmp(p, "@@ -", 4) == 0) { + if (unlikely(interrupted())) + return RET_ERROR_INTERRUPTED; + p += 4; + while (*p != ',' && *p != ' ') { + if (unlikely(*p == '\0')) + return RET_NOTHING; + p++; + } + if (*p == ' ') + lines_in = 1; + else { + p++; + lines_in = 0; + while (*p >= '0' && *p <= '9') { + lines_in = 10*lines_in + (*p-'0'); + p++; + } + } + while (*p == ' ') + p++; + if (unlikely(*(p++) != '+')) + return RET_NOTHING; + while (*p >= '0' && *p <= '9') + p++; + if (*p == ',') { + p++; + lines_out = 0; + while (*p >= '0' && *p <= '9') { + lines_out = 10*lines_out + (*p-'0'); + p++; + } + } else if (*p == ' ') + lines_out = 1; + else + return RET_NOTHING; + while (*p == ' ') + p++; + if (unlikely(*p != '@')) + return RET_NOTHING; + + while (lines_in > 0 || lines_out > 0) { + char ch; + + ch = u_overlinegetchar(); + switch (ch) { + case '+': + if (unlikely(lines_out == 0)) + return RET_NOTHING; + lines_out--; + break; + case ' ': + if (unlikely(lines_out == 0)) + return RET_NOTHING; + lines_out--; + /* no break */ + __attribute__ ((fallthrough)); + case '-': + if (unlikely(lines_in == 0)) + return RET_NOTHING; + lines_in--; + break; + default: + return RET_NOTHING; + } + } + if (!u_getline()) { + *found_p = false; + /* nothing found successfully */ + return RET_OK; + } + } + if (memcmp(p, "\\ No newline at end of file", 27) == 0) { + if (!u_getline()) { + /* nothing found successfully */ + *found_p = false; + return RET_OK; + } + } + if (memcmp(p, "diff ", 4) == 0) { + if (!u_getline()) { + /* strange file, but nothing explicitly wrong */ + *found_p = false; + return RET_OK; + } + } + if (unlikely(memcmp(p, "--- ", 4) != 0)) + return RET_NOTHING; + if (!u_getline()) + return RET_NOTHING; + if (unlikely(memcmp(p, "+++ ", 4) != 0)) + return RET_NOTHING; + p += 4; + s = strchr(p, '/'); + if (unlikely(s == NULL)) + return RET_NOTHING; + /* another exception to allow diff output directly: + * +++ lines might have garbage after a tab... */ + garbage = strchr(s, '\t'); + if (garbage != NULL) + *garbage = '\0'; + /* if it does not always have the same directory, then + * we cannot be sure it has no debian/control, so we + * have to fail... */ + s++; + if (s != p + destlength) + return RET_NOTHING; + } + /* found debian/control */ + if (!u_getline()) + return RET_NOTHING; + if (unlikely(memcmp(p, "@@ -", 4) != 0)) + return RET_NOTHING; + p += 4; + p++; + while (*p != ',' && *p != ' ') { + if (unlikely(*p == '\0')) + return RET_NOTHING; + p++; + } + if (*p == ',') { + p++; + while (*p >= '0' && *p <= '9') + p++; + } + while (*p == ' ') + p++; + if (unlikely(*(p++) != '+')) + return RET_NOTHING; + if (*(p++) != '1' || *(p++) != ',') { + /* a diff not starting at the first line (or not being + * more than one line) is not yet supported */ + return RET_NOTHING; + } + lines_out = 0; + while (*p >= '0' && *p <= '9') { + lines_out = 10*lines_out + (*p-'0'); + p++; + } + while (*p == ' ') + p++; + if (unlikely(*p != '@')) + return RET_NOTHING; + while (lines_out > 0) { + if (unlikely(interrupted())) + return RET_ERROR_INTERRUPTED; + if (!u_getline()) + return RET_NOTHING; + + switch (*(p++)) { + case '-': + break; + default: + return RET_NOTHING; + case ' ': + case '+': + if (unlikely(lines_out == 0)) + return RET_NOTHING; + lines_out--; + if (section_p != NULL && + strncasecmp(p, "Section:", 8) == 0) { + p += 8; + while (*p == ' ' || *p == '\t') + p++; + s = p; + while (*s != ' ' && *s != '\t' && + *s != '\0' && *s != '\r') + s++; + if (s == p) + return RET_NOTHING; + *section_p = strndup(p, s-p); + if (FAILEDTOALLOC(*section_p)) + return RET_ERROR_OOM; + while (*s == ' ' || *s == '\t' || + *s == '\r') + s++; + if (*s != '\0') + return RET_NOTHING; + continue; + } + if (priority_p != NULL && + strncasecmp(p, "Priority:", 9) == 0) { + p += 9; + while (*p == ' ' || *p == '\t') + p++; + s = p; + while (*s != ' ' && *s != '\t' && + *s != '\0' && *s != '\r') + s++; + if (s == p) + return RET_NOTHING; + *priority_p = strndup(p, s-p); + if (FAILEDTOALLOC(*priority_p)) + return RET_ERROR_OOM; + while (*s == ' ' || *s == '\t' || + *s == '\r') + s++; + if (*s != '\0') + return RET_NOTHING; + continue; + } + if (*p == '\0') { + /* end of control data, we are + * finished */ + *found_p = true; + return RET_OK; + } + break; + } + } + /* cannot yet handle a .diff not containing the full control */ + return RET_NOTHING; +} + +#ifdef HAVE_LIBARCHIVE +static retvalue read_source_control_file(struct sourceextraction *e, struct archive *tar, struct archive_entry *entry) { + // TODO: implement... + size_t size, len, controllen; + ssize_t got; + char *buffer; + const char *aftercontrol; + + size = archive_entry_size(entry); + if (size <= 0) + return RET_NOTHING; + if (size > 10*1024*1024) + return RET_NOTHING; + buffer = malloc(size+2); + if (FAILEDTOALLOC(buffer)) + return RET_ERROR_OOM; + len = 0; + while ((got = archive_read_data(tar, buffer+len, ((size_t)size+1)-len)) > 0 + && !interrupted()) { + len += got; + if (len > size) { + free(buffer); + return RET_NOTHING; + } + } + if (unlikely(interrupted())) { + free(buffer); + return RET_ERROR_INTERRUPTED; + } + if (got < 0) { + free(buffer); + return RET_NOTHING; + } + buffer[len] = '\0'; + // TODO: allow a saved .diff for this file applied here + + controllen = chunk_extract(buffer, buffer, len, true, &aftercontrol); + if (controllen == 0) { + free(buffer); + return RET_NOTHING; + } + + if (e->section_p != NULL) + (void)chunk_getvalue(buffer, "Section", e->section_p); + if (e->priority_p != NULL) + (void)chunk_getvalue(buffer, "Priority", e->priority_p); + free(buffer); + return RET_OK; +} + +static int compressedfile_open(UNUSED(struct archive *a), UNUSED(void *v)) { + return ARCHIVE_OK; +} + +static int compressedfile_close(UNUSED(struct archive *a), UNUSED(void *v)) { + return ARCHIVE_OK; +} + +static ssize_t compressedfile_read(UNUSED(struct archive *a), void *d, const void **buffer_p) { + struct compressedfile *f = d; + // TODO malloc buffer instead + static char mybuffer[4096]; + + *buffer_p = mybuffer; + return uncompress_read(f, mybuffer, 4096); +} + +static retvalue parse_tarfile(struct sourceextraction *e, const char *filename, enum compression c, /*@out@*/bool *found_p) { + struct archive *tar; + struct archive_entry *entry; + struct compressedfile *file; + int a; + retvalue r, r2; + + /* While an .tar, especially an .orig.tar can be very ugly + * (they should be pristine upstream tars, so dpkg-source works around + * a lot of ugliness), + * we are looking for debian/control. This is unlikely to be in an ugly + * upstream tar verbatimly. */ + + if (!isregularfile(filename)) + return RET_NOTHING; + + tar = archive_read_new(); + if (FAILEDTOALLOC(tar)) + return RET_ERROR_OOM; + archive_read_support_format_tar(tar); + archive_read_support_format_gnutar(tar); + + r = uncompress_open(&file, filename, c); + if (!RET_IS_OK(r)) { + archive_read_free(tar); + return r; + } + + a = archive_read_open(tar, file, compressedfile_open, + compressedfile_read, compressedfile_close); + if (a != ARCHIVE_OK) { + int err = archive_errno(tar); + if (err != -EINVAL && err != 0) + fprintf(stderr, +"Error %d trying to extract control information from %s:\n" "%s\n", + err, filename, archive_error_string(tar)); + else + fprintf(stderr, +"Error trying to extract control information from %s:\n" "%s\n", + filename, archive_error_string(tar)); + archive_read_free(tar); + uncompress_abort(file); + return RET_ERROR; + } + while ((a=archive_read_next_header(tar, &entry)) == ARCHIVE_OK) { + const char *name = archive_entry_pathname(entry); + const char *s; + bool iscontrol; + + if (name[0] == '.' && name[1] == '/') + name += 2; + s = strchr(name, '/'); + if (s == NULL) + // TODO: is this already enough to give up totally? + iscontrol = false; + else + iscontrol = strcmp(s+1, "debian/control") == 0 || + strcmp(name, "debian/control") == 0; + + if (iscontrol) { + r = read_source_control_file(e, tar, entry); + archive_read_free(tar); + r2 = uncompress_error(file); + RET_UPDATE(r, r2); + uncompress_abort(file); + *found_p = true; + return r; + } + a = archive_read_data_skip(tar); + if (a != ARCHIVE_OK) { + int err = archive_errno(tar); + printf("Error %d skipping %s within %s: %s\n", + err, name, filename, + archive_error_string(tar)); + archive_read_free(tar); + if (err == 0 || err == -EINVAL) + r = RET_ERROR; + else + r = RET_ERRNO(err); + r2 = uncompress_error(file); + RET_UPDATE(r, r2); + uncompress_abort(file); + return r; + } + if (interrupted()) + return RET_ERROR_INTERRUPTED; + } + if (a != ARCHIVE_EOF) { + int err = archive_errno(tar); + fprintf(stderr, "Error %d reading %s: %s\n", + err, filename, archive_error_string(tar)); + archive_read_free(tar); + if (err == 0 || err == -EINVAL) + r = RET_ERROR; + else + r = RET_ERRNO(err); + r2 = uncompress_error(file); + RET_UPDATE(r, r2); + uncompress_abort(file); + return r; + } + archive_read_free(tar); + *found_p = false; + return uncompress_close(file); +} +#endif + +/* full file name of requested files ready to analyse */ +retvalue sourceextraction_analyse(struct sourceextraction *e, const char *fullfilename) { + retvalue r; + bool found; + +#ifndef HAVE_LIBARCHIVE + assert (e->difffile >= 0); +#endif + if (e->difffile >= 0) { + struct compressedfile *f; + + assert (uncompression_supported(e->diffcompression)); + e->difffile = -1; + + r = uncompress_open(&f, fullfilename, e->diffcompression); + if (!RET_IS_OK(r)) { + e->failed = true; + /* being unable to read a file is no hard error... */ + return RET_NOTHING; + } + r = parsediff(f, e->section_p, e->priority_p, &found); + if (RET_IS_OK(r)) { + if (!found) + r = uncompress_close(f); + else { + r = uncompress_error(f); + uncompress_abort(f); + } + } else { + uncompress_abort(f); + } + if (!RET_IS_OK(r)) + e->failed = true; + else if (found) + /* do not look in the tar, we found debian/control */ + e->completed = true; + return r; + } + +#ifdef HAVE_LIBARCHIVE + if (e->debiantarfile >= 0) { + e->debiantarfile = -1; + r = parse_tarfile(e, fullfilename, e->debiancompression, + &found); + if (!RET_IS_OK(r)) + e->failed = true; + else if (found) + /* do not look in the tar, we found debian/control */ + e->completed = true; + return r; + } +#endif + + /* if it's not the diff nor the .debian.tar, look into the .tar file: */ + assert (e->tarfile >= 0); + e->tarfile = -1; + +#ifdef HAVE_LIBARCHIVE + r = parse_tarfile(e, fullfilename, e->tarcompression, &found); + if (!RET_IS_OK(r)) + e->failed = true; + else if (found) + /* do not look in the tar, we found debian/control */ + e->completed = true; + return r; +#else + return RET_NOTHING; +#endif +} + +retvalue sourceextraction_finish(struct sourceextraction *e) { + if (e->completed) { + free(e); + return RET_OK; + } + free(e); + return RET_NOTHING; +} |