diff options
Diffstat (limited to 'src/bin/psql/copy.c')
-rw-r--r-- | src/bin/psql/copy.c | 726 |
1 files changed, 726 insertions, 0 deletions
diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c new file mode 100644 index 0000000..8d6ce4c --- /dev/null +++ b/src/bin/psql/copy.c @@ -0,0 +1,726 @@ +/* + * psql - the PostgreSQL interactive terminal + * + * Copyright (c) 2000-2023, PostgreSQL Global Development Group + * + * src/bin/psql/copy.c + */ +#include "postgres_fe.h" + +#include <signal.h> +#include <sys/stat.h> +#ifndef WIN32 +#include <unistd.h> /* for isatty */ +#else +#include <io.h> /* I think */ +#endif + +#include "common.h" +#include "common/logging.h" +#include "copy.h" +#include "libpq-fe.h" +#include "pqexpbuffer.h" +#include "prompt.h" +#include "settings.h" +#include "stringutils.h" + +/* + * parse_slash_copy + * -- parses \copy command line + * + * The documented syntax is: + * \copy tablename [(columnlist)] from|to filename [options] + * \copy ( query stmt ) to filename [options] + * + * where 'filename' can be one of the following: + * '<file path>' | PROGRAM '<command>' | stdin | stdout | pstdout | pstdout + * and 'query' can be one of the following: + * SELECT | UPDATE | INSERT | DELETE + * + * An undocumented fact is that you can still write BINARY before the + * tablename; this is a hangover from the pre-7.3 syntax. The options + * syntax varies across backend versions, but we avoid all that mess + * by just transmitting the stuff after the filename literally. + * + * table name can be double-quoted and can have a schema part. + * column names can be double-quoted. + * filename can be single-quoted like SQL literals. + * command must be single-quoted like SQL literals. + * + * returns a malloc'ed structure with the options, or NULL on parsing error + */ + +struct copy_options +{ + char *before_tofrom; /* COPY string before TO/FROM */ + char *after_tofrom; /* COPY string after TO/FROM filename */ + char *file; /* NULL = stdin/stdout */ + bool program; /* is 'file' a program to popen? */ + bool psql_inout; /* true = use psql stdin/stdout */ + bool from; /* true = FROM, false = TO */ +}; + + +static void +free_copy_options(struct copy_options *ptr) +{ + if (!ptr) + return; + free(ptr->before_tofrom); + free(ptr->after_tofrom); + free(ptr->file); + free(ptr); +} + + +/* concatenate "more" onto "var", freeing the original value of *var */ +static void +xstrcat(char **var, const char *more) +{ + char *newvar; + + newvar = psprintf("%s%s", *var, more); + free(*var); + *var = newvar; +} + + +static struct copy_options * +parse_slash_copy(const char *args) +{ + struct copy_options *result; + char *token; + const char *whitespace = " \t\n\r"; + char nonstd_backslash = standard_strings() ? 0 : '\\'; + + if (!args) + { + pg_log_error("\\copy: arguments required"); + return NULL; + } + + result = pg_malloc0(sizeof(struct copy_options)); + + result->before_tofrom = pg_strdup(""); /* initialize for appending */ + + token = strtokx(args, whitespace, ".,()", "\"", + 0, false, false, pset.encoding); + if (!token) + goto error; + + /* The following can be removed when we drop 7.3 syntax support */ + if (pg_strcasecmp(token, "binary") == 0) + { + xstrcat(&result->before_tofrom, token); + token = strtokx(NULL, whitespace, ".,()", "\"", + 0, false, false, pset.encoding); + if (!token) + goto error; + } + + /* Handle COPY (query) case */ + if (token[0] == '(') + { + int parens = 1; + + while (parens > 0) + { + xstrcat(&result->before_tofrom, " "); + xstrcat(&result->before_tofrom, token); + token = strtokx(NULL, whitespace, "()", "\"'", + nonstd_backslash, true, false, pset.encoding); + if (!token) + goto error; + if (token[0] == '(') + parens++; + else if (token[0] == ')') + parens--; + } + } + + xstrcat(&result->before_tofrom, " "); + xstrcat(&result->before_tofrom, token); + token = strtokx(NULL, whitespace, ".,()", "\"", + 0, false, false, pset.encoding); + if (!token) + goto error; + + /* + * strtokx() will not have returned a multi-character token starting with + * '.', so we don't need strcmp() here. Likewise for '(', etc, below. + */ + if (token[0] == '.') + { + /* handle schema . table */ + xstrcat(&result->before_tofrom, token); + token = strtokx(NULL, whitespace, ".,()", "\"", + 0, false, false, pset.encoding); + if (!token) + goto error; + xstrcat(&result->before_tofrom, token); + token = strtokx(NULL, whitespace, ".,()", "\"", + 0, false, false, pset.encoding); + if (!token) + goto error; + } + + if (token[0] == '(') + { + /* handle parenthesized column list */ + for (;;) + { + xstrcat(&result->before_tofrom, " "); + xstrcat(&result->before_tofrom, token); + token = strtokx(NULL, whitespace, "()", "\"", + 0, false, false, pset.encoding); + if (!token) + goto error; + if (token[0] == ')') + break; + } + xstrcat(&result->before_tofrom, " "); + xstrcat(&result->before_tofrom, token); + token = strtokx(NULL, whitespace, ".,()", "\"", + 0, false, false, pset.encoding); + if (!token) + goto error; + } + + if (pg_strcasecmp(token, "from") == 0) + result->from = true; + else if (pg_strcasecmp(token, "to") == 0) + result->from = false; + else + goto error; + + /* { 'filename' | PROGRAM 'command' | STDIN | STDOUT | PSTDIN | PSTDOUT } */ + token = strtokx(NULL, whitespace, ";", "'", + 0, false, false, pset.encoding); + if (!token) + goto error; + + if (pg_strcasecmp(token, "program") == 0) + { + int toklen; + + token = strtokx(NULL, whitespace, ";", "'", + 0, false, false, pset.encoding); + if (!token) + goto error; + + /* + * The shell command must be quoted. This isn't fool-proof, but + * catches most quoting errors. + */ + toklen = strlen(token); + if (token[0] != '\'' || toklen < 2 || token[toklen - 1] != '\'') + goto error; + + strip_quotes(token, '\'', 0, pset.encoding); + + result->program = true; + result->file = pg_strdup(token); + } + else if (pg_strcasecmp(token, "stdin") == 0 || + pg_strcasecmp(token, "stdout") == 0) + { + result->file = NULL; + } + else if (pg_strcasecmp(token, "pstdin") == 0 || + pg_strcasecmp(token, "pstdout") == 0) + { + result->psql_inout = true; + result->file = NULL; + } + else + { + /* filename can be optionally quoted */ + strip_quotes(token, '\'', 0, pset.encoding); + result->file = pg_strdup(token); + expand_tilde(&result->file); + } + + /* Collect the rest of the line (COPY options) */ + token = strtokx(NULL, "", NULL, NULL, + 0, false, false, pset.encoding); + if (token) + result->after_tofrom = pg_strdup(token); + + return result; + +error: + if (token) + pg_log_error("\\copy: parse error at \"%s\"", token); + else + pg_log_error("\\copy: parse error at end of line"); + free_copy_options(result); + + return NULL; +} + + +/* + * Execute a \copy command (frontend copy). We have to open a file (or execute + * a command), then submit a COPY query to the backend and either feed it data + * from the file or route its response into the file. + */ +bool +do_copy(const char *args) +{ + PQExpBufferData query; + FILE *copystream; + struct copy_options *options; + bool success; + + /* parse options */ + options = parse_slash_copy(args); + + if (!options) + return false; + + /* prepare to read or write the target file */ + if (options->file && !options->program) + canonicalize_path(options->file); + + if (options->from) + { + if (options->file) + { + if (options->program) + { + fflush(NULL); + errno = 0; + copystream = popen(options->file, PG_BINARY_R); + } + else + copystream = fopen(options->file, PG_BINARY_R); + } + else if (!options->psql_inout) + copystream = pset.cur_cmd_source; + else + copystream = stdin; + } + else + { + if (options->file) + { + if (options->program) + { + fflush(NULL); + disable_sigpipe_trap(); + errno = 0; + copystream = popen(options->file, PG_BINARY_W); + } + else + copystream = fopen(options->file, PG_BINARY_W); + } + else if (!options->psql_inout) + copystream = pset.queryFout; + else + copystream = stdout; + } + + if (!copystream) + { + if (options->program) + pg_log_error("could not execute command \"%s\": %m", + options->file); + else + pg_log_error("%s: %m", + options->file); + free_copy_options(options); + return false; + } + + if (!options->program) + { + struct stat st; + int result; + + /* make sure the specified file is not a directory */ + if ((result = fstat(fileno(copystream), &st)) < 0) + pg_log_error("could not stat file \"%s\": %m", + options->file); + + if (result == 0 && S_ISDIR(st.st_mode)) + pg_log_error("%s: cannot copy from/to a directory", + options->file); + + if (result < 0 || S_ISDIR(st.st_mode)) + { + fclose(copystream); + free_copy_options(options); + return false; + } + } + + /* build the command we will send to the backend */ + initPQExpBuffer(&query); + printfPQExpBuffer(&query, "COPY "); + appendPQExpBufferStr(&query, options->before_tofrom); + if (options->from) + appendPQExpBufferStr(&query, " FROM STDIN "); + else + appendPQExpBufferStr(&query, " TO STDOUT "); + if (options->after_tofrom) + appendPQExpBufferStr(&query, options->after_tofrom); + + /* run it like a user command, but with copystream as data source/sink */ + pset.copyStream = copystream; + success = SendQuery(query.data); + pset.copyStream = NULL; + termPQExpBuffer(&query); + + if (options->file != NULL) + { + if (options->program) + { + int pclose_rc = pclose(copystream); + + if (pclose_rc != 0) + { + if (pclose_rc < 0) + pg_log_error("could not close pipe to external command: %m"); + else + { + char *reason = wait_result_to_str(pclose_rc); + + pg_log_error("%s: %s", options->file, + reason ? reason : ""); + free(reason); + } + success = false; + } + SetShellResultVariables(pclose_rc); + restore_sigpipe_trap(); + } + else + { + if (fclose(copystream) != 0) + { + pg_log_error("%s: %m", options->file); + success = false; + } + } + } + free_copy_options(options); + return success; +} + + +/* + * Functions for handling COPY IN/OUT data transfer. + * + * If you want to use COPY TO STDOUT/FROM STDIN in your application, + * this is the code to steal ;) + */ + +/* + * handleCopyOut + * receives data as a result of a COPY ... TO STDOUT command + * + * conn should be a database connection that you just issued COPY TO on + * and got back a PGRES_COPY_OUT result. + * + * copystream is the file stream for the data to go to. + * copystream can be NULL to eat the data without writing it anywhere. + * + * The final status for the COPY is returned into *res (but note + * we already reported the error, if it's not a success result). + * + * result is true if successful, false if not. + */ +bool +handleCopyOut(PGconn *conn, FILE *copystream, PGresult **res) +{ + bool OK = true; + char *buf; + int ret; + + for (;;) + { + ret = PQgetCopyData(conn, &buf, 0); + + if (ret < 0) + break; /* done or server/connection error */ + + if (buf) + { + if (OK && copystream && fwrite(buf, 1, ret, copystream) != ret) + { + pg_log_error("could not write COPY data: %m"); + /* complain only once, keep reading data from server */ + OK = false; + } + PQfreemem(buf); + } + } + + if (OK && copystream && fflush(copystream)) + { + pg_log_error("could not write COPY data: %m"); + OK = false; + } + + if (ret == -2) + { + pg_log_error("COPY data transfer failed: %s", PQerrorMessage(conn)); + OK = false; + } + + /* + * Check command status and return to normal libpq state. + * + * If for some reason libpq is still reporting PGRES_COPY_OUT state, we + * would like to forcibly exit that state, since our caller would be + * unable to distinguish that situation from reaching the next COPY in a + * command string that happened to contain two consecutive COPY TO STDOUT + * commands. However, libpq provides no API for doing that, and in + * principle it's a libpq bug anyway if PQgetCopyData() returns -1 or -2 + * but hasn't exited COPY_OUT state internally. So we ignore the + * possibility here. + */ + *res = PQgetResult(conn); + if (PQresultStatus(*res) != PGRES_COMMAND_OK) + { + pg_log_info("%s", PQerrorMessage(conn)); + OK = false; + } + + return OK; +} + +/* + * handleCopyIn + * sends data to complete a COPY ... FROM STDIN command + * + * conn should be a database connection that you just issued COPY FROM on + * and got back a PGRES_COPY_IN result. + * copystream is the file stream to read the data from. + * isbinary can be set from PQbinaryTuples(). + * The final status for the COPY is returned into *res (but note + * we already reported the error, if it's not a success result). + * + * result is true if successful, false if not. + */ + +/* read chunk size for COPY IN - size is not critical */ +#define COPYBUFSIZ 8192 + +bool +handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res) +{ + bool OK; + char buf[COPYBUFSIZ]; + bool showprompt; + + /* + * Establish longjmp destination for exiting from wait-for-input. (This is + * only effective while sigint_interrupt_enabled is TRUE.) + */ + if (sigsetjmp(sigint_interrupt_jmp, 1) != 0) + { + /* got here with longjmp */ + + /* Terminate data transfer */ + PQputCopyEnd(conn, + (PQprotocolVersion(conn) < 3) ? NULL : + _("canceled by user")); + + OK = false; + goto copyin_cleanup; + } + + /* Prompt if interactive input */ + if (isatty(fileno(copystream))) + { + showprompt = true; + if (!pset.quiet) + puts(_("Enter data to be copied followed by a newline.\n" + "End with a backslash and a period on a line by itself, or an EOF signal.")); + } + else + showprompt = false; + + OK = true; + + if (isbinary) + { + /* interactive input probably silly, but give one prompt anyway */ + if (showprompt) + { + const char *prompt = get_prompt(PROMPT_COPY, NULL); + + fputs(prompt, stdout); + fflush(stdout); + } + + for (;;) + { + int buflen; + + /* enable longjmp while waiting for input */ + sigint_interrupt_enabled = true; + + buflen = fread(buf, 1, COPYBUFSIZ, copystream); + + sigint_interrupt_enabled = false; + + if (buflen <= 0) + break; + + if (PQputCopyData(conn, buf, buflen) <= 0) + { + OK = false; + break; + } + } + } + else + { + bool copydone = false; + int buflen; + bool at_line_begin = true; + + /* + * In text mode, we have to read the input one line at a time, so that + * we can stop reading at the EOF marker (\.). We mustn't read beyond + * the EOF marker, because if the data was inlined in a SQL script, we + * would eat up the commands after the EOF marker. + */ + buflen = 0; + while (!copydone) + { + char *fgresult; + + if (at_line_begin && showprompt) + { + const char *prompt = get_prompt(PROMPT_COPY, NULL); + + fputs(prompt, stdout); + fflush(stdout); + } + + /* enable longjmp while waiting for input */ + sigint_interrupt_enabled = true; + + fgresult = fgets(&buf[buflen], COPYBUFSIZ - buflen, copystream); + + sigint_interrupt_enabled = false; + + if (!fgresult) + copydone = true; + else + { + int linelen; + + linelen = strlen(fgresult); + buflen += linelen; + + /* current line is done? */ + if (buf[buflen - 1] == '\n') + { + /* check for EOF marker, but not on a partial line */ + if (at_line_begin) + { + /* + * This code erroneously assumes '\.' on a line alone + * inside a quoted CSV string terminates the \copy. + * https://www.postgresql.org/message-id/E1TdNVQ-0001ju-GO@wrigleys.postgresql.org + * https://www.postgresql.org/message-id/bfcd57e4-8f23-4c3e-a5db-2571d09208e2@beta.fastmail.com + */ + if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) || + (linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0)) + { + copydone = true; + } + } + + if (copystream == pset.cur_cmd_source) + { + pset.lineno++; + pset.stmt_lineno++; + } + at_line_begin = true; + } + else + at_line_begin = false; + } + + /* + * If the buffer is full, or we've reached the EOF, flush it. + * + * Make sure there's always space for four more bytes in the + * buffer, plus a NUL terminator. That way, an EOF marker is + * never split across two fgets() calls, which simplifies the + * logic. + */ + if (buflen >= COPYBUFSIZ - 5 || (copydone && buflen > 0)) + { + if (PQputCopyData(conn, buf, buflen) <= 0) + { + OK = false; + break; + } + + buflen = 0; + } + } + } + + /* Check for read error */ + if (ferror(copystream)) + OK = false; + + /* + * Terminate data transfer. We can't send an error message if we're using + * protocol version 2. (libpq no longer supports protocol version 2, but + * keep the version checks just in case you're using a pre-v14 libpq.so at + * runtime) + */ + if (PQputCopyEnd(conn, + (OK || PQprotocolVersion(conn) < 3) ? NULL : + _("aborted because of read failure")) <= 0) + OK = false; + +copyin_cleanup: + + /* + * Clear the EOF flag on the stream, in case copying ended due to an EOF + * signal. This allows an interactive TTY session to perform another COPY + * FROM STDIN later. (In non-STDIN cases, we're about to close the file + * anyway, so it doesn't matter.) Although we don't ever test the flag + * with feof(), some fread() implementations won't read more data if it's + * set. This also clears the error flag, but we already checked that. + */ + clearerr(copystream); + + /* + * Check command status and return to normal libpq state. + * + * We do not want to return with the status still PGRES_COPY_IN: our + * caller would be unable to distinguish that situation from reaching the + * next COPY in a command string that happened to contain two consecutive + * COPY FROM STDIN commands. We keep trying PQputCopyEnd() in the hope + * it'll work eventually. (What's actually likely to happen is that in + * attempting to flush the data, libpq will eventually realize that the + * connection is lost. But that's fine; it will get us out of COPY_IN + * state, which is what we need.) + */ + while (*res = PQgetResult(conn), PQresultStatus(*res) == PGRES_COPY_IN) + { + OK = false; + PQclear(*res); + /* We can't send an error message if we're using protocol version 2 */ + PQputCopyEnd(conn, + (PQprotocolVersion(conn) < 3) ? NULL : + _("trying to exit copy mode")); + } + if (PQresultStatus(*res) != PGRES_COMMAND_OK) + { + pg_log_info("%s", PQerrorMessage(conn)); + OK = false; + } + + return OK; +} |