diff options
Diffstat (limited to '')
-rw-r--r-- | lib/clplumbing/cl_poll.c | 809 |
1 files changed, 809 insertions, 0 deletions
diff --git a/lib/clplumbing/cl_poll.c b/lib/clplumbing/cl_poll.c new file mode 100644 index 0000000..789eb1a --- /dev/null +++ b/lib/clplumbing/cl_poll.c @@ -0,0 +1,809 @@ +#include <lha_internal.h> +#include <stdlib.h> +#include <unistd.h> +/* + * Substitute poll(2) function using POSIX real time signals. + * + * The poll(2) system call often has significant latencies and realtime + * impacts (probably because of its variable length argument list). + * + * These functions let us use real time signals and sigtimedwait(2) instead + * of poll - for those files which work with real time signals. + * In the 2.4 series of Linux kernels, this does *not* include FIFOs. + * + * NOTE: We (have to) grab the SIGPOLL signal for our own purposes. + * Hope that's OK with you... + * + * Special caution: We can only incompletely simulate the difference between + * the level-triggered interface of poll(2) and the edge-triggered behavior + * of I/O signals. As a result you *must* read all previously-indicated + * incoming data before calling cl_poll() again. Otherwise you may miss + * some incoming data (and possibly hang). + * + * + * Copyright (C) 2003 IBM Corporation + * + * Author: <alanr@unix.sh> + * + * This software licensed under the GNU LGPL. + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of version 2.1 of the GNU Lesser General Public + * License as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + **************************************************************************/ + + +#define __USE_GNU 1 +# include <fcntl.h> +#undef __USE_GNU + +#include <errno.h> +#include <string.h> +#include <glib.h> +#include <clplumbing/cl_log.h> +#include <clplumbing/cl_poll.h> +#include <clplumbing/cl_signal.h> + + + +/* Turn on to log odd realtime behavior */ + +#define TIME_CALLS 1 +#ifdef TIME_CALLS +# include <clplumbing/longclock.h> +# include <clplumbing/cl_log.h> +#endif + +static int debug = 0; + +int /* Slightly sleazy... */ +cl_glibpoll(GPollFD* ufds, guint nfsd, gint timeout) +{ + (void)debug; + return cl_poll((struct pollfd*)ufds, nfsd, timeout); +} + +#if defined (F_SETSIG) && defined(F_SETOWN) && defined (O_ASYNC) +# define HAVE_FCNTL_F_SETSIG +#endif + +#ifndef HAVE_FCNTL_F_SETSIG + +/* + * Dummy cl_poll() and cl_poll_ignore() functions for systems where + * we don't have all the support we need. + */ + +int +cl_poll(struct pollfd *fds, unsigned int nfds, int timeout) +{ + return poll(fds, (nfds_t)nfds, timeout); +} + +int +cl_poll_ignore(int fd) +{ + return 0; +} + +#else /* HAVE_FCNTL_F_SETSIG */ +static void dump_fd_info(struct pollfd *fds, unsigned int nfds, int timeoutms); +static void check_fd_info(struct pollfd *fds, unsigned int nfds); +static void cl_real_poll_fd(int fd); +static void cl_poll_sigpoll_overflow_sigaction(int nsig, siginfo_t* , void*); +static void cl_poll_sigpoll_overflow(void); +static int cl_poll_get_sigqlimit(void); +typedef unsigned char poll_bool; + +/* + * Here's our strategy: + * We have a set of signals which we use for these file descriptors, + * and we use sigtimedwait(2) to wait on information from these various + * signals. + * + * If we are ever asked to wait for a particular signal, then we will + * enable signals for that file descriptor, and post the events in + * our own cache. The next time you include that signal in a call + * to cl_poll(), you will get the information delivered + * to you in your cl_poll() call. + * + * If you want to stop monitoring a particular file descriptor, use + * cl_poll_ignore() for that purpose. Doing this is a good idea, but + * not fatal if omitted... + */ + +/* Information about a file descriptor we're monitoring */ + +typedef struct poll_fd_info_s { + short nsig; /* Which signal goes with it? */ + short pendevents; /* Pending events */ +}poll_info_t; + +static int max_allocated = 0; +static poll_bool* is_monitored = NULL; /* Sized by max_allocated */ +static poll_info_t* monitorinfo = NULL; /* Sized by max_allocated */ +static int cl_nsig = 0; +static gboolean SigQOverflow = FALSE; + +static int cl_init_poll_sig(struct pollfd *fds, unsigned int nfds); +static short cl_poll_assignsig(int fd); +static void cl_poll_sigaction(int nsig, siginfo_t* info, void* v); +static int cl_poll_prepsig(int nsig); + + +/* + * SignalSet is the set of all file descriptors we're monitoring. + * + * We monitor a file descriptor forever, unless you tell us not to + * by calling cl_poll_ignore(), or you (mistakenly) give it to + * us to look at in another poll call after you've closed it. + */ + +static sigset_t SignalSet; + +/* Select the signal you want us to use (must be a RT signal) */ +int +cl_poll_setsig(int nsig) +{ + if (nsig < SIGRTMIN || nsig >= SIGRTMAX) { + errno = EINVAL; + return -1; + } + if (cl_poll_prepsig(nsig) < 0) { + return -1; + } + cl_nsig = nsig; + return 0; +} + +/* + * It's harmless to call us multiple times on the same signal. + */ +static int +cl_poll_prepsig(int nsig) +{ + static gboolean setinityet=FALSE; + + if (!setinityet) { + CL_SIGEMPTYSET(&SignalSet); + cl_signal_set_simple_action(SIGPOLL + , cl_poll_sigpoll_overflow_sigaction + , NULL); + setinityet = TRUE; + } + if (CL_SIGINTERRUPT(nsig, FALSE) < 0) { + cl_perror("sig_interrupt(%d, FALSE)", nsig); + return -1; + } + if (CL_SIGADDSET(&SignalSet, nsig) < 0) { + cl_perror("sig_addset(&SignalSet, %d)", nsig); + return -1; + } + if (CL_SIGPROCMASK(SIG_BLOCK, &SignalSet, NULL) < 0) { + cl_perror("sig_sigprocmask(SIG_BLOCK, sig %d)", nsig); + return -1; + } + if (debug) { + cl_log(LOG_DEBUG + , "Signal %d belongs to us...", nsig); + cl_log(LOG_DEBUG, "cl_poll_prepsig(%d) succeeded.", nsig); + } + + return 0; +} + +#define FD_CHUNKSIZE 64 + +/* Set of events everyone must monitor whether they want to or not ;-) */ +#define CONSTEVENTS (POLLHUP|POLLERR|POLLNVAL) + +#define RECORDFDEVENT(fd, flags) (monitorinfo[fd].pendevents |= (flags)) + +/* + * Initialized our poll-simulation data structures. + * This means (among other things) registering any monitored + * file descriptors. + */ +static int +cl_init_poll_sig(struct pollfd *fds, unsigned int nfds) +{ + unsigned j; + int maxmonfd = -1; + int nmatch = 0; + + + if (cl_nsig == 0) { + cl_nsig = ((SIGRTMIN+SIGRTMAX)/2); + if (cl_poll_setsig(cl_nsig) < 0) { + return -1; + } + } + for (j=0; j < nfds; ++j) { + const int fd = fds[j].fd; + + if (fd > maxmonfd) { + maxmonfd = fd; + } + } + + /* See if we need to malloc/realloc our data structures */ + + if (maxmonfd >= max_allocated) { + int newsize; + int growthamount; + + newsize = ((maxmonfd + FD_CHUNKSIZE)/FD_CHUNKSIZE) + * FD_CHUNKSIZE; + growthamount = newsize - max_allocated; + + /* This can't happen ;-) */ + if (growthamount <= 0 || newsize <= maxmonfd) { + errno = EINVAL; + return -1; + } + + /* Allocate (more) memory! */ + + if ((is_monitored = (poll_bool*)realloc(is_monitored + , newsize * sizeof(poll_bool))) == NULL + || (monitorinfo = (poll_info_t*) realloc(monitorinfo + , newsize * sizeof(poll_info_t))) == NULL) { + + if (is_monitored) { + free(is_monitored); + is_monitored = NULL; + } + if (monitorinfo) { + free(monitorinfo); + monitorinfo = NULL; + } + max_allocated = 0; + errno = ENOMEM; + return -1; + } + memset(monitorinfo+max_allocated, 0 + , growthamount * sizeof(monitorinfo[0])); + memset(is_monitored+max_allocated, FALSE + , growthamount*sizeof(is_monitored[0])); + max_allocated = newsize; + } + + if (fds->events != 0 && debug) { + cl_log(LOG_DEBUG + , "Current event mask for fd [0] {%d} 0x%x" + , fds->fd, fds->events); + } + /* + * Examine each fd for the following things: + * Is it already monitored? + * if not, set it up for monitoring. + * Do we have events for it? + * if so, post events... + */ + + for (j=0; j < nfds; ++j) { + const int fd = fds[j].fd; + poll_info_t* moni = monitorinfo+fd; + short nsig; + int badfd = FALSE; + + is_monitored[fd] = TRUE; + + if (moni->nsig <= 0) { + nsig = cl_poll_assignsig(fd); + if (nsig < 0) { + RECORDFDEVENT(fd, POLLERR); + badfd = TRUE; + }else{ + /* Use real poll(2) to get initial + * event status + */ + moni->nsig = nsig; + cl_real_poll_fd(fd); + } + }else if (fcntl(fd, F_GETFD) < 0) { + cl_log(LOG_ERR, "bad fd(%d)", fd); + RECORDFDEVENT(fd, POLLNVAL); + badfd = TRUE; + } + + /* Look for pending events... */ + + fds[j].revents = (moni->pendevents + & (fds[j].events|CONSTEVENTS)); + + if (fds[j].revents) { + ++nmatch; + moni->pendevents &= ~(fds[j].revents); + if (debug) { + cl_log(LOG_DEBUG + , "revents for fd %d: 0x%x" + , fds[j].fd, fds[j].revents); + cl_log(LOG_DEBUG + , "events for fd %d: 0x%x" + , fds[j].fd, fds[j].events); + } + }else if (fds[j].events && debug) { + cl_log(LOG_DEBUG + , "pendevents for fd %d: 0x%x" + , fds[j].fd, moni->pendevents); + } + if (badfd) { + cl_poll_ignore(fd); + } + } + if (nmatch != 0 && debug) { + cl_log(LOG_DEBUG, "Returning %d events from cl_init_poll_sig()" + , nmatch); + } + return nmatch; +} + +/* + * Initialize our current state of the world with info from the + * real poll(2) call. + * + * We call this when we first see a particular fd, and after a signal + * queue overflow. + */ +static void +cl_real_poll_fd(int fd) +{ + struct pollfd pfd[1]; + + if (fd >= max_allocated || !is_monitored[fd]) { + return; + } + + if (debug) { + cl_log(LOG_DEBUG + , "Calling poll(2) on fd %d", fd); + } + /* Get the current state of affaris from poll(2) */ + pfd[0].fd = fd; + pfd[0].revents = 0; + pfd[0].events = ~0; + if (poll(pfd, 1, 0) >= 0) { + RECORDFDEVENT(fd, pfd[0].revents); + if (pfd[0].revents & (POLLNVAL|POLLERR)) { + cl_log(LOG_INFO, "cl_poll_real_fd(%d): error in revents [%d]" + , fd, pfd[0].revents); + } + if (debug) { + cl_log(LOG_DEBUG + , "Old news from poll(2) for fd %d: 0x%x" + , fd, pfd[0].revents); + } + }else{ + if (fcntl(fd, F_GETFL) < 0) { + cl_perror("cl_poll_real_fd(%d): F_GETFL failure" + , fd); + RECORDFDEVENT(fd, POLLNVAL); + }else{ + RECORDFDEVENT(fd, POLLERR); + } + } +} + +/* + * Assign a signal for monitoring the given file descriptor + */ + +static short +cl_poll_assignsig(int fd) +{ + int flags; + + + if (debug) { + cl_log(LOG_DEBUG + , "Signal %d monitors fd %d...", cl_nsig, fd); + } + + /* Test to see if the file descriptor is good */ + if ((flags = fcntl(fd, F_GETFL)) < 0) { + cl_perror("cl_poll_assignsig(%d) F_GETFL failure" + , fd); + return -1; + } + + /* Associate the right signal with the fd */ + + if (fcntl(fd, F_SETSIG, cl_nsig) < 0) { + cl_perror("cl_poll_assignsig(%d) F_SETSIG failure" + , fd); + return -1; + } + + /* Direct the signals to us */ + if (fcntl(fd, F_SETOWN, getpid()) < 0) { + cl_perror("cl_poll_assignsig(%d) F_SETOWN failure", fd); + return -1; + } + + /* OK... Go ahead and send us signals! */ + + if (fcntl(fd, F_SETFL, flags|O_ASYNC) < 0) { + cl_perror("cl_poll_assignsig(%d) F_SETFL(O_ASYNC) failure" + , fd); + return -1; + } + + return cl_nsig; +} + + +/* + * This is a function we call as a (fake) signal handler. + * + * It records events to our "monitorinfo" structure. + * + * Except for the cl_log() call, it could be called in a signal + * context. + */ + +static void +cl_poll_sigaction(int nsig, siginfo_t* info, void* v) +{ + int fd; + + /* What do you suppose all the various si_code values mean? */ + + fd = info->si_fd; + if (debug) { + cl_log(LOG_DEBUG + , "cl_poll_sigaction(nsig=%d fd=%d" + ", si_code=%d si_band=0x%lx)" + , nsig, fd, info->si_code + , (unsigned long)info->si_band); + } + + if (fd <= 0) { + return; + } + + + if (fd >= max_allocated || !is_monitored[fd]) { + return; + } + + /* We should not call logging functions in (real) signal handlers */ + if (nsig != monitorinfo[fd].nsig) { + cl_log(LOG_ERR, "cl_poll_sigaction called with signal %d/%d" + , nsig, monitorinfo[fd].nsig); + } + + /* Record everything as a pending event. */ + RECORDFDEVENT(fd, info->si_band); +} + + + +/* + * This is called whenever a file descriptor shouldn't be + * monitored any more. + */ +int +cl_poll_ignore(int fd) +{ + int flags; + + if (debug) { + cl_log(LOG_DEBUG + , "cl_poll_ignore(%d)", fd); + } + if (fd < 0 || fd >= max_allocated) { + errno = EINVAL; + return -1; + } + if (!is_monitored[fd]) { + return 0; + } + + is_monitored[fd] = FALSE; + memset(monitorinfo+fd, 0, sizeof(monitorinfo[0])); + + if ((flags = fcntl(fd, F_GETFL)) >= 0) { + flags &= ~O_ASYNC; + if (fcntl(fd, F_SETFL, flags) < 0) { + return -1; + } + }else{ + return flags; + } + return 0; +} + + +/* + * cl_poll: fake poll routine based on POSIX realtime signals. + * + * We want to emulate poll as exactly as possible, but poll has a couple + * of problems: scaleability, and it tends to sleep in the kernel + * because the first argument is an argument of arbitrary size, and + * generally requires allocating memory. + * + * The challenge is that poll is level-triggered, but the POSIX + * signals (and sigtimedwait(2)) are edge triggered. This is + * one of the reasons why we have the cl_real_poll_fd() function + * - to get the current "level" before we start. + * Once we have this level we can compute something like the current + * level + */ + +int +cl_poll(struct pollfd *fds, unsigned int nfds, int timeoutms) +{ + int nready; + struct timespec ts; + static const struct timespec zerotime = {0L, 0L}; + const struct timespec* itertime = &ts; + siginfo_t info; + int eventcount = 0; + unsigned int j; + int savederrno = errno; + int stw_errno; + int rc; + longclock_t starttime; + longclock_t endtime; + const int msfudge + = 2* 1000/hz_longclock(); + int mselapsed = 0; + + /* Do we have any old news to report? */ + if ((nready=cl_init_poll_sig(fds, nfds)) != 0) { + /* Return error or old news to report */ + if (debug) { + cl_log(LOG_DEBUG, "cl_poll: early return(%d)", nready); + } + return nready; + } + + /* Nothing to report yet... */ + + /* So, we'll do a sigtimedwait(2) to wait for signals + * and see if we can find something to report... + * + * cl_init_poll() prepared a set of file signals to watch... + */ + +recalcandwaitagain: + if (timeoutms >= 0) { + ts.tv_sec = timeoutms / 1000; + ts.tv_nsec = (((unsigned long)timeoutms) % 1000UL)*1000000UL; + }else{ + ts.tv_sec = G_MAXLONG; + ts.tv_nsec = 99999999UL; + } + + /* + * Perform a timed wait for any of our signals... + * + * We shouldn't sleep for any call but (possibly) the first one. + * Subsequent calls should just pick up other events without + * sleeping. + */ + + starttime = time_longclock(); + /* + * Wait up to the prescribed time for a signal. + * If we get a signal, then loop grabbing all other + * pending signals. Note that subsequent iterations will + * use &zerotime to get the minimum wait time. + */ + if (debug) { + check_fd_info(fds, nfds); + dump_fd_info(fds, nfds, timeoutms); + } +waitagain: + while (sigtimedwait(&SignalSet, &info, itertime) >= 0) { + int nsig = info.si_signo; + + /* Call signal handler to simulate signal reception */ + + cl_poll_sigaction(nsig, &info, NULL); + itertime = &zerotime; + } + stw_errno=errno; /* Save errno for later use */ + endtime = time_longclock(); + mselapsed = longclockto_ms(sub_longclock(endtime, starttime)); + +#ifdef TIME_CALLS + if (timeoutms >= 0 && mselapsed > timeoutms + msfudge) { + /* We slept too long... */ + cl_log(LOG_WARNING + , "sigtimedwait() sequence for %d ms took %d ms" + , timeoutms, mselapsed); + } +#endif + + if (SigQOverflow) { + /* OOPS! Better recover from this! */ + /* This will use poll(2) to correct our current status */ + cl_poll_sigpoll_overflow(); + } + + /* Post observed events and count them... */ + + for (j=0; j < nfds; ++j) { + int fd = fds[j].fd; + poll_info_t* moni = monitorinfo+fd; + fds[j].revents = (moni->pendevents + & (fds[j].events|CONSTEVENTS)); + if (fds[j].revents) { + ++eventcount; + moni->pendevents &= ~(fds[j].revents); + /* Make POLLHUP persistent */ + if (fds[j].revents & POLLHUP) { + moni->pendevents |= POLLHUP; + /* Don't lose input events at EOF */ + if (fds[j].events & POLLIN) { + cl_real_poll_fd(fds[j].fd); + } + } + } + } + if (eventcount == 0 && stw_errno == EAGAIN && timeoutms != 0) { + /* We probably saw an event the user didn't ask to see. */ + /* Consquently, we may have more waiting to do */ + if (timeoutms < 0) { + /* Restore our infinite wait time */ + itertime = &ts; + goto waitagain; + }else if (timeoutms > 0) { + if (mselapsed < timeoutms) { + timeoutms -= mselapsed; + goto recalcandwaitagain; + } + } + } + rc = (eventcount > 0 ? eventcount : (stw_errno == EAGAIN ? 0 : -1)); + + if (rc >= 0) { + errno = savederrno; + } + return rc; +} +/* + * Debugging routine for printing current poll arguments, etc. + */ +static void +dump_fd_info(struct pollfd *fds, unsigned int nfds, int timeoutms) +{ + unsigned j; + + cl_log(LOG_DEBUG, "timeout: %d milliseconds", timeoutms); + for (j=0; j < nfds; ++j) { + int fd = fds[j].fd; + poll_info_t* moni = monitorinfo+fd; + + cl_log(LOG_DEBUG, "fd %d flags: 0%o, signal: %d, events: 0x%x" + ", revents: 0x%x, pendevents: 0x%x" + , fd, fcntl(fd, F_GETFL), moni->nsig + , fds[j].events, fds[j].revents, moni->pendevents); + } + for (j=SIGRTMIN; j < (unsigned)SIGRTMAX; ++j) { + if (!sigismember(&SignalSet, j)) { + continue; + } + cl_log(LOG_DEBUG, "Currently monitoring RT signal %d", j); + } +} + +/* + * Debugging routine for auditing our file descriptors, etc. + */ +static void +check_fd_info(struct pollfd *fds, unsigned int nfds) +{ + unsigned j; + + for (j=0; j < nfds; ++j) { + int fd = fds[j].fd; + poll_info_t* moni = monitorinfo+fd; + + if (!sigismember(&SignalSet, moni->nsig)) { + cl_log(LOG_ERR, "SIGNAL %d not in monitored SignalSet" + , moni->nsig); + } + } + for (j=0; j < 10; ++j) { + int sig; + int flags; + int pid; + if ((flags = fcntl(j, F_GETFL)) < 0 || (flags & O_ASYNC) ==0){ + continue; + } + sig = fcntl(j, F_GETSIG); + if (sig == 0) { + cl_log(LOG_ERR, "FD %d will get SIGIO", j); + } + if (!sigismember(&SignalSet, sig)) { + cl_log(LOG_ERR, "FD %d (signal %d) is not in SignalSet" + , j, sig); + } + if (sig < SIGRTMIN || sig >= SIGRTMAX) { + cl_log(LOG_ERR, "FD %d (signal %d) is not RealTime" + , j, sig); + } + pid = fcntl(j, F_GETOWN); + if (pid != getpid()) { + cl_log(LOG_ERR, "FD %d (signal %d) owner is pid %d" + , j, sig, pid); + } + } +} + +/* Note that the kernel signalled an event queue overflow */ +static void +cl_poll_sigpoll_overflow_sigaction(int nsig, siginfo_t* info, void* v) +{ + SigQOverflow = TRUE; +} + +#define MAXQNAME "rtsig-max" +/* + * Called when signal queue overflow is suspected. + * We then use poll(2) to get the current data. It's slow, but it + * should work quite nicely. + */ +static void +cl_poll_sigpoll_overflow(void) +{ + int fd; + int limit; + + if (!SigQOverflow) { + return; + } + cl_log(LOG_WARNING, "System signal queue overflow."); + limit = cl_poll_get_sigqlimit(); + if (limit > 0) { + cl_log(LOG_WARNING, "Increase '%s'. Current limit is %d" + " (see sysctl(8)).", MAXQNAME, limit); + } + + SigQOverflow = FALSE; + + for (fd = 0; fd < max_allocated; ++fd) { + if (is_monitored[fd]) { + cl_real_poll_fd(fd); + } + } +} + +#define PSK "/proc/sys/kernel/" + +/* Get current kernel signal queue limit */ +/* This only works on Linux - but that's not a big problem... */ +static int +cl_poll_get_sigqlimit(void) +{ + int limit = -1; + int pfd; + char result[32]; + + pfd = open(PSK MAXQNAME, O_RDONLY); + if (pfd >= 0 && read(pfd, result, sizeof(result)) > 1) { + limit = atoi(result); + if (limit < 1) { + limit = -1; + } + } + if (pfd >= 0) { + close(pfd); + } + return limit; +} +#endif /* HAVE_FCNTL_F_SETSIG */ |