/*--------------------------------------------------------------- * Copyright (c) 1999,2000,2001,2002,2003 * The Board of Trustees of the University of Illinois * All Rights Reserved. *--------------------------------------------------------------- * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software (Iperf) and associated * documentation files (the "Software"), to deal in the Software * without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, * sublicense, and/or sell copies of the Software, and to permit * persons to whom the Software is furnished to do * so, subject to the following conditions: * * * Redistributions of source code must retain the above * copyright notice, this list of conditions and * the following disclaimers. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimers in the documentation and/or other materials * provided with the distribution. * * * Neither the names of the University of Illinois, NCSA, * nor the names of its contributors may be used to endorse * or promote products derived from this Software without * specific prior written permission. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE CONTIBUTORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * ________________________________________________________________ * National Laboratory for Applied Network Research * National Center for Supercomputing Applications * University of Illinois at Urbana-Champaign * http://www.ncsa.uiuc.edu * ________________________________________________________________ * * delay.c * by Mark Gates * updates * by Robert J. McMahon * ------------------------------------------------------------------- * attempts at accurate microsecond delays * ------------------------------------------------------------------- */ #include "headers.h" #include "util.h" #include "delay.h" #include "Thread.h" #include #define MILLION 1000000 #define BILLION 1000000000 /* ------------------------------------------------------------------- * A micro-second delay function * o Use a busy loop or nanosleep * * Some notes: * o clock nanosleep with a relative is preferred (see man page for why) * o clock_gettime() (if available) is preferred over gettimeofday() * as it give nanosecond resolution and should be more efficient. * It also supports CLOCK_MONOTONIC and CLOCK_MONOTONIC_RAW * though CLOCK_REALTIME is being used by the code. * o This code does not use Timestamp object, as the goal of these * functions is accurate delays (vs accurate timestamps.) * o The syscalls such as nanosleep guarantee at least the request time * and can and will delay longer, particularly due to things like context * switching, causing the delay to lose accuracy * o Kalman filtering is used to predict delay error which in turn * is used to adjust the delay, hopefully mitigating the above. * Note: This can cause the delay to return faster than the request, * i.e. the *at least* guarantee is not preserved for the kalman * adjusted delay calls. * o Remember, the Client is keeping a running average delay for the * thread so errors in delay will also be adjusted there. (Assuming * it's possible. It's not really possible at top line link rates * because lost time can't be made up for by speeding up the transmits. * Hence, don't lose time with delay calls which error on the side of * taking too long. Kalman should help much here.) * * POSIX nanosleep(). This allows a higher timing resolution * (under Linux e.g. it uses hrtimers), does not affect any signals, * and will use up remaining time when interrupted. * ------------------------------------------------------------------- */ void delay_loop(unsigned long usec) { #ifdef HAVE_CLOCK_NANOSLEEP { struct timespec res; res.tv_sec = usec/MILLION; res.tv_nsec = (usec * 1000) % BILLION; #ifndef WIN32 clock_nanosleep(CLOCK_MONOTONIC, 0, &res, NULL); #else clock_nanosleep(0, 0, &res, NULL); #endif } #else #ifdef HAVE_KALMAN delay_kalman(usec); #else #ifdef HAVE_NANOSLEEP delay_nanosleep(usec); #else delay_busyloop(usec); #endif #endif #endif } int clock_usleep (struct timeval *request) { int rc = 0; #if HAVE_THREAD_DEBUG thread_debug("Thread called clock_usleep() until %ld.%ld", request->tv_sec, request->tv_usec); #endif #ifdef HAVE_CLOCK_NANOSLEEP struct timespec tmp; tmp.tv_sec = request->tv_sec; tmp.tv_nsec = request->tv_usec * 1000; // Cygwin systems have an issue with CLOCK_MONOTONIC #if defined(CLOCK_MONOTONIC) && !defined(WIN32) rc = clock_nanosleep(CLOCK_MONOTONIC, 0, &tmp, NULL); #else rc = clock_nanosleep(0, 0, &tmp, NULL); #endif if (rc) { fprintf(stderr, "failed clock_nanosleep()=%d\n", rc); } #else struct timeval now; struct timeval next = *request; TimeGetNow(now); double delta_usecs; if ((delta_usecs = TimeDifference(next, now)) > 0.0) { delay_loop(delta_usecs); } #endif return rc; } int clock_usleep_abstime (struct timeval *request) { int rc = 0; #if defined(HAVE_CLOCK_NANOSLEEP) && defined(TIMER_ABSTIME) && !defined(WIN32) struct timespec tmp; tmp.tv_sec = request->tv_sec; tmp.tv_nsec = request->tv_usec * 1000; rc = clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &tmp, NULL); if (rc) { fprintf(stderr, "failed clock_nanosleep()=%d\n", rc); } #else struct timeval now; struct timeval next = *request; #ifdef HAVE_CLOCK_GETTIME struct timespec t1; clock_gettime(CLOCK_REALTIME, &t1); now.tv_sec = t1.tv_sec; now.tv_usec = t1.tv_nsec / 1000; #else gettimeofday(&now, NULL); #endif double delta_usecs; if ((delta_usecs = (1e6 * TimeDifference(next, now))) > 0.0) { delay_loop(delta_usecs); } #endif return rc; } #ifdef HAVE_NANOSLEEP // Can use the nanosleep syscall suspending the thread void delay_nanosleep (unsigned long usec) { struct timespec requested, remaining; requested.tv_sec = 0; requested.tv_nsec = usec * 1000L; // Note, signals will cause the nanosleep // to return early. That's fine. nanosleep(&requested, &remaining); } #endif #if defined (HAVE_NANOSLEEP) || defined (HAVE_CLOCK_GETTIME) static void timespec_add_ulong (struct timespec *tv0, unsigned long value) { tv0->tv_sec += (value / BILLION); tv0->tv_nsec += (value % BILLION); if (tv0->tv_nsec >= BILLION) { tv0->tv_sec++; tv0->tv_nsec -= BILLION; } } #endif #ifdef HAVE_KALMAN // Kalman versions attempt to support delay request // accuracy over a minimum guaranteed delay by // prediciting the delay error. This is // the basic recursive algorithm. static void kalman_update (struct kalman_state *state, double measurement) { //prediction update state->p = state->p + state->q; //measurement update state->k = state->p / (state->p + state->r); state->x = state->x + (state->k * (measurement - state->x)); state->p = (1 - state->k) * state->p; } #endif #ifdef HAVE_CLOCK_GETTIME // Delay calls for systems with clock_gettime // Working units are nanoseconds and structures are timespec static void timespec_add_double (struct timespec *tv0, double value) { tv0->tv_nsec += (unsigned long) value; if (tv0->tv_nsec >= BILLION) { tv0->tv_sec++; tv0->tv_nsec -= BILLION; } } // tv1 assumed greater than tv0 static double timespec_diff (struct timespec tv1, struct timespec tv0) { double result; if (tv1.tv_nsec < tv0.tv_nsec) { tv1.tv_nsec += BILLION; tv1.tv_sec--; } result = (double) (((tv1.tv_sec - tv0.tv_sec) * BILLION) + (tv1.tv_nsec - tv0.tv_nsec)); return result; } static void timespec_add( struct timespec *tv0, struct timespec *tv1) { tv0->tv_sec += tv1->tv_sec; tv0->tv_nsec += tv1->tv_nsec; if ( tv0->tv_nsec >= BILLION ) { tv0->tv_nsec -= BILLION; tv0->tv_sec++; } } static inline int timespec_greaterthan(struct timespec tv1, struct timespec tv0) { if (tv1.tv_sec > tv0.tv_sec || \ ((tv0.tv_sec == tv1.tv_sec) && (tv1.tv_nsec > tv0.tv_nsec))) { return 1; } else { return 0; } } // A cpu busy loop for systems with clock_gettime void delay_busyloop (unsigned long usec) { struct timespec t1, t2; clock_gettime(CLOCK_REALTIME, &t1); timespec_add_ulong(&t1, (usec * 1000L)); while (1) { clock_gettime(CLOCK_REALTIME, &t2); if (timespec_greaterthan(t2, t1)) break; } } // Kalman routines for systems with clock_gettime #ifdef HAVE_KALMAN // Request units is microseconds // Adjust units is nanoseconds void delay_kalman (unsigned long usec) { struct timespec t1, t2, finishtime, requested={0,0}, remaining; double nsec_adjusted, err; static struct kalman_state kalmanerr={ 0.00001, //q process noise covariance 0.1, //r measurement noise covariance 0.0, //x value, error predictio (units nanoseconds) 1, //p estimation error covariance 0.75 //k kalman gain }; // Get the current clock clock_gettime(CLOCK_REALTIME, &t1); // Perform the kalman adjust per the predicted delay error nsec_adjusted = (usec * 1000.0) - kalmanerr.x; // Set a timespec to be used by the nanosleep // as well as for the finished time calculation timespec_add_double(&requested, nsec_adjusted); // Set the finish time in timespec format finishtime = t1; timespec_add(&finishtime, &requested); # ifdef HAVE_NANOSLEEP // Don't call nanosleep for values less than 10 microseconds // as the syscall is too expensive. Let the busy loop // provide the delay for times under that. if (nsec_adjusted > 10000) { nanosleep(&requested, &remaining); } # endif while (1) { clock_gettime(CLOCK_REALTIME, &t2); if (timespec_greaterthan(t2, finishtime)) break; } // Compute the delay error in units of nanoseconds // and cast to type double err = (timespec_diff(t2, t1) - (usec * 1000)); // printf("req: %ld adj: %f err: %.5f (ns)\n", usec, nsec_adjusted, kalmanerr.x); kalman_update(&kalmanerr, err); } #endif // HAVE_KALMAN #else // Sadly, these systems must use the not so efficient gettimeofday() // and working units are microseconds, struct is timeval static void timeval_add_ulong (struct timeval *tv0, unsigned long value) { tv0->tv_usec += value; if (tv0->tv_usec >= MILLION) { tv0->tv_sec++; tv0->tv_usec -= MILLION; } } static inline int timeval_greaterthan(struct timeval tv1, struct timeval tv0) { if (tv1.tv_sec > tv0.tv_sec || \ ((tv0.tv_sec == tv1.tv_sec) && (tv1.tv_usec > tv0.tv_usec))) { return 1; } else { return 0; } } // tv1 assumed greater than tv0 static double timeval_diff (struct timeval tv1, struct timeval tv0) { double result; if (tv1.tv_usec < tv0.tv_usec) { tv1.tv_usec += MILLION; tv1.tv_sec--; } result = (double) (((tv1.tv_sec - tv0.tv_sec) * MILLION) + (tv1.tv_usec - tv0.tv_usec)); return result; } void delay_busyloop (unsigned long usec) { struct timeval t1, t2; gettimeofday( &t1, NULL ); timeval_add_ulong(&t1, usec); while (1) { gettimeofday( &t2, NULL ); if (timeval_greaterthan(t2, t1)) break; } } #ifdef HAVE_KALMAN // Request units is microseconds // Adjust units is microseconds void delay_kalman (unsigned long usec) { struct timeval t1, t2, finishtime; long usec_adjusted; double err; static struct kalman_state kalmanerr={ 0.00001, //q process noise covariance 0.1, //r measurement noise covariance 0.0, //x value, error predictio (units nanoseconds) 1, //p estimation error covariance 0.25 //k kalman gain }; // Get the current clock gettimeofday( &t1, NULL ); // Perform the kalman adjust per the predicted delay error if (kalmanerr.x > 0) { usec_adjusted = usec - (long) floor(kalmanerr.x); if (usec_adjusted < 0) usec_adjusted = 0; } else usec_adjusted = usec + (long) floor(kalmanerr.x); // Set the finishtime finishtime = t1; timeval_add_ulong(&finishtime, usec_adjusted); # ifdef HAVE_NANOSLEEP // Don't call nanosleep for values less than 10 microseconds // as the syscall is too expensive. Let the busy loop // provide the delay for times under that. if (usec_adjusted > 10) { struct timespec requested={0,0}, remaining; timespec_add_ulong(&requested, (usec_adjusted * 1000)); nanosleep(&requested, &remaining); } # endif while (1) { gettimeofday(&t2, NULL ); if (timeval_greaterthan(t2, finishtime)) break; } // Compute the delay error in units of microseconds // and cast to type double err = (double)(timeval_diff(t2, t1) - usec); // printf("req: %ld adj: %ld err: %.5f (us)\n", usec, usec_adjusted, kalmanerr.x); kalman_update(&kalmanerr, err); } #endif // Kalman #endif