/* Maxmemory directive handling (LRU eviction and other policies). * * ---------------------------------------------------------------------------- * * Copyright (c) 2009-2016, Salvatore Sanfilippo * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Redis nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "server.h" #include "bio.h" #include "atomicvar.h" #include "script.h" #include /* ---------------------------------------------------------------------------- * Data structures * --------------------------------------------------------------------------*/ /* To improve the quality of the LRU approximation we take a set of keys * that are good candidate for eviction across performEvictions() calls. * * Entries inside the eviction pool are taken ordered by idle time, putting * greater idle times to the right (ascending order). * * When an LFU policy is used instead, a reverse frequency indication is used * instead of the idle time, so that we still evict by larger value (larger * inverse frequency means to evict keys with the least frequent accesses). * * Empty entries have the key pointer set to NULL. */ #define EVPOOL_SIZE 16 #define EVPOOL_CACHED_SDS_SIZE 255 struct evictionPoolEntry { unsigned long long idle; /* Object idle time (inverse frequency for LFU) */ sds key; /* Key name. */ sds cached; /* Cached SDS object for key name. */ int dbid; /* Key DB number. */ }; static struct evictionPoolEntry *EvictionPoolLRU; /* ---------------------------------------------------------------------------- * Implementation of eviction, aging and LRU * --------------------------------------------------------------------------*/ /* Return the LRU clock, based on the clock resolution. This is a time * in a reduced-bits format that can be used to set and check the * object->lru field of redisObject structures. */ unsigned int getLRUClock(void) { return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX; } /* This function is used to obtain the current LRU clock. * If the current resolution is lower than the frequency we refresh the * LRU clock (as it should be in production servers) we return the * precomputed value, otherwise we need to resort to a system call. */ unsigned int LRU_CLOCK(void) { unsigned int lruclock; if (1000/server.hz <= LRU_CLOCK_RESOLUTION) { lruclock = server.lruclock; } else { lruclock = getLRUClock(); } return lruclock; } /* Given an object returns the min number of milliseconds the object was never * requested, using an approximated LRU algorithm. */ unsigned long long estimateObjectIdleTime(robj *o) { unsigned long long lruclock = LRU_CLOCK(); if (lruclock >= o->lru) { return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION; } else { return (lruclock + (LRU_CLOCK_MAX - o->lru)) * LRU_CLOCK_RESOLUTION; } } /* LRU approximation algorithm * * Redis uses an approximation of the LRU algorithm that runs in constant * memory. Every time there is a key to expire, we sample N keys (with * N very small, usually in around 5) to populate a pool of best keys to * evict of M keys (the pool size is defined by EVPOOL_SIZE). * * The N keys sampled are added in the pool of good keys to expire (the one * with an old access time) if they are better than one of the current keys * in the pool. * * After the pool is populated, the best key we have in the pool is expired. * However note that we don't remove keys from the pool when they are deleted * so the pool may contain keys that no longer exist. * * When we try to evict a key, and all the entries in the pool don't exist * we populate it again. This time we'll be sure that the pool has at least * one key that can be evicted, if there is at least one key that can be * evicted in the whole database. */ /* Create a new eviction pool. */ void evictionPoolAlloc(void) { struct evictionPoolEntry *ep; int j; ep = zmalloc(sizeof(*ep)*EVPOOL_SIZE); for (j = 0; j < EVPOOL_SIZE; j++) { ep[j].idle = 0; ep[j].key = NULL; ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE); ep[j].dbid = 0; } EvictionPoolLRU = ep; } /* This is a helper function for performEvictions(), it is used in order * to populate the evictionPool with a few entries every time we want to * expire a key. Keys with idle time bigger than one of the current * keys are added. Keys are always added if there are free entries. * * We insert keys on place in ascending order, so keys with the smaller * idle time are on the left, and keys with the higher idle time on the * right. */ void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) { int j, k, count; dictEntry *samples[server.maxmemory_samples]; count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples); for (j = 0; j < count; j++) { unsigned long long idle; sds key; robj *o; dictEntry *de; de = samples[j]; key = dictGetKey(de); /* If the dictionary we are sampling from is not the main * dictionary (but the expires one) we need to lookup the key * again in the key dictionary to obtain the value object. */ if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) { if (sampledict != keydict) de = dictFind(keydict, key); o = dictGetVal(de); } /* Calculate the idle time according to the policy. This is called * idle just because the code initially handled LRU, but is in fact * just a score where an higher score means better candidate. */ if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) { idle = estimateObjectIdleTime(o); } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { /* When we use an LRU policy, we sort the keys by idle time * so that we expire keys starting from greater idle time. * However when the policy is an LFU one, we have a frequency * estimation, and we want to evict keys with lower frequency * first. So inside the pool we put objects using the inverted * frequency subtracting the actual frequency to the maximum * frequency of 255. */ idle = 255-LFUDecrAndReturn(o); } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { /* In this case the sooner the expire the better. */ idle = ULLONG_MAX - (long)dictGetVal(de); } else { serverPanic("Unknown eviction policy in evictionPoolPopulate()"); } /* Insert the element inside the pool. * First, find the first empty bucket or the first populated * bucket that has an idle time smaller than our idle time. */ k = 0; while (k < EVPOOL_SIZE && pool[k].key && pool[k].idle < idle) k++; if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) { /* Can't insert if the element is < the worst element we have * and there are no empty buckets. */ continue; } else if (k < EVPOOL_SIZE && pool[k].key == NULL) { /* Inserting into empty position. No setup needed before insert. */ } else { /* Inserting in the middle. Now k points to the first element * greater than the element to insert. */ if (pool[EVPOOL_SIZE-1].key == NULL) { /* Free space on the right? Insert at k shifting * all the elements from k to end to the right. */ /* Save SDS before overwriting. */ sds cached = pool[EVPOOL_SIZE-1].cached; memmove(pool+k+1,pool+k, sizeof(pool[0])*(EVPOOL_SIZE-k-1)); pool[k].cached = cached; } else { /* No free space on right? Insert at k-1 */ k--; /* Shift all elements on the left of k (included) to the * left, so we discard the element with smaller idle time. */ sds cached = pool[0].cached; /* Save SDS before overwriting. */ if (pool[0].key != pool[0].cached) sdsfree(pool[0].key); memmove(pool,pool+1,sizeof(pool[0])*k); pool[k].cached = cached; } } /* Try to reuse the cached SDS string allocated in the pool entry, * because allocating and deallocating this object is costly * (according to the profiler, not my fantasy. Remember: * premature optimization bla bla bla. */ int klen = sdslen(key); if (klen > EVPOOL_CACHED_SDS_SIZE) { pool[k].key = sdsdup(key); } else { memcpy(pool[k].cached,key,klen+1); sdssetlen(pool[k].cached,klen); pool[k].key = pool[k].cached; } pool[k].idle = idle; pool[k].dbid = dbid; } } /* ---------------------------------------------------------------------------- * LFU (Least Frequently Used) implementation. * We have 24 total bits of space in each object in order to implement * an LFU (Least Frequently Used) eviction policy, since we re-use the * LRU field for this purpose. * * We split the 24 bits into two fields: * * 16 bits 8 bits * +----------------+--------+ * + Last decr time | LOG_C | * +----------------+--------+ * * LOG_C is a logarithmic counter that provides an indication of the access * frequency. However this field must also be decremented otherwise what used * to be a frequently accessed key in the past, will remain ranked like that * forever, while we want the algorithm to adapt to access pattern changes. * * So the remaining 16 bits are used in order to store the "decrement time", * a reduced-precision Unix time (we take 16 bits of the time converted * in minutes since we don't care about wrapping around) where the LOG_C * counter is halved if it has an high value, or just decremented if it * has a low value. * * New keys don't start at zero, in order to have the ability to collect * some accesses before being trashed away, so they start at LFU_INIT_VAL. * The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL * when incrementing the key, so that keys starting at LFU_INIT_VAL * (or having a smaller value) have a very high chance of being incremented * on access. * * During decrement, the value of the logarithmic counter is halved if * its current value is greater than two times the LFU_INIT_VAL, otherwise * it is just decremented by one. * --------------------------------------------------------------------------*/ /* Return the current time in minutes, just taking the least significant * 16 bits. The returned time is suitable to be stored as LDT (last decrement * time) for the LFU implementation. */ unsigned long LFUGetTimeInMinutes(void) { return (server.unixtime/60) & 65535; } /* Given an object last access time, compute the minimum number of minutes * that elapsed since the last access. Handle overflow (ldt greater than * the current 16 bits minutes time) considering the time as wrapping * exactly once. */ unsigned long LFUTimeElapsed(unsigned long ldt) { unsigned long now = LFUGetTimeInMinutes(); if (now >= ldt) return now-ldt; return 65535-ldt+now; } /* Logarithmically increment a counter. The greater is the current counter value * the less likely is that it gets really incremented. Saturate it at 255. */ uint8_t LFULogIncr(uint8_t counter) { if (counter == 255) return 255; double r = (double)rand()/RAND_MAX; double baseval = counter - LFU_INIT_VAL; if (baseval < 0) baseval = 0; double p = 1.0/(baseval*server.lfu_log_factor+1); if (r < p) counter++; return counter; } /* If the object decrement time is reached decrement the LFU counter but * do not update LFU fields of the object, we update the access time * and counter in an explicit way when the object is really accessed. * And we will times halve the counter according to the times of * elapsed time than server.lfu_decay_time. * Return the object frequency counter. * * This function is used in order to scan the dataset for the best object * to fit: as we check for the candidate, we incrementally decrement the * counter of the scanned objects if needed. */ unsigned long LFUDecrAndReturn(robj *o) { unsigned long ldt = o->lru >> 8; unsigned long counter = o->lru & 255; unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0; if (num_periods) counter = (num_periods > counter) ? 0 : counter - num_periods; return counter; } /* We don't want to count AOF buffers and slaves output buffers as * used memory: the eviction should use mostly data size, because * it can cause feedback-loop when we push DELs into them, putting * more and more DELs will make them bigger, if we count them, we * need to evict more keys, and then generate more DELs, maybe cause * massive eviction loop, even all keys are evicted. * * This function returns the sum of AOF and replication buffer. */ size_t freeMemoryGetNotCountedMemory(void) { size_t overhead = 0; /* Since all replicas and replication backlog share global replication * buffer, we think only the part of exceeding backlog size is the extra * separate consumption of replicas. * * Note that although the backlog is also initially incrementally grown * (pushing DELs consumes memory), it'll eventually stop growing and * remain constant in size, so even if its creation will cause some * eviction, it's capped, and also here to stay (no resonance effect) * * Note that, because we trim backlog incrementally in the background, * backlog size may exceeds our setting if slow replicas that reference * vast replication buffer blocks disconnect. To avoid massive eviction * loop, we don't count the delayed freed replication backlog into used * memory even if there are no replicas, i.e. we still regard this memory * as replicas'. */ if ((long long)server.repl_buffer_mem > server.repl_backlog_size) { /* We use list structure to manage replication buffer blocks, so backlog * also occupies some extra memory, we can't know exact blocks numbers, * we only get approximate size according to per block size. */ size_t extra_approx_size = (server.repl_backlog_size/PROTO_REPLY_CHUNK_BYTES + 1) * (sizeof(replBufBlock)+sizeof(listNode)); size_t counted_mem = server.repl_backlog_size + extra_approx_size; if (server.repl_buffer_mem > counted_mem) { overhead += (server.repl_buffer_mem - counted_mem); } } if (server.aof_state != AOF_OFF) { overhead += sdsAllocSize(server.aof_buf); } return overhead; } /* Get the memory status from the point of view of the maxmemory directive: * if the memory used is under the maxmemory setting then C_OK is returned. * Otherwise, if we are over the memory limit, the function returns * C_ERR. * * The function may return additional info via reference, only if the * pointers to the respective arguments is not NULL. Certain fields are * populated only when C_ERR is returned: * * 'total' total amount of bytes used. * (Populated both for C_ERR and C_OK) * * 'logical' the amount of memory used minus the slaves/AOF buffers. * (Populated when C_ERR is returned) * * 'tofree' the amount of memory that should be released * in order to return back into the memory limits. * (Populated when C_ERR is returned) * * 'level' this usually ranges from 0 to 1, and reports the amount of * memory currently used. May be > 1 if we are over the memory * limit. * (Populated both for C_ERR and C_OK) */ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level) { size_t mem_reported, mem_used, mem_tofree; /* Check if we are over the memory usage limit. If we are not, no need * to subtract the slaves output buffers. We can just return ASAP. */ mem_reported = zmalloc_used_memory(); if (total) *total = mem_reported; /* We may return ASAP if there is no need to compute the level. */ if (!server.maxmemory) { if (level) *level = 0; return C_OK; } if (mem_reported <= server.maxmemory && !level) return C_OK; /* Remove the size of slaves output buffers and AOF buffer from the * count of used memory. */ mem_used = mem_reported; size_t overhead = freeMemoryGetNotCountedMemory(); mem_used = (mem_used > overhead) ? mem_used-overhead : 0; /* Compute the ratio of memory usage. */ if (level) *level = (float)mem_used / (float)server.maxmemory; if (mem_reported <= server.maxmemory) return C_OK; /* Check if we are still over the memory limit. */ if (mem_used <= server.maxmemory) return C_OK; /* Compute how much memory we need to free. */ mem_tofree = mem_used - server.maxmemory; if (logical) *logical = mem_used; if (tofree) *tofree = mem_tofree; return C_ERR; } /* Return 1 if used memory is more than maxmemory after allocating more memory, * return 0 if not. Redis may reject user's requests or evict some keys if used * memory exceeds maxmemory, especially, when we allocate huge memory at once. */ int overMaxmemoryAfterAlloc(size_t moremem) { if (!server.maxmemory) return 0; /* No limit. */ /* Check quickly. */ size_t mem_used = zmalloc_used_memory(); if (mem_used + moremem <= server.maxmemory) return 0; size_t overhead = freeMemoryGetNotCountedMemory(); mem_used = (mem_used > overhead) ? mem_used - overhead : 0; return mem_used + moremem > server.maxmemory; } /* The evictionTimeProc is started when "maxmemory" has been breached and * could not immediately be resolved. This will spin the event loop with short * eviction cycles until the "maxmemory" condition has resolved or there are no * more evictable items. */ static int isEvictionProcRunning = 0; static int evictionTimeProc( struct aeEventLoop *eventLoop, long long id, void *clientData) { UNUSED(eventLoop); UNUSED(id); UNUSED(clientData); if (performEvictions() == EVICT_RUNNING) return 0; /* keep evicting */ /* For EVICT_OK - things are good, no need to keep evicting. * For EVICT_FAIL - there is nothing left to evict. */ isEvictionProcRunning = 0; return AE_NOMORE; } void startEvictionTimeProc(void) { if (!isEvictionProcRunning) { isEvictionProcRunning = 1; aeCreateTimeEvent(server.el, 0, evictionTimeProc, NULL, NULL); } } /* Check if it's safe to perform evictions. * Returns 1 if evictions can be performed * Returns 0 if eviction processing should be skipped */ static int isSafeToPerformEvictions(void) { /* - There must be no script in timeout condition. * - Nor we are loading data right now. */ if (isInsideYieldingLongCommand() || server.loading) return 0; /* By default replicas should ignore maxmemory * and just be masters exact copies. */ if (server.masterhost && server.repl_slave_ignore_maxmemory) return 0; /* If 'evict' action is paused, for whatever reason, then return false */ if (isPausedActionsWithUpdate(PAUSE_ACTION_EVICT)) return 0; return 1; } /* Algorithm for converting tenacity (0-100) to a time limit. */ static unsigned long evictionTimeLimitUs(void) { serverAssert(server.maxmemory_eviction_tenacity >= 0); serverAssert(server.maxmemory_eviction_tenacity <= 100); if (server.maxmemory_eviction_tenacity <= 10) { /* A linear progression from 0..500us */ return 50uL * server.maxmemory_eviction_tenacity; } if (server.maxmemory_eviction_tenacity < 100) { /* A 15% geometric progression, resulting in a limit of ~2 min at tenacity==99 */ return (unsigned long)(500.0 * pow(1.15, server.maxmemory_eviction_tenacity - 10.0)); } return ULONG_MAX; /* No limit to eviction time */ } /* Check that memory usage is within the current "maxmemory" limit. If over * "maxmemory", attempt to free memory by evicting data (if it's safe to do so). * * It's possible for Redis to suddenly be significantly over the "maxmemory" * setting. This can happen if there is a large allocation (like a hash table * resize) or even if the "maxmemory" setting is manually adjusted. Because of * this, it's important to evict for a managed period of time - otherwise Redis * would become unresponsive while evicting. * * The goal of this function is to improve the memory situation - not to * immediately resolve it. In the case that some items have been evicted but * the "maxmemory" limit has not been achieved, an aeTimeProc will be started * which will continue to evict items until memory limits are achieved or * nothing more is evictable. * * This should be called before execution of commands. If EVICT_FAIL is * returned, commands which will result in increased memory usage should be * rejected. * * Returns: * EVICT_OK - memory is OK or it's not possible to perform evictions now * EVICT_RUNNING - memory is over the limit, but eviction is still processing * EVICT_FAIL - memory is over the limit, and there's nothing to evict * */ int performEvictions(void) { /* Note, we don't goto update_metrics here because this check skips eviction * as if it wasn't triggered. it's a fake EVICT_OK. */ if (!isSafeToPerformEvictions()) return EVICT_OK; int keys_freed = 0; size_t mem_reported, mem_tofree; long long mem_freed; /* May be negative */ mstime_t latency, eviction_latency; long long delta; int slaves = listLength(server.slaves); int result = EVICT_FAIL; if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK) { result = EVICT_OK; goto update_metrics; } if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION) { result = EVICT_FAIL; /* We need to free memory, but policy forbids. */ goto update_metrics; } unsigned long eviction_time_limit_us = evictionTimeLimitUs(); mem_freed = 0; latencyStartMonitor(latency); monotime evictionTimer; elapsedStart(&evictionTimer); /* Try to smoke-out bugs (server.also_propagate should be empty here) */ serverAssert(server.also_propagate.numops == 0); while (mem_freed < (long long)mem_tofree) { int j, k, i; static unsigned int next_db = 0; sds bestkey = NULL; int bestdbid; redisDb *db; dict *dict; dictEntry *de; if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) || server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { struct evictionPoolEntry *pool = EvictionPoolLRU; while (bestkey == NULL) { unsigned long total_keys = 0, keys; /* We don't want to make local-db choices when expiring keys, * so to start populate the eviction pool sampling keys from * every DB. */ for (i = 0; i < server.dbnum; i++) { db = server.db+i; dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ? db->dict : db->expires; if ((keys = dictSize(dict)) != 0) { evictionPoolPopulate(i, dict, db->dict, pool); total_keys += keys; } } if (!total_keys) break; /* No keys to evict. */ /* Go backward from best to worst element to evict. */ for (k = EVPOOL_SIZE-1; k >= 0; k--) { if (pool[k].key == NULL) continue; bestdbid = pool[k].dbid; if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) { de = dictFind(server.db[bestdbid].dict, pool[k].key); } else { de = dictFind(server.db[bestdbid].expires, pool[k].key); } /* Remove the entry from the pool. */ if (pool[k].key != pool[k].cached) sdsfree(pool[k].key); pool[k].key = NULL; pool[k].idle = 0; /* If the key exists, is our pick. Otherwise it is * a ghost and we need to try the next element. */ if (de) { bestkey = dictGetKey(de); break; } else { /* Ghost... Iterate again. */ } } } } /* volatile-random and allkeys-random policy */ else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM || server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM) { /* When evicting a random key, we try to evict a key for * each DB, so we use the static 'next_db' variable to * incrementally visit all DBs. */ for (i = 0; i < server.dbnum; i++) { j = (++next_db) % server.dbnum; db = server.db+j; dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ? db->dict : db->expires; if (dictSize(dict) != 0) { de = dictGetRandomKey(dict); bestkey = dictGetKey(de); bestdbid = j; break; } } } /* Finally remove the selected key. */ if (bestkey) { db = server.db+bestdbid; robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); /* We compute the amount of memory freed by db*Delete() alone. * It is possible that actually the memory needed to propagate * the DEL in AOF and replication link is greater than the one * we are freeing removing the key, but we can't account for * that otherwise we would never exit the loop. * * Same for CSC invalidation messages generated by signalModifiedKey. * * AOF and Output buffer memory will be freed eventually so * we only care about memory used by the key space. */ enterExecutionUnit(1, 0); delta = (long long) zmalloc_used_memory(); latencyStartMonitor(eviction_latency); dbGenericDelete(db,keyobj,server.lazyfree_lazy_eviction,DB_FLAG_KEY_EVICTED); latencyEndMonitor(eviction_latency); latencyAddSampleIfNeeded("eviction-del",eviction_latency); delta -= (long long) zmalloc_used_memory(); mem_freed += delta; server.stat_evictedkeys++; signalModifiedKey(NULL,db,keyobj); notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted", keyobj, db->id); propagateDeletion(db,keyobj,server.lazyfree_lazy_eviction); exitExecutionUnit(); postExecutionUnitOperations(); decrRefCount(keyobj); keys_freed++; if (keys_freed % 16 == 0) { /* When the memory to free starts to be big enough, we may * start spending so much time here that is impossible to * deliver data to the replicas fast enough, so we force the * transmission here inside the loop. */ if (slaves) flushSlavesOutputBuffers(); /* Normally our stop condition is the ability to release * a fixed, pre-computed amount of memory. However when we * are deleting objects in another thread, it's better to * check, from time to time, if we already reached our target * memory, since the "mem_freed" amount is computed only * across the dbAsyncDelete() call, while the thread can * release the memory all the time. */ if (server.lazyfree_lazy_eviction) { if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) { break; } } /* After some time, exit the loop early - even if memory limit * hasn't been reached. If we suddenly need to free a lot of * memory, don't want to spend too much time here. */ if (elapsedUs(evictionTimer) > eviction_time_limit_us) { // We still need to free memory - start eviction timer proc startEvictionTimeProc(); break; } } } else { goto cant_free; /* nothing to free... */ } } /* at this point, the memory is OK, or we have reached the time limit */ result = (isEvictionProcRunning) ? EVICT_RUNNING : EVICT_OK; cant_free: if (result == EVICT_FAIL) { /* At this point, we have run out of evictable items. It's possible * that some items are being freed in the lazyfree thread. Perform a * short wait here if such jobs exist, but don't wait long. */ mstime_t lazyfree_latency; latencyStartMonitor(lazyfree_latency); while (bioPendingJobsOfType(BIO_LAZY_FREE) && elapsedUs(evictionTimer) < eviction_time_limit_us) { if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) { result = EVICT_OK; break; } usleep(eviction_time_limit_us < 1000 ? eviction_time_limit_us : 1000); } latencyEndMonitor(lazyfree_latency); latencyAddSampleIfNeeded("eviction-lazyfree",lazyfree_latency); } latencyEndMonitor(latency); latencyAddSampleIfNeeded("eviction-cycle",latency); update_metrics: if (result == EVICT_RUNNING || result == EVICT_FAIL) { if (server.stat_last_eviction_exceeded_time == 0) elapsedStart(&server.stat_last_eviction_exceeded_time); } else if (result == EVICT_OK) { if (server.stat_last_eviction_exceeded_time != 0) { server.stat_total_eviction_exceeded_time += elapsedUs(server.stat_last_eviction_exceeded_time); server.stat_last_eviction_exceeded_time = 0; } } return result; }