diff options
Diffstat (limited to '')
61 files changed, 11281 insertions, 0 deletions
diff --git a/src/dmclock/.gitignore b/src/dmclock/.gitignore new file mode 100644 index 000000000..db9226e3c --- /dev/null +++ b/src/dmclock/.gitignore @@ -0,0 +1,5 @@ +*~ +*.dSYM +*.o +build* +cscope.* diff --git a/src/dmclock/.travis.yml b/src/dmclock/.travis.yml new file mode 100644 index 000000000..8fb8f898e --- /dev/null +++ b/src/dmclock/.travis.yml @@ -0,0 +1,29 @@ +language: cpp +cache: ccache +dist: xenial +sudo: false +branches: + only: + - master +os: + - linux +compiler: + - clang + - gcc +addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - cmake + - libgtest-dev + - libboost-dev +before_script: + - mkdir build + - cd build +script: + - cmake .. + - cmake --build . -- -j2 && ctest -V -j2 +env: + global: + - LANG="en_US.UTF-8" diff --git a/src/dmclock/CMakeLists.txt b/src/dmclock/CMakeLists.txt new file mode 100644 index 000000000..049ea5e31 --- /dev/null +++ b/src/dmclock/CMakeLists.txt @@ -0,0 +1,44 @@ +cmake_minimum_required(VERSION 3.5.1) + +project(dmclock CXX) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules") + +if (NOT(BOOST_FOUND)) + find_package(Boost REQUIRED) +endif() + +find_package(Threads) + +if(CMAKE_CXX_STANDARD OR CMAKE_CXX_FLAGS MATCHES "-std=(c|gnu)\\+\\+") + # use existing settings if available +else() + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) +endif() + +add_subdirectory(src) + +# Determine if dmclock is built as a subproject (using add_subdirectory) +# or if it is the master project. 
+set(MASTER_PROJECT FALSE) +if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + set(MASTER_PROJECT TRUE) +endif() + +option(dmclock_TEST "Generate test targets" ${MASTER_PROJECT}) +if(dmclock_TEST) + if (NOT(TARGET gtest AND TARGET gtest_main)) + if (NOT GTEST_FOUND) + find_package(GTest QUIET) + if (NOT GTEST_FOUND) + include(BuildGTest) + endif() + endif() + endif() + + enable_testing() + add_subdirectory(test) + add_subdirectory(support/test) + add_subdirectory(sim) +endif() diff --git a/src/dmclock/COPYING b/src/dmclock/COPYING new file mode 100644 index 000000000..6f1dfff1f --- /dev/null +++ b/src/dmclock/COPYING @@ -0,0 +1,3 @@ +Files: * +Copyright: (C) 2016-2018 by Red Hat Inc. +License: LGPL2.1 (see COPYING-LGPL2.1) diff --git a/src/dmclock/COPYING-LGPL2.1 b/src/dmclock/COPYING-LGPL2.1 new file mode 100644 index 000000000..5ab7695ab --- /dev/null +++ b/src/dmclock/COPYING-LGPL2.1 @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. 
You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. 
+ + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. 
In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. 
A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. 
You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/src/dmclock/README.md b/src/dmclock/README.md new file mode 100644 index 000000000..b046fd3e3 --- /dev/null +++ b/src/dmclock/README.md @@ -0,0 +1,45 @@ +# dmclock + +This repository contains C++ 11 code that implements the dmclock +distributed quality of service algorithm. 
See __mClock: Handling
+Throughput Variability for Hypervisor IO Scheduling__ by Gulati,
+Merchant, and Varman for a description of the algorithm.
+
+## Bugs and features
+
+There is a dmclock project under https://tracker.ceph.com/ through
+which bugs can be reported and features requested.
+
+## Running cmake
+
+When running cmake, set the build type with either:
+
+    -DCMAKE_BUILD_TYPE=Debug
+    -DCMAKE_BUILD_TYPE=Release
+
+To turn on profiling, run cmake with an additional:
+
+    -DPROFILE=yes
+
+## Running make
+
+### Building the dmclock library
+
+The `make` command builds a library libdmclock.a. That plus the header
+files in the src directory allow one to use the implementation in
+their code.
+
+### Building unit tests
+
+The `make dmclock-tests` command builds unit tests.
+
+### Building simulations
+
+The `make dmclock-sims` command builds two simulations -- *dmc_sim*
+and *ssched_sim* -- which incorporate, respectively, the dmclock
+priority queue or a very simple scheduler for comparison. Other
+priority queue implementations could be added in the future.
+
+## dmclock API
+
+To be written.... diff --git a/src/dmclock/benchmark/README.md b/src/dmclock/benchmark/README.md new file mode 100644 index 000000000..d945e986f --- /dev/null +++ b/src/dmclock/benchmark/README.md @@ -0,0 +1,42 @@ +# dmclock benchmarking
+
+**IMPORTANT**: now that K_WAY_HEAP is no longer allowed to have the
+value 1, the shell and Python scripts that generate the PDFs no longer
+work exactly correctly. Some effort to debug is necessary.
+
+This directory contains scripts to evaluate effects of different
+branching-factors (k=1 to k=11) in the IndirectIntrusiveHeap
+data-structure. IndirectIntrusiveHeap is now a k-way heap, so finding
+an ideal value for k (i.e., k=2 or k=3) for a particular work-load is
+important. Also, it is well-documented that the right choice of
+k-value improves the caching behaviour [Syed -- citation needed
+here]. 
As a result, the overall performance of an application using +k-way heap increases significantly [Syed -- citation needed here]. + +A rule of thumb is the following: + if number of elements are <= 6, use k=1 + otherwise, use k=3. + +## Prerequisites + +requires python 2.7, gnuplot, and awk. + +## Running benchmark + +./run.sh [name_of_the_output] [k_way] [repeat] # [Syed -- last two command line args do not work] + +The "run.sh" script looks for config files in the "configs" directory, +and the final output is generated as +"name_of_the_output.pdf". Internally, "run.sh" calls other scripts +such as data_gen.sh, data_parser.py, and plot_gen.sh. + +## Modifying parameters + +To modify k-value and/or the amount of times each simulation is +repeated, modify the following two variables in "run.sh" file: + + k_way=[your_value] + repeat=[your_value] + +For example, k_way=3 means, the benchmark will compare simulations +using 1-way, 2-way, and 3-way heaps. diff --git a/src/dmclock/benchmark/configs/dmc_sim_100_100.conf b/src/dmclock/benchmark/configs/dmc_sim_100_100.conf new file mode 100644 index 000000000..c93d4c71f --- /dev/null +++ b/src/dmclock/benchmark/configs/dmc_sim_100_100.conf @@ -0,0 +1,31 @@ +[global] +server_groups = 1 +client_groups = 2 +server_random_selection = true +server_soft_limit = true + +[server.0] +server_count = 100 +server_iops = 160 + +[client.0] +client_count = 99 +client_wait = 0 +client_total_ops = 10000 +client_server_select_range = 100 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 100.0 +client_limit = 0.0 +client_weight = 1.0 + +[client.1] +client_count = 1 +client_wait = 10 +client_total_ops = 10000 +client_server_select_range = 100 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 100.0 +client_limit = 0.0 +client_weight = 1.0 diff --git a/src/dmclock/benchmark/configs/dmc_sim_8_6.conf b/src/dmclock/benchmark/configs/dmc_sim_8_6.conf new file mode 100644 index 000000000..28aeb401d --- 
/dev/null +++ b/src/dmclock/benchmark/configs/dmc_sim_8_6.conf @@ -0,0 +1,43 @@ +[global] +server_groups = 1 +client_groups = 3 +server_random_selection = true +server_soft_limit = true + +[client.0] +client_count = 2 +client_wait = 0 +client_total_ops = 1000 +client_server_select_range = 8 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 0.0 +client_limit = 0.0 +client_weight = 1.0 + +[client.1] +client_count = 2 +client_wait = 5 +client_total_ops = 1000 +client_server_select_range = 8 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 20.0 +client_limit = 40.0 +client_weight = 1.0 + +[client.2] +client_count = 2 +client_wait = 10 +client_total_ops = 1000 +client_server_select_range = 8 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 0.0 +client_limit = 50.0 +client_weight = 2.0 + + +[server.0] +server_count = 8 +server_iops = 160 diff --git a/src/dmclock/benchmark/data_gen.sh b/src/dmclock/benchmark/data_gen.sh new file mode 100755 index 000000000..80a77bd9a --- /dev/null +++ b/src/dmclock/benchmark/data_gen.sh @@ -0,0 +1,73 @@ +#!/bin/bash +config_dir="configs" +repeat=2 #5 + +# parameter check -- output_file name +if [ "$1" != "" ]; then + output_file="$1" +else + echo "Please provide the name of the output file" + exit +fi + +# parameter check -- k-value +if [ "$2" != "" ]; then + k_way="$2" +else + echo "Please provide the maximum K_WAY value" + exit +fi + +# parameter check --repeat +if [ "$3" != "" ]; then + repeat="$3" +fi + +echo "k-way:$k_way, num_repeat:$repeat" + +# create simulators in different directories +k=2 +while [ $k -le $k_way ] +do + mkdir "build_$k" + cd "build_$k" + rm -rf * + cmake -DCMAKE_BUILD_TYPE=Release -DK_WAY_HEAP=$k ../../. + make dmclock-sims + cd .. 
+ + k=$(( $k + 1 )) +done + +# run simulators +echo '' > $output_file +for config in "$config_dir"/*.conf +do + k=2 + while [ $k -le $k_way ] + do + cd "build_$k" + + # repeat same experiment + i=0 + while [ $i -lt $repeat ] + do + i=$(( $i + 1 )) + + # clear cache first + sync + #sudo sh -c 'echo 1 >/proc/sys/vm/drop_caches' + #sudo sh -c 'echo 2 >/proc/sys/vm/drop_caches' + #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches' + + # run with heap + msg="file_name:$k:$config" + echo $msg >> ../$output_file + echo "running $msg ..." + ./sim/dmc_sim -c ../$config | awk '(/average/)' >> ../$output_file + done # end repeat + cd .. + k=$(( $k + 1 )) + done # end k_way +done # end config + diff --git a/src/dmclock/benchmark/data_parser.py b/src/dmclock/benchmark/data_parser.py new file mode 100755 index 000000000..c90d85fd9 --- /dev/null +++ b/src/dmclock/benchmark/data_parser.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python + +class DataPoint: + def __init__(self): + self.nserver = 0; + self.nclient = 0; + self.heap_type = 0; + self.total_time_to_add_req = 0; + self.total_time_to_complete_req = 0; + self.config = '' + + def set_name(self, config, heap_type): + self.config = config; + self.heap_type = heap_type + + def get_conig(self): + import re + return re.split(r"/|\.", self.config)[1] + + def __str__(self): + return "s:%d, c:%d,h:%d,config:%s"%(self.nserver, self.nclient, self.heap_type, self.config); +# end DataPoint + + +def isFloat(elem): + try: + float(elem) + return True + except ValueError: + return False +#end isFloat + + +def parse_config_params(fname): + nclient = 0; + nserver = 0; + # read config file property + with open(fname, 'r') as f: + for line in f: + line = line.strip('\n \t') + if not line: continue; + if line.startswith("client_count"): + nclient += int(line.split('=')[-1]); + if line.startswith("server_count"): + nserver += int(line.split('=')[-1]); + # end of file + return [nserver, nclient]; +# parse_config_params + +def make_aggregate_data_point(dps, 
config, heap_type): + # create new aggregate point + dp = DataPoint(); + # set set and k_way_heap property + dp.set_name(config, heap_type); + + num_run = 0 + for _dp in dps: + if _dp.config == config and _dp.heap_type == heap_type: + # print _dp, config, heap_type + dp.nserver =_dp.nserver + dp.nclient = _dp.nclient + num_run += 1 + dp.total_time_to_add_req += _dp.total_time_to_add_req + dp.total_time_to_complete_req += _dp.total_time_to_complete_req + + # average + dp.total_time_to_add_req /= num_run; + dp.total_time_to_complete_req /= num_run + #print dp + return dp; + +def parse_data_points(filename): + dps = []; #data-points + dp = None; + state = 0; + configs = {} + k_ways = {} + + with open(filename, 'r') as f: + for line in f: + line = line.strip('\n \t') + if not line: continue; + + # file_name:1:configs/dmc_sim_8_6.conf + if line.startswith("file_name"): + if dp: + dps.append(dp); + state = 0; + + # new data-point + dp = DataPoint(); + parts = line.split(':') + fname = parts[-1]; + dp.heap_type = int(parts[1]); + if dp.heap_type not in k_ways: + k_ways[dp.heap_type] = 1; + + # add to the dictionary + configs[fname] = 1; + + dp.config = fname; + params = parse_config_params(fname) + dp.nserver = params[0]; + dp.nclient = params[-1]; + + elif line.startswith("average"): # take last 2 averages + r = [float(s) for s in line.split(' ') if isFloat(s)] + state +=1; + #print r, dp #if isFloat(s) + if state == 3: + dp.total_time_to_add_req = r[0] + elif state == 4: + dp.total_time_to_complete_req = r[0] + else: pass + + else: + pass; + # final entry + dps.append(dp) + + # compute average of multiple runs + dps_avg = [] + for config in configs: + data_per_config = [] + for k in k_ways: + aggr_dp = make_aggregate_data_point(dps, config , k); + data_per_config.append(aggr_dp); + dps_avg.append(data_per_config); + # end for + return dps_avg; +# end parse_data_points + + +def create_header(num_cols): + fields = ['nserver_nclient(config_file)','add_req', 
'complete_req']; + header = fields[0] + #write add_req_{1, ...} + for i in range(num_cols): + header = '%s %s_%i'%(header, fields[1], i+2) + #write complete_req_{1, ...} + for i in range(num_cols): + header = '%s %s_%i'%(header, fields[2], i+2) + # new-line + header = '%s\n'%(header) + return header +# end create_header + + +def create_data_line(aggr_dp): + # get common info + dp = aggr_dp[0] + data_line = "s:%d_c:%d "%(dp.nserver, dp.nclient); + # get the point-count + num_cols = len(aggr_dp); + # write add_req_{1, ...} + for i in range(num_cols): + data_line = '%s %f'%(data_line, aggr_dp[i].total_time_to_add_req) + # write complete_req_{1, ...} + for i in range(num_cols): + data_line = '%s %f'%(data_line, aggr_dp[i].total_time_to_complete_req) + # new-line + data_line = '%s\n'%(data_line) + return data_line +# end create_data_line + + +def make_data(filename): + # write the aggregated point in space separated file + dps = parse_data_points(filename); + if not len(dps) : return + print "total points: ", len(dps) + # open file + with open('%s.dat'%(filename), 'w+') as f: + # write header + f.write(create_header(len(dps[0]))); + # write data-line + for aggr_dp in dps: + f.write(create_data_line(aggr_dp)); + + +def main(output_file): + print output_file + make_data(output_file); + +import sys +if __name__ == "__main__": + file_name="result" + if len(sys.argv) > 1: + file_name=sys.argv[1].strip() + main(file_name) + diff --git a/src/dmclock/benchmark/plot_gen.sh b/src/dmclock/benchmark/plot_gen.sh new file mode 100755 index 000000000..d90bde192 --- /dev/null +++ b/src/dmclock/benchmark/plot_gen.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +if [ "$1" != "" ]; then + output_file="$1" +else + echo "Please provide the name of the output file" + exit +fi + +# parameter check -- k-value +if [ "$2" != "" ]; then + k_way="$2" +else + echo "Please provide the maximum K_WAY value" + exit +fi +#echo "k-way: $k_way" +#exit + +gnuplot << EOF + +# Note you need gnuplot 4.4 for the pdfcairo 
terminal. +clear +reset + +set terminal pdfcairo size 7in,5in font "Gill Sans,5" linewidth 1 rounded fontscale .8 noenhanced +set output "${output_file}.pdf" + +# starts multiplot +set multiplot layout 2,1 + +# Line style for axes +set style line 80 lt rgb "#808080" + +# Line style for grid +set style line 81 lt 0 # dashed +set style line 81 lt rgb "#808080" # grey + +set grid back linestyle 81 +set border 3 back linestyle 80 + +#set xtics rotate out +set style data histogram +set style histogram clustered + +set style fill solid border +set xlabel 'Heap Timing for different K values' +set ylabel 'Time (nanosec)' +set key top right + +set yrange [0:*] + +# plot 1 +set title 'Request Addition Time' +plot for [COL=2:($k_way + 1)] '${output_file}.dat' using COL:xticlabels(1) title columnheader + +# plot 2 +set title 'Request Completion Time' +plot for [COL=($k_way + 2):(2 * $k_way + 1)] '${output_file}.dat' using COL:xticlabels(1) title columnheader +EOF diff --git a/src/dmclock/benchmark/run.sh b/src/dmclock/benchmark/run.sh new file mode 100755 index 000000000..11432b530 --- /dev/null +++ b/src/dmclock/benchmark/run.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# default value +k_way=3 #11 +repeat=2 #5 + +output_file="" +if [ "$1" != "" ]; then + output_file="$1" +else + echo "Please provide the name of the output file" + exit +fi + +echo "generating file ${output_file}" +sh data_gen.sh ${output_file} ${k_way} ${repeat} + +echo "converting ${output_file} to ${output_file}.dat" +python data_parser.py ${output_file} + +echo "now generating bar-chart" +#gnuplot -e 'output_file=value' plot_gen.gnuplot +sh plot_gen.sh ${output_file} ${k_way} +echo "done! 
check ${output_file}.pdf" diff --git a/src/dmclock/cmake/modules/BuildGTest.cmake b/src/dmclock/cmake/modules/BuildGTest.cmake new file mode 100644 index 000000000..2b3f2ae52 --- /dev/null +++ b/src/dmclock/cmake/modules/BuildGTest.cmake @@ -0,0 +1,71 @@ +macro(_build_gtest gtest_root) + include(ExternalProject) + ExternalProject_Add(googletest + SOURCE_DIR ${gtest_root} + CMAKE_ARGS -DBUILD_GMOCK=OFF -DBUILD_GTEST=ON + INSTALL_COMMAND "" + LOG_CONFIGURE ON + LOG_BUILD ON) + + ExternalProject_Get_Property(googletest source_dir) + find_path(GTEST_INCLUDE_DIRS + NAMES gtest/gtest.h + PATHS ${source_dir}/googletest/include /usr/include) + find_path(GMOCK_INCLUDE_DIRS + NAMES gmock/gmock.h + PATHS ${source_dir}/googlemock/include /usr/include) + + find_package(Threads REQUIRED) + + ExternalProject_Get_Property(googletest binary_dir) + set(GTEST_LIBRARY_PATH ${binary_dir}/${CMAKE_FIND_LIBRARY_PREFIXES}gtest.a) + set(GTEST_LIBRARY GTest::GTest) + add_library(${GTEST_LIBRARY} STATIC IMPORTED) + set_target_properties(${GTEST_LIBRARY} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIRS}" + IMPORTED_LOCATION ${GTEST_LIBRARY_PATH} + IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) + add_dependencies(${GTEST_LIBRARY} googletest) + set(GTEST_LIBRARIES ${GTEST_LIBRARY}) + + set(GTEST_MAIN_LIBRARY_PATH ${binary_dir}/${CMAKE_FIND_LIBRARY_PREFIXES}gtest_main.a) + set(GTEST_MAIN_LIBRARY GTest::Main) + add_library(${GTEST_MAIN_LIBRARY} STATIC IMPORTED) + set_target_properties(${GTEST_MAIN_LIBRARY} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${GTEST_INCLUDE_DIRS}" + IMPORTED_LOCATION ${GTEST_MAIN_LIBRARY_PATH} + IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) + add_dependencies(${GTEST_MAIN_LIBRARY} googletest) + + set(GMOCK_LIBRARY_PATH ${binary_dir}/${CMAKE_FIND_LIBRARY_PREFIXES}gmock.a) + set(GMOCK_LIBRARY GMock::GMock) + add_library(${GMOCK_LIBRARY} STATIC IMPORTED) + 
set_target_properties(${GMOCK_LIBRARY} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${GMOCK_INCLUDE_DIRS}" + IMPORTED_LOCATION "${GMOCK_LIBRARY_PATH}" + IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) + add_dependencies(${GMOCK_LIBRARY} googletest) + + set(GMOCK_MAIN_LIBRARY_PATH ${binary_dir}/${CMAKE_FIND_LIBRARY_PREFIXES}gmock_main.a) + set(GMOCK_MAIN_LIBRARY GMock::Main) + add_library(${GMOCK_MAIN_LIBRARY} STATIC IMPORTED) + set_target_properties(${GMOCK_MAIN_LIBRARY} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${GMOCK_INCLUDE_DIRS}" + IMPORTED_LOCATION ${GMOCK_MAIN_LIBRARY_PATH} + IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + IMPORTED_LINK_INTERFACE_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) + add_dependencies(${GMOCK_MAIN_LIBRARY} ${GTEST_LIBRARY}) +endmacro() + +find_path(GTEST_ROOT src/gtest.cc + HINTS $ENV{GTEST_ROOT} + PATHS /usr/src/googletest/googletest /usr/src/gtest) + +if(EXISTS ${GTEST_ROOT}) + message(STATUS "Found googletest: ${GTEST_ROOT}") + _build_gtest(${GTEST_ROOT}) +else() + message(SEND_ERROR "Could NOT find googletest") +endif() diff --git a/src/dmclock/dmclock-config.cmake.in b/src/dmclock/dmclock-config.cmake.in new file mode 100644 index 000000000..01636532c --- /dev/null +++ b/src/dmclock/dmclock-config.cmake.in @@ -0,0 +1,17 @@ +# - Config file for the FooBar package +# It defines the following variables +# DMCLOCK_INCLUDE_DIRS - include directories for FooBar +# DMCLOCK_LIBRARIES - libraries to link against + +# Compute paths +get_filename_component(DMCLOCK_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +set(DMCLOCK_INCLUDE_DIRS "${DMCLOCK_CMAKE_DIR}/src") +# set(DMCLOCK_INCLUDE_DIRS "@CONF_INCLUDE_DIRS@") + +# Our library dependencies (contains definitions for IMPORTED targets) +if(NOT TARGET dmclock AND NOT dmclock_BINARY_DIR) + include("${DMCLOCK_CMAKE_DIR}/dmclock-targets.cmake") +endif() + +# These are IMPORTED targets created by FooBarTargets.cmake +set(DMCLOCK_LIBRARIES dmclock) diff 
--git a/src/dmclock/dmclock-targets.cmake b/src/dmclock/dmclock-targets.cmake new file mode 100644 index 000000000..2c84f34a1 --- /dev/null +++ b/src/dmclock/dmclock-targets.cmake @@ -0,0 +1 @@ +export(PACKAGE dmclock) diff --git a/src/dmclock/sim/CMakeLists.txt b/src/dmclock/sim/CMakeLists.txt new file mode 100644 index 000000000..ca537ce6f --- /dev/null +++ b/src/dmclock/sim/CMakeLists.txt @@ -0,0 +1,11 @@ +set(K_WAY_HEAP "" CACHE STRING "K_WAY_HEAP") + +if(K_WAY_HEAP) + if(K_WAY_HEAP LESS 2) + message(FATAL_ERROR "K_WAY_HEAP value should be at least 2") + else() + set(CMAKE_CXX_SIM_FLAGS "-DK_WAY_HEAP=${K_WAY_HEAP}") + endif() +endif() + +add_subdirectory(src) diff --git a/src/dmclock/sim/dmc_sim_100th.conf b/src/dmclock/sim/dmc_sim_100th.conf new file mode 100644 index 000000000..17d004354 --- /dev/null +++ b/src/dmclock/sim/dmc_sim_100th.conf @@ -0,0 +1,32 @@ +[global] +server_groups = 1 +client_groups = 2 +server_random_selection = true +server_soft_limit = true + +[client.0] +client_count = 99 +client_wait = 0 +client_total_ops = 1000 +client_server_select_range = 10 +client_iops_goal = 50 +client_outstanding_ops = 100 +client_reservation = 20.0 +client_limit = 60.0 +client_weight = 1.0 + +[client.1] +client_count = 1 +client_wait = 10 +client_total_ops = 1000 +client_server_select_range = 10 +client_iops_goal = 50 +client_outstanding_ops = 100 +client_reservation = 20.0 +client_limit = 60.0 +client_weight = 1.0 + +[server.0] +server_count = 100 +server_iops = 40 +server_threads = 1 diff --git a/src/dmclock/sim/dmc_sim_example.conf b/src/dmclock/sim/dmc_sim_example.conf new file mode 100644 index 000000000..e98b870eb --- /dev/null +++ b/src/dmclock/sim/dmc_sim_example.conf @@ -0,0 +1,56 @@ +[global] +server_groups = 1 +client_groups = 4 +server_random_selection = false +server_soft_limit = false + +[client.0] +client_count = 1 +client_wait = 0 +client_total_ops = 2000 +client_server_select_range = 1 +client_iops_goal = 200 +client_outstanding_ops = 32 
+client_reservation = 0.0 +client_limit = 0.0 +client_weight = 1.0 + +[client.1] +client_count = 1 +client_wait = 5 +client_total_ops = 2000 +client_server_select_range = 1 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 0.0 +client_limit = 40.0 +client_weight = 1.0 + +[client.2] +client_count = 1 +client_wait = 10 +client_total_ops = 2000 +client_server_select_range = 1 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 0.0 +client_limit = 50.0 +client_weight = 2.0 +client_req_cost = 1 + +[client.3] +client_count = 1 +client_wait = 10 +client_total_ops = 2000 +client_server_select_range = 1 +client_iops_goal = 200 +client_outstanding_ops = 32 +client_reservation = 0.0 +client_limit = 50.0 +client_weight = 2.0 +client_req_cost = 3 + +[server.0] +server_count = 1 +server_iops = 160 +server_threads = 1 diff --git a/src/dmclock/sim/src/CMakeLists.txt b/src/dmclock/sim/src/CMakeLists.txt new file mode 100644 index 000000000..dab011ed0 --- /dev/null +++ b/src/dmclock/sim/src/CMakeLists.txt @@ -0,0 +1,38 @@ +set(local_flags "-Wall ${CMAKE_CXX_SIM_FLAGS}") + +set(ssched_sim_srcs test_ssched.cc test_ssched_main.cc) +set(dmc_sim_srcs test_dmclock.cc test_dmclock_main.cc) +set(config_srcs config.cc str_list.cc ConfUtils.cc) + +set_source_files_properties(${ssched_sim_srcs} ${dmc_sim_srcs} ${dmc_srcs} ${config_srcs} + PROPERTIES + COMPILE_FLAGS "${local_flags}" + ) + +if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + set(warnings_off " -Wno-unused-variable -Wno-unused-function") +elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + set(warnings_off " -Wno-unused-but-set-variable -Wno-unused-function") +endif() + +# append warning flags to certain source files +set_property( + SOURCE ${ssched_sim_srcs} ${dmc_sim_srcs} ${config_srcs} + APPEND_STRING + PROPERTY COMPILE_FLAGS "${warnings_off}" + ) + +add_executable(ssched_sim EXCLUDE_FROM_ALL ${ssched_sim_srcs}) +target_include_directories(ssched_sim PRIVATE ssched) # ssched code 
+add_executable(dmc_sim EXCLUDE_FROM_ALL ${dmc_sim_srcs} ${config_srcs}) + +set_target_properties(ssched_sim dmc_sim + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ..) + +add_dependencies(dmc_sim dmclock) + +target_link_libraries(ssched_sim LINK_PRIVATE Threads::Threads) +target_link_libraries(dmc_sim LINK_PRIVATE dmclock) + +add_custom_target(dmclock-sims DEPENDS ssched_sim dmc_sim) diff --git a/src/dmclock/sim/src/ConfUtils.cc b/src/dmclock/sim/src/ConfUtils.cc new file mode 100644 index 000000000..a05f7dc42 --- /dev/null +++ b/src/dmclock/sim/src/ConfUtils.cc @@ -0,0 +1,574 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2011 New Dream Network + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <algorithm> +#include <errno.h> +#include <list> +#include <map> +#include <sstream> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <string> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <iostream> + +#include <assert.h> +#include "ConfUtils.h" + +using std::cerr; +using std::ostringstream; +using std::pair; +using std::string; + +#define MAX_CONFIG_FILE_SZ 0x40000000 + +////////////////////////////// ConfLine ////////////////////////////// +ConfLine:: +ConfLine(const std::string &key_, const std::string &val_, + const std::string &newsection_, const std::string &comment_, int line_no_) + : key(key_), val(val_), newsection(newsection_) +{ + // If you want to implement writable ConfFile support, you'll need to save + // the comment and line_no arguments here. +} + +bool ConfLine:: +operator<(const ConfLine &rhs) const +{ + // We only compare keys. 
+ // If you have more than one line with the same key in a given section, the + // last one wins. + if (key < rhs.key) + return true; + else + return false; +} + +std::ostream &operator<<(std::ostream& oss, const ConfLine &l) +{ + oss << "ConfLine(key = '" << l.key << "', val='" + << l.val << "', newsection='" << l.newsection << "')"; + return oss; +} +///////////////////////// ConfFile ////////////////////////// +ConfFile:: +ConfFile() +{ +} + +ConfFile:: +~ConfFile() +{ +} + +void ConfFile:: +clear() +{ + sections.clear(); +} + +/* We load the whole file into memory and then parse it. Although this is not + * the optimal approach, it does mean that most of this code can be shared with + * the bufferlist loading function. Since bufferlists are always in-memory, the + * load_from_buffer interface works well for them. + * In general, configuration files should be a few kilobytes at maximum, so + * loading the whole configuration into memory shouldn't be a problem. + */ +int ConfFile:: +parse_file(const std::string &fname, std::deque<std::string> *errors, + std::ostream *warnings) +{ + clear(); + + int ret = 0; + size_t sz; + char *buf = NULL; + char buf2[128]; + FILE *fp = fopen(fname.c_str(), "r"); + if (!fp) { + ret = -errno; + return ret; + } + + struct stat st_buf; + if (fstat(fileno(fp), &st_buf)) { + ret = -errno; + ostringstream oss; + oss << "read_conf: failed to fstat '" << fname << "': " << strerror_r(ret, buf2, sizeof(buf2)); + errors->push_back(oss.str()); + goto done; + } + + if (st_buf.st_size > MAX_CONFIG_FILE_SZ) { + ostringstream oss; + oss << "read_conf: config file '" << fname << "' is " << st_buf.st_size + << " bytes, but the maximum is " << MAX_CONFIG_FILE_SZ; + errors->push_back(oss.str()); + ret = -EINVAL; + goto done; + } + + sz = (size_t)st_buf.st_size; + buf = (char*)malloc(sz); + if (!buf) { + ret = -ENOMEM; + goto done; + } + + if (fread(buf, 1, sz, fp) != sz) { + if (ferror(fp)) { + ret = -errno; + ostringstream oss; + oss << "read_conf: 
fread error while reading '" << fname << "': " + << strerror_r(ret, buf2, sizeof(buf2)); + errors->push_back(oss.str()); + goto done; + } + else { + ostringstream oss; + oss << "read_conf: unexpected EOF while reading '" << fname << "': " + << "possible concurrent modification?"; + errors->push_back(oss.str()); + ret = -EIO; + goto done; + } + } + + load_from_buffer(buf, sz, errors, warnings); + ret = 0; + +done: + free(buf); + fclose(fp); + return ret; +} + +int ConfFile:: +read(const std::string §ion, const std::string &key, std::string &val) const +{ + string k(normalize_key_name(key)); + + const_section_iter_t s = sections.find(section); + if (s == sections.end()) + return -ENOENT; + ConfLine exemplar(k, "", "", "", 0); + ConfSection::const_line_iter_t l = s->second.lines.find(exemplar); + if (l == s->second.lines.end()) + return -ENOENT; + val = l->val; + return 0; +} + +ConfFile::const_section_iter_t ConfFile:: +sections_begin() const +{ + return sections.begin(); +} + +ConfFile::const_section_iter_t ConfFile:: +sections_end() const +{ + return sections.end(); +} + +void ConfFile:: +trim_whitespace(std::string &str, bool strip_internal) +{ + // strip preceding + const char *in = str.c_str(); + while (true) { + char c = *in; + if ((!c) || (!isspace(c))) + break; + ++in; + } + char output[strlen(in) + 1]; + strcpy(output, in); + + // strip trailing + char *o = output + strlen(output); + while (true) { + if (o == output) + break; + --o; + if (!isspace(*o)) { + ++o; + *o = '\0'; + break; + } + } + + if (!strip_internal) { + str.assign(output); + return; + } + + // strip internal + char output2[strlen(output) + 1]; + char *out2 = output2; + bool prev_was_space = false; + for (char *u = output; *u; ++u) { + char c = *u; + if (isspace(c)) { + if (!prev_was_space) + *out2++ = c; + prev_was_space = true; + } + else { + *out2++ = c; + prev_was_space = false; + } + } + *out2++ = '\0'; + str.assign(output2); +} + +/* Normalize a key name. 
+ * + * Normalized key names have no leading or trailing whitespace, and all + * whitespace is stored as underscores. The main reason for selecting this + * normal form is so that in common/config.cc, we can use a macro to stringify + * the field names of md_config_t and get a key in normal form. + */ +std::string ConfFile:: +normalize_key_name(const std::string &key) +{ + string k(key); + ConfFile::trim_whitespace(k, true); + std::replace(k.begin(), k.end(), ' ', '_'); + return k; +} + +std::ostream &operator<<(std::ostream &oss, const ConfFile &cf) +{ + for (ConfFile::const_section_iter_t s = cf.sections_begin(); + s != cf.sections_end(); ++s) { + oss << "[" << s->first << "]\n"; + for (ConfSection::const_line_iter_t l = s->second.lines.begin(); + l != s->second.lines.end(); ++l) { + if (!l->key.empty()) { + oss << "\t" << l->key << " = \"" << l->val << "\"\n"; + } + } + } + return oss; +} + +void ConfFile:: +load_from_buffer(const char *buf, size_t sz, std::deque<std::string> *errors, + std::ostream *warnings) +{ + errors->clear(); + + section_iter_t::value_type vt("global", ConfSection()); + pair < section_iter_t, bool > vr(sections.insert(vt)); + assert(vr.second); + section_iter_t cur_section = vr.first; + std::string acc; + + const char *b = buf; + int line_no = 0; + size_t line_len = -1; + size_t rem = sz; + while (1) { + b += line_len + 1; + rem -= line_len + 1; + if (rem == 0) + break; + line_no++; + + // look for the next newline + const char *end = (const char*)memchr(b, '\n', rem); + if (!end) { + ostringstream oss; + oss << "read_conf: ignoring line " << line_no << " because it doesn't " + << "end with a newline! 
Please end the config file with a newline."; + errors->push_back(oss.str()); + break; + } + + // find length of line, and search for NULLs + line_len = 0; + bool found_null = false; + for (const char *tmp = b; tmp != end; ++tmp) { + line_len++; + if (*tmp == '\0') { + found_null = true; + } + } + + if (found_null) { + ostringstream oss; + oss << "read_conf: ignoring line " << line_no << " because it has " + << "an embedded null."; + errors->push_back(oss.str()); + acc.clear(); + continue; + } + + if ((line_len >= 1) && (b[line_len-1] == '\\')) { + // A backslash at the end of a line serves as a line continuation marker. + // Combine the next line with this one. + // Remove the backslash itself from the text. + acc.append(b, line_len - 1); + continue; + } + + acc.append(b, line_len); + + //cerr << "acc = '" << acc << "'" << std::endl; + ConfLine *cline = process_line(line_no, acc.c_str(), errors); + acc.clear(); + if (!cline) + continue; + const std::string &csection(cline->newsection); + if (!csection.empty()) { + std::map <std::string, ConfSection>::value_type nt(csection, ConfSection()); + pair < section_iter_t, bool > nr(sections.insert(nt)); + cur_section = nr.first; + } + else { + if (cur_section->second.lines.count(*cline)) { + // replace an existing key/line in this section, so that + // [mysection] + // foo = 1 + // foo = 2 + // will result in foo = 2. + cur_section->second.lines.erase(*cline); + if (cline->key.length() && warnings) + *warnings << "warning: line " << line_no << ": '" << cline->key << "' in section '" + << cur_section->first << "' redefined " << std::endl; + } + // add line to current section + //std::cerr << "cur_section = " << cur_section->first << ", " << *cline << std::endl; + cur_section->second.lines.insert(*cline); + } + delete cline; + } + + if (!acc.empty()) { + ostringstream oss; + oss << "read_conf: don't end with lines that end in backslashes!"; + errors->push_back(oss.str()); + } +} + +/* + * A simple state-machine based parser. 
+ * This probably could/should be rewritten with something like boost::spirit + * or yacc if the grammar ever gets more complex. + */ +ConfLine* ConfFile:: +process_line(int line_no, const char *line, std::deque<std::string> *errors) +{ + enum acceptor_state_t { + ACCEPT_INIT, + ACCEPT_SECTION_NAME, + ACCEPT_KEY, + ACCEPT_VAL_START, + ACCEPT_UNQUOTED_VAL, + ACCEPT_QUOTED_VAL, + ACCEPT_COMMENT_START, + ACCEPT_COMMENT_TEXT, + }; + const char *l = line; + acceptor_state_t state = ACCEPT_INIT; + string key, val, newsection, comment; + bool escaping = false; + while (true) { + char c = *l++; + switch (state) { + case ACCEPT_INIT: + if (c == '\0') + return NULL; // blank line. Not an error, but not interesting either. + else if (c == '[') + state = ACCEPT_SECTION_NAME; + else if ((c == '#') || (c == ';')) + state = ACCEPT_COMMENT_TEXT; + else if (c == ']') { + ostringstream oss; + oss << "unexpected right bracket at char " << (l - line) + << ", line " << line_no; + errors->push_back(oss.str()); + return NULL; + } + else if (isspace(c)) { + // ignore whitespace here + } + else { + // try to accept this character as a key + state = ACCEPT_KEY; + --l; + } + break; + case ACCEPT_SECTION_NAME: + if (c == '\0') { + ostringstream oss; + oss << "error parsing new section name: expected right bracket " + << "at char " << (l - line) << ", line " << line_no; + errors->push_back(oss.str()); + return NULL; + } + else if ((c == ']') && (!escaping)) { + trim_whitespace(newsection, true); + if (newsection.empty()) { + ostringstream oss; + oss << "error parsing new section name: no section name found? 
" + << "at char " << (l - line) << ", line " << line_no; + errors->push_back(oss.str()); + return NULL; + } + state = ACCEPT_COMMENT_START; + } + else if (((c == '#') || (c == ';')) && (!escaping)) { + ostringstream oss; + oss << "unexpected comment marker while parsing new section name, at " + << "char " << (l - line) << ", line " << line_no; + errors->push_back(oss.str()); + return NULL; + } + else if ((c == '\\') && (!escaping)) { + escaping = true; + } + else { + escaping = false; + newsection += c; + } + break; + case ACCEPT_KEY: + if ((((c == '#') || (c == ';')) && (!escaping)) || (c == '\0')) { + ostringstream oss; + if (c == '\0') { + oss << "end of key=val line " << line_no + << " reached, no \"=val\" found...missing =?"; + } else { + oss << "unexpected character while parsing putative key value, " + << "at char " << (l - line) << ", line " << line_no; + } + errors->push_back(oss.str()); + return NULL; + } + else if ((c == '=') && (!escaping)) { + key = normalize_key_name(key); + if (key.empty()) { + ostringstream oss; + oss << "error parsing key name: no key name found? 
" + << "at char " << (l - line) << ", line " << line_no; + errors->push_back(oss.str()); + return NULL; + } + state = ACCEPT_VAL_START; + } + else if ((c == '\\') && (!escaping)) { + escaping = true; + } + else { + escaping = false; + key += c; + } + break; + case ACCEPT_VAL_START: + if (c == '\0') + return new ConfLine(key, val, newsection, comment, line_no); + else if ((c == '#') || (c == ';')) + state = ACCEPT_COMMENT_TEXT; + else if (c == '"') + state = ACCEPT_QUOTED_VAL; + else if (isspace(c)) { + // ignore whitespace + } + else { + // try to accept character as a val + state = ACCEPT_UNQUOTED_VAL; + --l; + } + break; + case ACCEPT_UNQUOTED_VAL: + if (c == '\0') { + if (escaping) { + ostringstream oss; + oss << "error parsing value name: unterminated escape sequence " + << "at char " << (l - line) << ", line " << line_no; + errors->push_back(oss.str()); + return NULL; + } + trim_whitespace(val, false); + return new ConfLine(key, val, newsection, comment, line_no); + } + else if (((c == '#') || (c == ';')) && (!escaping)) { + trim_whitespace(val, false); + state = ACCEPT_COMMENT_TEXT; + } + else if ((c == '\\') && (!escaping)) { + escaping = true; + } + else { + escaping = false; + val += c; + } + break; + case ACCEPT_QUOTED_VAL: + if (c == '\0') { + ostringstream oss; + oss << "found opening quote for value, but not the closing quote. " + << "line " << line_no; + errors->push_back(oss.str()); + return NULL; + } + else if ((c == '"') && (!escaping)) { + state = ACCEPT_COMMENT_START; + } + else if ((c == '\\') && (!escaping)) { + escaping = true; + } + else { + escaping = false; + // Add anything, including whitespace. 
+ val += c; + } + break; + case ACCEPT_COMMENT_START: + if (c == '\0') { + return new ConfLine(key, val, newsection, comment, line_no); + } + else if ((c == '#') || (c == ';')) { + state = ACCEPT_COMMENT_TEXT; + } + else if (isspace(c)) { + // ignore whitespace + } + else { + ostringstream oss; + oss << "unexpected character at char " << (l - line) << " of line " + << line_no; + errors->push_back(oss.str()); + return NULL; + } + break; + case ACCEPT_COMMENT_TEXT: + if (c == '\0') + return new ConfLine(key, val, newsection, comment, line_no); + else + comment += c; + break; + default: + assert(0); + break; + } + assert(c != '\0'); // We better not go past the end of the input string. + } +} diff --git a/src/dmclock/sim/src/ConfUtils.h b/src/dmclock/sim/src/ConfUtils.h new file mode 100644 index 000000000..3db1d1e14 --- /dev/null +++ b/src/dmclock/sim/src/ConfUtils.h @@ -0,0 +1,83 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2011 New Dream Network + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_CONFUTILS_H +#define CEPH_CONFUTILS_H + +#include <deque> +#include <map> +#include <set> +#include <string> + +/* + * Ceph configuration file support. + * + * This class loads an INI-style configuration from a file or bufferlist, and + * holds it in memory. In general, an INI configuration file is composed of + * sections, which contain key/value pairs. You can put comments on the end of + * lines by using either a hash mark (#) or the semicolon (;). + * + * You can get information out of ConfFile by calling get_key or by examining + * individual sections. 
+ * + * This class could be extended to support modifying configuration files and + * writing them back out without too much difficulty. Currently, this is not + * implemented, and the file is read-only. + */ +class ConfLine { +public: + ConfLine(const std::string &key_, const std::string &val_, + const std::string &newsection_, const std::string &comment_, int line_no_); + bool operator<(const ConfLine &rhs) const; + friend std::ostream &operator<<(std::ostream& oss, const ConfLine &l); + + std::string key, val, newsection; +}; + +class ConfSection { +public: + typedef std::set <ConfLine>::const_iterator const_line_iter_t; + + std::set <ConfLine> lines; +}; + +class ConfFile { +public: + typedef std::map <std::string, ConfSection>::iterator section_iter_t; + typedef std::map <std::string, ConfSection>::const_iterator const_section_iter_t; + + ConfFile(); + ~ConfFile(); + void clear(); + int parse_file(const std::string &fname, std::deque<std::string> *errors, std::ostream *warnings); + int read(const std::string §ion, const std::string &key, + std::string &val) const; + + const_section_iter_t sections_begin() const; + const_section_iter_t sections_end() const; + + static void trim_whitespace(std::string &str, bool strip_internal); + static std::string normalize_key_name(const std::string &key); + friend std::ostream &operator<<(std::ostream &oss, const ConfFile &cf); + +private: + void load_from_buffer(const char *buf, size_t sz, + std::deque<std::string> *errors, std::ostream *warnings); + static ConfLine* process_line(int line_no, const char *line, + std::deque<std::string> *errors); + + std::map <std::string, ConfSection> sections; +}; + +#endif diff --git a/src/dmclock/sim/src/config.cc b/src/dmclock/sim/src/config.cc new file mode 100644 index 000000000..dae2903e1 --- /dev/null +++ b/src/dmclock/sim/src/config.cc @@ -0,0 +1,184 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 
Red Hat Inc. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#include <unistd.h> +#include <string.h> +#include <stdarg.h> + +#include <iostream> +#include <vector> +#include <list> + +#include "config.h" +#include "str_list.h" + + +static void dashes_to_underscores(const char *input, char *output) { + char c = 0; + char *o = output; + const char *i = input; + // first two characters are copied as-is + *o = *i++; + if (*o++ == '\0') + return; + *o = *i++; + if (*o++ == '\0') + return; + for (; ((c = *i)); ++i) { + if (c == '=') { + strcpy(o, i); + return; + } + if (c == '-') + *o++ = '_'; + else + *o++ = c; + } + *o++ = '\0'; +} + +static int va_ceph_argparse_witharg(std::vector<const char*> &args, + std::vector<const char*>::iterator &i, std::string *ret, + std::ostream &oss, va_list ap) { + const char *first = *i; + char tmp[strlen(first)+1]; + dashes_to_underscores(first, tmp); + first = tmp; + + // does this argument match any of the possibilities? + while (1) { + const char *a = va_arg(ap, char*); + if (a == NULL) + return 0; + int strlen_a = strlen(a); + char a2[strlen_a+1]; + dashes_to_underscores(a, a2); + if (strncmp(a2, first, strlen(a2)) == 0) { + if (first[strlen_a] == '=') { + *ret = first + strlen_a + 1; + i = args.erase(i); + return 1; + } + else if (first[strlen_a] == '\0') { + // find second part (or not) + if (i+1 == args.end()) { + oss << "Option " << *i << " requires an argument." << std::endl; + i = args.erase(i); + return -EINVAL; + } + i = args.erase(i); + *ret = *i; + i = args.erase(i); + return 1; + } + } + } +} + +bool crimson::qos_simulation::ceph_argparse_witharg(std::vector<const char*> &args, + std::vector<const char*>::iterator &i, std::string *ret, ...) 
{ + int r; + va_list ap; + va_start(ap, ret); + r = va_ceph_argparse_witharg(args, i, ret, std::cerr, ap); + va_end(ap); + if (r < 0) + _exit(1); + return r != 0; +} + +void crimson::qos_simulation::ceph_argparse_early_args(std::vector<const char*>& args, std::string *conf_file_list) { + std::string val; + + std::vector<const char *> orig_args = args; + + for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) { + if (ceph_argparse_witharg(args, i, &val, "--conf", "-c", (char*)NULL)) { + *conf_file_list = val; + } + else { + // ignore + ++i; + } + } + return; +} + +static bool stobool(const std::string & v) { + return !v.empty () && + (strcasecmp (v.c_str (), "true") == 0 || + atoi (v.c_str ()) != 0); +} + +int crimson::qos_simulation::parse_config_file(const std::string &fname, sim_config_t &g_conf) { + ConfFile cf; + std::deque<std::string> err; + std::ostringstream warn; + int ret = cf.parse_file(fname.c_str(), &err, &warn); + if (ret) { + // error + return ret; + } + + std::string val; + if (!cf.read("global", "server_groups", val)) + g_conf.server_groups = std::stoul(val); + if (!cf.read("global", "client_groups", val)) + g_conf.client_groups = std::stoul(val); + if (!cf.read("global", "server_random_selection", val)) + g_conf.server_random_selection = stobool(val); + if (!cf.read("global", "server_soft_limit", val)) + g_conf.server_soft_limit = stobool(val); + if (!cf.read("global", "anticipation_timeout", val)) + g_conf.anticipation_timeout = stod(val); + + for (unsigned i = 0; i < g_conf.server_groups; i++) { + srv_group_t st; + std::string section = "server." 
+ std::to_string(i); + if (!cf.read(section, "server_count", val)) + st.server_count = std::stoul(val); + if (!cf.read(section, "server_iops", val)) + st.server_iops = std::stoul(val); + if (!cf.read(section, "server_threads", val)) + st.server_threads = std::stoul(val); + g_conf.srv_group.push_back(st); + } + + for (unsigned i = 0; i < g_conf.client_groups; i++) { + cli_group_t ct; + std::string section = "client." + std::to_string(i); + if (!cf.read(section, "client_count", val)) + ct.client_count = std::stoul(val); + if (!cf.read(section, "client_wait", val)) + ct.client_wait = std::chrono::seconds(std::stoul(val)); + if (!cf.read(section, "client_total_ops", val)) + ct.client_total_ops = std::stoul(val); + if (!cf.read(section, "client_server_select_range", val)) + ct.client_server_select_range = std::stoul(val); + if (!cf.read(section, "client_iops_goal", val)) + ct.client_iops_goal = std::stoul(val); + if (!cf.read(section, "client_outstanding_ops", val)) + ct.client_outstanding_ops = std::stoul(val); + if (!cf.read(section, "client_reservation", val)) + ct.client_reservation = std::stod(val); + if (!cf.read(section, "client_limit", val)) + ct.client_limit = std::stod(val); + if (!cf.read(section, "client_weight", val)) + ct.client_weight = std::stod(val); + if (!cf.read(section, "client_req_cost", val)) + ct.client_req_cost = std::stoul(val); + g_conf.cli_group.push_back(ct); + } + + return 0; +} diff --git a/src/dmclock/sim/src/config.h b/src/dmclock/sim/src/config.h new file mode 100644 index 000000000..d61ca45a8 --- /dev/null +++ b/src/dmclock/sim/src/config.h @@ -0,0 +1,158 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#pragma once + + +#include <string.h> + +#include <chrono> +#include <vector> +#include <sstream> +#include <iomanip> + +#include "ConfUtils.h" + +#include "sim_recs.h" + + +namespace crimson { + namespace qos_simulation { + + struct cli_group_t { + unsigned client_count; + std::chrono::seconds client_wait; + unsigned client_total_ops; + unsigned client_server_select_range; + unsigned client_iops_goal; + unsigned client_outstanding_ops; + double client_reservation; + double client_limit; + double client_weight; + Cost client_req_cost; + + cli_group_t(unsigned _client_count = 100, + unsigned _client_wait = 0, + unsigned _client_total_ops = 1000, + unsigned _client_server_select_range = 10, + unsigned _client_iops_goal = 50, + unsigned _client_outstanding_ops = 100, + double _client_reservation = 20.0, + double _client_limit = 60.0, + double _client_weight = 1.0, + Cost _client_req_cost = 1u) : + client_count(_client_count), + client_wait(std::chrono::seconds(_client_wait)), + client_total_ops(_client_total_ops), + client_server_select_range(_client_server_select_range), + client_iops_goal(_client_iops_goal), + client_outstanding_ops(_client_outstanding_ops), + client_reservation(_client_reservation), + client_limit(_client_limit), + client_weight(_client_weight), + client_req_cost(_client_req_cost) + { + // empty + } + + friend std::ostream& operator<<(std::ostream& out, + const cli_group_t& cli_group) { + out << + "client_count = " << cli_group.client_count << "\n" << + "client_wait = " << cli_group.client_wait.count() << "\n" << + "client_total_ops = " << cli_group.client_total_ops << "\n" << + "client_server_select_range = " << cli_group.client_server_select_range << "\n" << + "client_iops_goal = " << cli_group.client_iops_goal << "\n" << + "client_outstanding_ops = " << cli_group.client_outstanding_ops << "\n" << + std::fixed << std::setprecision(1) << + "client_reservation = " << cli_group.client_reservation << "\n" << + "client_limit = " << 
cli_group.client_limit << "\n" << + "client_weight = " << cli_group.client_weight << "\n" << + "client_req_cost = " << cli_group.client_req_cost; + return out; + } + }; // class cli_group_t + + + struct srv_group_t { + unsigned server_count; + unsigned server_iops; + unsigned server_threads; + + srv_group_t(unsigned _server_count = 100, + unsigned _server_iops = 40, + unsigned _server_threads = 1) : + server_count(_server_count), + server_iops(_server_iops), + server_threads(_server_threads) + { + // empty + } + + friend std::ostream& operator<<(std::ostream& out, + const srv_group_t& srv_group) { + out << + "server_count = " << srv_group.server_count << "\n" << + "server_iops = " << srv_group.server_iops << "\n" << + "server_threads = " << srv_group.server_threads; + return out; + } + }; // class srv_group_t + + + struct sim_config_t { + unsigned server_groups; + unsigned client_groups; + bool server_random_selection; + bool server_soft_limit; + double anticipation_timeout; + + std::vector<cli_group_t> cli_group; + std::vector<srv_group_t> srv_group; + + sim_config_t(unsigned _server_groups = 1, + unsigned _client_groups = 1, + bool _server_random_selection = false, + bool _server_soft_limit = true, + double _anticipation_timeout = 0.0) : + server_groups(_server_groups), + client_groups(_client_groups), + server_random_selection(_server_random_selection), + server_soft_limit(_server_soft_limit), + anticipation_timeout(_anticipation_timeout) + { + srv_group.reserve(server_groups); + cli_group.reserve(client_groups); + } + + friend std::ostream& operator<<(std::ostream& out, + const sim_config_t& sim_config) { + out << + "server_groups = " << sim_config.server_groups << "\n" << + "client_groups = " << sim_config.client_groups << "\n" << + "server_random_selection = " << sim_config.server_random_selection << "\n" << + "server_soft_limit = " << sim_config.server_soft_limit << "\n" << + std::fixed << std::setprecision(3) << + "anticipation_timeout = " << 
sim_config.anticipation_timeout; + return out; + } + }; // class sim_config_t + + + bool ceph_argparse_witharg(std::vector<const char*> &args, + std::vector<const char*>::iterator &i, std::string *ret, ...); + void ceph_argparse_early_args(std::vector<const char*>& args, std::string *conf_file_list); + int parse_config_file(const std::string &fname, sim_config_t &g_conf); + + }; // namespace qos_simulation +}; // namespace crimson diff --git a/src/dmclock/sim/src/sim_client.h b/src/dmclock/sim/src/sim_client.h new file mode 100644 index 000000000..182cdb803 --- /dev/null +++ b/src/dmclock/sim/src/sim_client.h @@ -0,0 +1,340 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#pragma once + + +#include <atomic> +#include <mutex> +#include <condition_variable> +#include <thread> +#include <chrono> +#include <vector> +#include <deque> +#include <iostream> + +#include "sim_recs.h" + + +namespace crimson { + namespace qos_simulation { + + struct req_op_t {}; + struct wait_op_t {}; + constexpr struct req_op_t req_op {}; + constexpr struct wait_op_t wait_op {}; + + + enum class CliOp { req, wait }; + struct CliInst { + CliOp op; + union { + std::chrono::milliseconds wait_time; + struct { + uint32_t count; + std::chrono::microseconds time_bw_reqs; + uint16_t max_outstanding; + } req_params; + } args; + + // D is a duration type + template<typename D> + CliInst(wait_op_t, D duration) : + op(CliOp::wait) + { + args.wait_time = + std::chrono::duration_cast<std::chrono::milliseconds>(duration); + } + + CliInst(req_op_t, + uint32_t count, double ops_per_sec, uint16_t max_outstanding) : + op(CliOp::req) + { + args.req_params.count = count; + args.req_params.max_outstanding = max_outstanding; + uint32_t us = uint32_t(0.5 + 1.0 / ops_per_sec * 1000000); + args.req_params.time_bw_reqs = std::chrono::microseconds(us); + } + }; + + + using ServerSelectFunc = std::function<const ServerId&(uint64_t seed)>; + + + template<typename SvcTrk, typename ReqPm, typename RespPm, typename Accum> + class SimulatedClient { + public: + + struct InternalStats { + std::mutex mtx; + std::chrono::nanoseconds track_resp_time; + std::chrono::nanoseconds get_req_params_time; + uint32_t track_resp_count; + uint32_t get_req_params_count; + + InternalStats() : + track_resp_time(0), + get_req_params_time(0), + track_resp_count(0), + get_req_params_count(0) + { + // empty + } + }; + + using SubmitFunc = + std::function<void(const ServerId&, + TestRequest&&, + const ClientId&, + const ReqPm&)>; + + using ClientAccumFunc = std::function<void(Accum&,const RespPm&)>; + + typedef std::chrono::time_point<std::chrono::steady_clock> TimePoint; + + static TimePoint now() { return 
std::chrono::steady_clock::now(); } + + protected: + + struct RespQueueItem { + TestResponse response; + ServerId server_id; + RespPm resp_params; + Cost request_cost; + }; + + const ClientId id; + const SubmitFunc submit_f; + const ServerSelectFunc server_select_f; + const ClientAccumFunc accum_f; + + std::vector<CliInst> instructions; + + SvcTrk service_tracker; + + // TODO: use lock rather than atomic??? + std::atomic_ulong outstanding_ops; + std::atomic_bool requests_complete; + + std::deque<RespQueueItem> resp_queue; + + std::mutex mtx_req; + std::condition_variable cv_req; + + std::mutex mtx_resp; + std::condition_variable cv_resp; + + using RespGuard = std::lock_guard<decltype(mtx_resp)>; + using Lock = std::unique_lock<std::mutex>; + + // data collection + + std::vector<TimePoint> op_times; + Accum accumulator; + InternalStats internal_stats; + + std::thread thd_req; + std::thread thd_resp; + + public: + + SimulatedClient(ClientId _id, + const SubmitFunc& _submit_f, + const ServerSelectFunc& _server_select_f, + const ClientAccumFunc& _accum_f, + const std::vector<CliInst>& _instrs) : + id(_id), + submit_f(_submit_f), + server_select_f(_server_select_f), + accum_f(_accum_f), + instructions(_instrs), + service_tracker(), + outstanding_ops(0), + requests_complete(false) + { + size_t op_count = 0; + for (auto i : instructions) { + if (CliOp::req == i.op) { + op_count += i.args.req_params.count; + } + } + op_times.reserve(op_count); + + thd_resp = std::thread(&SimulatedClient::run_resp, this); + thd_req = std::thread(&SimulatedClient::run_req, this); + } + + + SimulatedClient(ClientId _id, + const SubmitFunc& _submit_f, + const ServerSelectFunc& _server_select_f, + const ClientAccumFunc& _accum_f, + uint16_t _ops_to_run, + double _iops_goal, + uint16_t _outstanding_ops_allowed) : + SimulatedClient(_id, + _submit_f, _server_select_f, _accum_f, + {{req_op, _ops_to_run, _iops_goal, _outstanding_ops_allowed}}) + { + // empty + } + + + SimulatedClient(const 
SimulatedClient&) = delete; + SimulatedClient(SimulatedClient&&) = delete; + SimulatedClient& operator=(const SimulatedClient&) = delete; + SimulatedClient& operator=(SimulatedClient&&) = delete; + + virtual ~SimulatedClient() { + wait_until_done(); + } + + void receive_response(const TestResponse& resp, + const ServerId& server_id, + const RespPm& resp_params, + const Cost request_cost) { + RespGuard g(mtx_resp); + resp_queue.push_back( + RespQueueItem{ resp, server_id, resp_params, request_cost }); + cv_resp.notify_one(); + } + + const std::vector<TimePoint>& get_op_times() const { return op_times; } + + void wait_until_done() { + if (thd_req.joinable()) thd_req.join(); + if (thd_resp.joinable()) thd_resp.join(); + } + + const Accum& get_accumulator() const { return accumulator; } + + const InternalStats& get_internal_stats() const { return internal_stats; } + + protected: + + void run_req() { + size_t ops_count = 0; + for (auto i : instructions) { + if (CliOp::wait == i.op) { + std::this_thread::sleep_for(i.args.wait_time); + } else if (CliOp::req == i.op) { + Lock l(mtx_req); + for (uint64_t o = 0; o < i.args.req_params.count; ++o) { + while (outstanding_ops >= i.args.req_params.max_outstanding) { + cv_req.wait(l); + } + + l.unlock(); + auto now = std::chrono::steady_clock::now(); + const ServerId& server = server_select_f(o); + + ReqPm rp = + time_stats_w_return<decltype(internal_stats.get_req_params_time), + ReqPm>(internal_stats.mtx, + internal_stats.get_req_params_time, + [&]() -> ReqPm { + return service_tracker.get_req_params(server); + }); + count_stats(internal_stats.mtx, + internal_stats.get_req_params_count); + + submit_f(server, + TestRequest{server, static_cast<uint32_t>(o), 12}, + id, rp); + ++outstanding_ops; + l.lock(); // lock for return to top of loop + + auto delay_time = now + i.args.req_params.time_bw_reqs; + while (std::chrono::steady_clock::now() < delay_time) { + cv_req.wait_until(l, delay_time); + } // while + } // for + ops_count += 
i.args.req_params.count; + } else { + assert(false); + } + } // for loop + + requests_complete = true; + + // all requests made, thread ends + } + + + void run_resp() { + std::chrono::milliseconds delay(1000); + int op = 0; + + Lock l(mtx_resp); + + // since the following code would otherwise be repeated (except for + // the call to notify_one) in the two loops below; let's avoid + // repetition and define it once. + const auto proc_resp = [this, &op, &l](const bool notify_req_cv) { + if (!resp_queue.empty()) { + RespQueueItem item = resp_queue.front(); + resp_queue.pop_front(); + + l.unlock(); + + // data collection + + op_times.push_back(now()); + accum_f(accumulator, item.resp_params); + + // processing + +#if 0 // not needed + TestResponse& resp = item.response; +#endif + + time_stats(internal_stats.mtx, + internal_stats.track_resp_time, + [&](){ + service_tracker.track_resp(item.server_id, item.resp_params, item.request_cost); + }); + count_stats(internal_stats.mtx, + internal_stats.track_resp_count); + + --outstanding_ops; + if (notify_req_cv) { + cv_req.notify_one(); + } + + l.lock(); + } + }; + + while(!requests_complete.load()) { + while(resp_queue.empty() && !requests_complete.load()) { + cv_resp.wait_for(l, delay); + } + proc_resp(true); + } + + while(outstanding_ops.load() > 0) { + while(resp_queue.empty() && outstanding_ops.load() > 0) { + cv_resp.wait_for(l, delay); + } + proc_resp(false); // don't call notify_one as all requests are complete + } + + // all responses received, thread ends + } + }; // class SimulatedClient + + + }; // namespace qos_simulation +}; // namespace crimson diff --git a/src/dmclock/sim/src/sim_recs.h b/src/dmclock/sim/src/sim_recs.h new file mode 100644 index 000000000..010630072 --- /dev/null +++ b/src/dmclock/sim/src/sim_recs.h @@ -0,0 +1,131 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. 
Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#pragma once + + +#include <stdint.h> +#include <stdlib.h> +#include <assert.h> +#include <signal.h> + +#include <sys/time.h> + +#include <cmath> +#include <limits> +#include <string> +#include <mutex> +#include <iostream> +#include <functional> + + +using ClientId = unsigned; +using ServerId = unsigned; + + +namespace crimson { + namespace qos_simulation { + + using Cost = uint32_t; + + inline void debugger() { + raise(SIGCONT); + } + + template<typename T> + void time_stats(std::mutex& mtx, + T& time_accumulate, + std::function<void()> code) { + auto t1 = std::chrono::steady_clock::now(); + code(); + auto t2 = std::chrono::steady_clock::now(); + auto duration = t2 - t1; + auto cast_duration = std::chrono::duration_cast<T>(duration); + std::lock_guard<std::mutex> lock(mtx); + time_accumulate += cast_duration; + } + + // unfortunately it's hard for the compiler to infer the types, + // and therefore when called the template params might have to be + // explicit + template<typename T, typename R> + R time_stats_w_return(std::mutex& mtx, + T& time_accumulate, + std::function<R()> code) { + auto t1 = std::chrono::steady_clock::now(); + R result = code(); + auto t2 = std::chrono::steady_clock::now(); + auto duration = t2 - t1; + auto cast_duration = std::chrono::duration_cast<T>(duration); + std::lock_guard<std::mutex> lock(mtx); + time_accumulate += cast_duration; + return result; + } + + template<typename T> + void count_stats(std::mutex& mtx, + T& counter) { + std::lock_guard<std::mutex> lock(mtx); + ++counter; + } + + struct TestRequest { + ServerId server; // allows debugging + uint32_t epoch; + uint32_t op; + + TestRequest(ServerId _server, + uint32_t _epoch, + uint32_t _op) : + server(_server), + 
epoch(_epoch), + op(_op) + { + // empty + } + + TestRequest(const TestRequest& r) : + TestRequest(r.server, r.epoch, r.op) + { + // empty + } + }; // struct TestRequest + + + struct TestResponse { + uint32_t epoch; + + explicit TestResponse(uint32_t _epoch) : + epoch(_epoch) + { + // empty + } + + TestResponse(const TestResponse& r) : + epoch(r.epoch) + { + // empty + } + + friend std::ostream& operator<<(std::ostream& out, const TestResponse& resp) { + out << "{ "; + out << "epoch:" << resp.epoch; + out << " }"; + return out; + } + }; // class TestResponse + + }; // namespace qos_simulation +}; // namespace crimson diff --git a/src/dmclock/sim/src/sim_server.h b/src/dmclock/sim/src/sim_server.h new file mode 100644 index 000000000..743c6e79a --- /dev/null +++ b/src/dmclock/sim/src/sim_server.h @@ -0,0 +1,245 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#pragma once + + +#include <thread> +#include <mutex> +#include <condition_variable> +#include <chrono> +#include <deque> + +#include "sim_recs.h" + + +namespace crimson { + namespace qos_simulation { + + template<typename Q, typename ReqPm, typename RespPm, typename Accum> + class SimulatedServer { + + struct QueueItem { + ClientId client; + std::unique_ptr<TestRequest> request; + RespPm additional; + Cost request_cost; + + QueueItem(const ClientId& _client, + std::unique_ptr<TestRequest>&& _request, + const RespPm& _additional, + const Cost _request_cost) : + client(_client), + request(std::move(_request)), + additional(_additional), + request_cost(_request_cost) + { + // empty + } + }; // QueueItem + + public: + + struct InternalStats { + std::mutex mtx; + std::chrono::nanoseconds add_request_time; + std::chrono::nanoseconds request_complete_time; + uint32_t add_request_count; + uint32_t request_complete_count; + + InternalStats() : + add_request_time(0), + request_complete_time(0), + add_request_count(0), + request_complete_count(0) + { + // empty + } + }; + + using ClientRespFunc = std::function<void(ClientId, + const TestResponse&, + const ServerId&, + const RespPm&, + const Cost)>; + + using ServerAccumFunc = std::function<void(Accum& accumulator, + const RespPm& additional)>; + + protected: + + const ServerId id; + Q* priority_queue; + ClientRespFunc client_resp_f; + int iops; + size_t thread_pool_size; + + bool finishing; + std::chrono::microseconds op_time; + + std::mutex inner_queue_mtx; + std::condition_variable inner_queue_cv; + std::deque<QueueItem> inner_queue; + + std::thread* threads; + + using InnerQGuard = std::lock_guard<decltype(inner_queue_mtx)>; + using Lock = std::unique_lock<std::mutex>; + + // data collection + + ServerAccumFunc accum_f; + Accum accumulator; + + InternalStats internal_stats; + + public: + + using CanHandleRequestFunc = std::function<bool(void)>; + using HandleRequestFunc = + std::function<void(const 
ClientId&,std::unique_ptr<TestRequest>,const RespPm&, uint64_t)>; + using CreateQueueF = std::function<Q*(CanHandleRequestFunc,HandleRequestFunc)>; + + + SimulatedServer(ServerId _id, + int _iops, + size_t _thread_pool_size, + const ClientRespFunc& _client_resp_f, + const ServerAccumFunc& _accum_f, + CreateQueueF _create_queue_f) : + id(_id), + priority_queue(_create_queue_f(std::bind(&SimulatedServer::has_avail_thread, + this), + std::bind(&SimulatedServer::inner_post, + this, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3, + std::placeholders::_4))), + client_resp_f(_client_resp_f), + iops(_iops), + thread_pool_size(_thread_pool_size), + finishing(false), + accum_f(_accum_f) + { + op_time = + std::chrono::microseconds((int) (0.5 + + thread_pool_size * 1000000.0 / iops)); + std::chrono::milliseconds finishing_check_period(1000); + threads = new std::thread[thread_pool_size]; + for (size_t i = 0; i < thread_pool_size; ++i) { + threads[i] = std::thread(&SimulatedServer::run, this, finishing_check_period); + } + } + + virtual ~SimulatedServer() { + Lock l(inner_queue_mtx); + finishing = true; + inner_queue_cv.notify_all(); + l.unlock(); + + for (size_t i = 0; i < thread_pool_size; ++i) { + threads[i].join(); + } + + delete[] threads; + + delete priority_queue; + } + + void post(TestRequest&& request, + const ClientId& client_id, + const ReqPm& req_params, + const Cost request_cost) + { + time_stats(internal_stats.mtx, + internal_stats.add_request_time, + [&](){ + priority_queue->add_request(std::move(request), + client_id, + req_params, + request_cost); + }); + count_stats(internal_stats.mtx, + internal_stats.add_request_count); + } + + bool has_avail_thread() { + InnerQGuard g(inner_queue_mtx); + return inner_queue.size() <= thread_pool_size; + } + + const Accum& get_accumulator() const { return accumulator; } + const Q& get_priority_queue() const { return *priority_queue; } + const InternalStats& get_internal_stats() const { return 
internal_stats; } + + protected: + + void inner_post(const ClientId& client, + std::unique_ptr<TestRequest> request, + const RespPm& additional, + const Cost request_cost) { + Lock l(inner_queue_mtx); + assert(!finishing); + accum_f(accumulator, additional); + inner_queue.emplace_back(QueueItem(client, + std::move(request), + additional, + request_cost)); + inner_queue_cv.notify_one(); + } + + void run(std::chrono::milliseconds check_period) { + Lock l(inner_queue_mtx); + while(true) { + while(inner_queue.empty() && !finishing) { + inner_queue_cv.wait_for(l, check_period); + } + if (!inner_queue.empty()) { + auto& front = inner_queue.front(); + auto client = front.client; + auto req = std::move(front.request); + auto additional = front.additional; + auto request_cost = front.request_cost; + inner_queue.pop_front(); + + l.unlock(); + + // simulation operation by sleeping; then call function to + // notify server of completion + std::this_thread::sleep_for(op_time * request_cost); + + // TODO: rather than assuming this constructor exists, perhaps + // pass in a function that does this mapping? + client_resp_f(client, TestResponse{req->epoch}, id, additional, request_cost); + + time_stats(internal_stats.mtx, + internal_stats.request_complete_time, + [&](){ + priority_queue->request_completed(); + }); + count_stats(internal_stats.mtx, + internal_stats.request_complete_count); + + l.lock(); // in prep for next iteration of loop + } else { + break; + } + } + } + }; // class SimulatedServer + + }; // namespace qos_simulation +}; // namespace crimson diff --git a/src/dmclock/sim/src/simulate.h b/src/dmclock/sim/src/simulate.h new file mode 100644 index 000000000..44a09ca31 --- /dev/null +++ b/src/dmclock/sim/src/simulate.h @@ -0,0 +1,448 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. 
Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#pragma once + + +#include <assert.h> + +#include <memory> +#include <chrono> +#include <map> +#include <random> +#include <iostream> +#include <iomanip> +#include <string> + + +namespace crimson { + namespace qos_simulation { + + template<typename ServerId, typename ClientId, typename TS, typename TC> + class Simulation { + + public: + + using TimePoint = std::chrono::time_point<std::chrono::steady_clock>; + + protected: + + using ClientMap = std::map<ClientId,TC*>; + using ServerMap = std::map<ServerId,TS*>; + + unsigned server_count = 0; + unsigned client_count = 0; + + ServerMap servers; + ClientMap clients; + std::vector<ServerId> server_ids; + + TimePoint early_time; + TimePoint servers_created_time; + TimePoint clients_created_time; + TimePoint clients_finished_time; + TimePoint late_time; + + std::default_random_engine prng; + + bool has_run = false; + + + public: + + double fmt_tp(const TimePoint& t) { + auto c = t.time_since_epoch().count(); + return uint64_t(c / 1000000.0 + 0.5) % 100000 / 1000.0; + } + + TimePoint now() { + return std::chrono::steady_clock::now(); + } + + using ClientBasedServerSelectFunc = + std::function<const ServerId&(uint64_t, uint16_t)>; + + using ClientFilter = std::function<bool(const ClientId&)>; + + using ServerFilter = std::function<bool(const ServerId&)>; + + using ServerDataOutF = + std::function<void(std::ostream& out, + Simulation* sim, ServerFilter, + int header_w, int data_w, int data_prec)>; + + using ClientDataOutF = + std::function<void(std::ostream& out, + Simulation* sim, ClientFilter, + int header_w, int data_w, int data_prec)>; + + Simulation() : + early_time(now()), + prng(std::chrono::system_clock::now().time_since_epoch().count()) + { + // empty + } + 
+ ~Simulation() { + for (auto c : clients) { + TC* cp = c.second; + delete cp; + } + + for (auto s : servers) { + delete s.second; + } + } + + unsigned get_client_count() const { return client_count; } + unsigned get_server_count() const { return server_count; } + TC& get_client(ClientId id) { return *clients[id]; } + TS& get_server(ServerId id) { return *servers[id]; } + const ServerId& get_server_id(std::size_t index) const { + return server_ids[index]; + } + + + void add_servers(unsigned count, + std::function<TS*(ServerId)> create_server_f) { + unsigned i = server_count; + + // increment server_count before creating servers since they + // will start running immediately and may use the server_count + // value; NB: this could still be an issue if servers are + // added with multiple add_servers calls; consider using a + // separate start function after all servers (and clients?) + // have been added + server_count += count; + + for (; i < server_count; ++i) { + server_ids.push_back(i); + servers[i] = create_server_f(i); + } + + servers_created_time = now(); + } + + + void add_clients(unsigned count, + std::function<TC*(ClientId)> create_client_f) { + unsigned i = client_count; + + // increment client_count before creating clients since they + // will start running immediately and may use the client_count + // value (e.g., in the server selection function); NB: this could + // still be an issue if clients are added with multiple + // add_clients calls; consider using a separate start function + // after all clients have been added + client_count += count; + + for (; i < client_count; ++i) { + clients[i] = create_client_f(i); + } + + clients_created_time = now(); + } + + + void run() { + assert(server_count > 0); + assert(client_count > 0); + + std::cout << "simulation started" << std::endl; + + // clients are now running; wait for all to finish + + for (auto const &i : clients) { + i.second->wait_until_done(); + } + + late_time = clients_finished_time = now(); + 
+ std::cout << "simulation completed in " << + std::chrono::duration_cast<std::chrono::milliseconds>(clients_finished_time - servers_created_time).count() << + " millisecs" << std::endl; + + has_run = true; + } // run + + + void display_stats(std::ostream& out, + ServerDataOutF server_out_f, ClientDataOutF client_out_f, + ServerFilter server_filter = + [] (const ServerId&) { return true; }, + ClientFilter client_filter = + [] (const ClientId&) { return true; }, + int head_w = 12, int data_w = 7, int data_prec = 2) { + assert(has_run); + + // skip first 2 secondsd of data + const std::chrono::seconds skip_amount(0); + // calculate in groups of 5 seconds + const std::chrono::seconds measure_unit(2); + // unit to output reports in + const std::chrono::seconds report_unit(1); + + // compute and display stats + + TimePoint earliest_start = late_time; + TimePoint latest_start = early_time; + TimePoint earliest_finish = late_time; + TimePoint latest_finish = early_time; + + for (auto const &c : clients) { + auto start = c.second->get_op_times().front(); + auto end = c.second->get_op_times().back(); + + if (start < earliest_start) { earliest_start = start; } + if (start > latest_start) { latest_start = start; } + if (end < earliest_finish) { earliest_finish = end; } + if (end > latest_finish) { latest_finish = end; } + } + + double ops_factor = + std::chrono::duration_cast<std::chrono::duration<double>>(measure_unit) / + std::chrono::duration_cast<std::chrono::duration<double>>(report_unit); + + const auto start_edge = clients_created_time + skip_amount; + + std::map<ClientId,std::vector<double>> ops_data; + + for (auto const &c : clients) { + auto it = c.second->get_op_times().begin(); + const auto end = c.second->get_op_times().end(); + while (it != end && *it < start_edge) { ++it; } + + for (auto time_edge = start_edge + measure_unit; + time_edge <= latest_finish + measure_unit; + time_edge += measure_unit) { + int count = 0; + for (; it != end && *it < time_edge; 
++count, ++it) { /* empty */ } + double ops_per_second = double(count) / ops_factor; + ops_data[c.first].push_back(ops_per_second); + } + } + + out << "==== Client Data ====" << std::endl; + + out << std::setw(head_w) << "client:"; + for (auto const &c : clients) { + if (!client_filter(c.first)) continue; + out << " " << std::setw(data_w) << c.first; + } + out << std::setw(data_w) << "total" << std::endl; + + { + bool has_data; + size_t i = 0; + do { + std::string line_header = "t_" + std::to_string(i) + ":"; + out << std::setw(head_w) << line_header; + has_data = false; + double total = 0.0; + for (auto const &c : clients) { + double data = 0.0; + if (i < ops_data[c.first].size()) { + data = ops_data[c.first][i]; + has_data = true; + } + total += data; + + if (!client_filter(c.first)) continue; + + out << " " << std::setw(data_w) << std::setprecision(data_prec) << + std::fixed << data; + } + out << " " << std::setw(data_w) << std::setprecision(data_prec) << + std::fixed << total << std::endl; + ++i; + } while(has_data); + } + + client_out_f(out, this, client_filter, head_w, data_w, data_prec); + + display_client_internal_stats<std::chrono::nanoseconds>(out, + "nanoseconds"); + + out << std::endl << "==== Server Data ====" << std::endl; + + out << std::setw(head_w) << "server:"; + for (auto const &s : servers) { + if (!server_filter(s.first)) continue; + out << " " << std::setw(data_w) << s.first; + } + out << " " << std::setw(data_w) << "total" << std::endl; + + server_out_f(out, this, server_filter, head_w, data_w, data_prec); + + display_server_internal_stats<std::chrono::nanoseconds>(out, + "nanoseconds"); + + // clean up clients then servers + + for (auto i = clients.begin(); i != clients.end(); ++i) { + delete i->second; + i->second = nullptr; + } + + for (auto i = servers.begin(); i != servers.end(); ++i) { + delete i->second; + i->second = nullptr; + } + } // display_stats + + + template<typename T> + void display_server_internal_stats(std::ostream& out, + 
const std::string& time_unit) { + T add_request_time(0); + T request_complete_time(0); + uint32_t add_request_count = 0; + uint32_t request_complete_count = 0; + + for (unsigned i = 0; i < get_server_count(); ++i) { + const auto& server = get_server(i); + const auto& is = server.get_internal_stats(); + add_request_time += + std::chrono::duration_cast<T>(is.add_request_time); + request_complete_time += + std::chrono::duration_cast<T>(is.request_complete_time); + add_request_count += is.add_request_count; + request_complete_count += is.request_complete_count; + } + + double add_request_time_per_unit = + double(add_request_time.count()) / add_request_count ; + out << "total time to add requests: " << + std::fixed << add_request_time.count() << " " << time_unit << + ";" << std::endl << + " count: " << add_request_count << ";" << std::endl << + " average: " << add_request_time_per_unit << + " " << time_unit << " per request/response" << std::endl; + + double request_complete_time_unit = + double(request_complete_time.count()) / request_complete_count ; + out << "total time to note requests complete: " << std::fixed << + request_complete_time.count() << " " << time_unit << ";" << + std::endl << + " count: " << request_complete_count << ";" << std::endl << + " average: " << request_complete_time_unit << + " " << time_unit << " per request/response" << std::endl; + + out << std::endl; + + assert(add_request_count == request_complete_count); + out << "server timing for QOS algorithm: " << + add_request_time_per_unit + request_complete_time_unit << + " " << time_unit << " per request/response" << std::endl; + } + + + template<typename T> + void display_client_internal_stats(std::ostream& out, + const std::string& time_unit) { + T track_resp_time(0); + T get_req_params_time(0); + uint32_t track_resp_count = 0; + uint32_t get_req_params_count = 0; + + for (unsigned i = 0; i < get_client_count(); ++i) { + const auto& client = get_client(i); + const auto& is = 
client.get_internal_stats(); + track_resp_time += + std::chrono::duration_cast<T>(is.track_resp_time); + get_req_params_time += + std::chrono::duration_cast<T>(is.get_req_params_time); + track_resp_count += is.track_resp_count; + get_req_params_count += is.get_req_params_count; + } + + double track_resp_time_unit = + double(track_resp_time.count()) / track_resp_count; + out << "total time to track responses: " << + std::fixed << track_resp_time.count() << " " << time_unit << ";" << + std::endl << + " count: " << track_resp_count << ";" << std::endl << + " average: " << track_resp_time_unit << " " << time_unit << + " per request/response" << std::endl; + + double get_req_params_time_unit = + double(get_req_params_time.count()) / get_req_params_count; + out << "total time to get request parameters: " << + std::fixed << get_req_params_time.count() << " " << time_unit << + ";" << std::endl << + " count: " << get_req_params_count << ";" << std::endl << + " average: " << get_req_params_time_unit << " " << time_unit << + " per request/response" << std::endl; + + out << std::endl; + + assert(track_resp_count == get_req_params_count); + out << "client timing for QOS algorithm: " << + track_resp_time_unit + get_req_params_time_unit << " " << + time_unit << " per request/response" << std::endl; + } + + + // **** server selection functions **** + + + const ServerId& server_select_alternate(uint64_t seed, + uint16_t client_idx) { + size_t index = (client_idx + seed) % server_count; + return server_ids[index]; + } + + + // returns a lambda using the range specified as servers_per (client) + ClientBasedServerSelectFunc + make_server_select_alt_range(uint16_t servers_per) { + return [servers_per,this](uint64_t seed, uint16_t client_idx) + -> const ServerId& { + double factor = double(server_count) / client_count; + size_t offset = seed % servers_per; + size_t index = (size_t(0.5 + client_idx * factor) + offset) % server_count; + return server_ids[index]; + }; + } + + + // function 
to choose a server randomly + const ServerId& server_select_random(uint64_t seed, uint16_t client_idx) { + size_t index = prng() % server_count; + return server_ids[index]; + } + + + // function to choose a server randomly + ClientBasedServerSelectFunc + make_server_select_ran_range(uint16_t servers_per) { + return [servers_per,this](uint64_t seed, uint16_t client_idx) + -> const ServerId& { + double factor = double(server_count) / client_count; + size_t offset = prng() % servers_per; + size_t index = (size_t(0.5 + client_idx * factor) + offset) % server_count; + return server_ids[index]; + }; + } + + + // function to always choose the first server + const ServerId& server_select_0(uint64_t seed, uint16_t client_idx) { + return server_ids[0]; + } + }; // class Simulation + + }; // namespace qos_simulation +}; // namespace crimson diff --git a/src/dmclock/sim/src/ssched/ssched_client.h b/src/dmclock/sim/src/ssched/ssched_client.h new file mode 100644 index 000000000..6764a09a8 --- /dev/null +++ b/src/dmclock/sim/src/ssched/ssched_client.h @@ -0,0 +1,51 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#pragma once + +#include "ssched_recs.h" + + +namespace crimson { + namespace simple_scheduler { + + // S is server identifier type + template<typename S> + class ServiceTracker { + + public: + + // we have to start the counters at 1, as 0 is used in the + // cleaning process + ServiceTracker() + { + // empty + } + + void track_resp(const S& server_id, + const NullData& ignore, + uint64_t request_cost) { + // empty + } + + /* + * Returns the ReqParams for the given server. 
+ */ + ReqParams get_req_params(const S& server) { + return ReqParams(); + } // get_req_params + }; // class ServiceTracker + } // namespace simple_scheduler +} // namespace crimson diff --git a/src/dmclock/sim/src/ssched/ssched_recs.h b/src/dmclock/sim/src/ssched/ssched_recs.h new file mode 100644 index 000000000..935e678c1 --- /dev/null +++ b/src/dmclock/sim/src/ssched/ssched_recs.h @@ -0,0 +1,44 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#pragma once + + +#include <ostream> +#include <assert.h> + + +namespace crimson { + namespace simple_scheduler { + + // since we send no additional data out + // NOTE: Change name to RespParams? Is it used elsewhere? + struct NullData { + friend std::ostream& operator<<(std::ostream& out, const NullData& n) { + out << "NullData{ EMPTY }"; + return out; + } + }; // struct NullData + + + struct ReqParams { + friend std::ostream& operator<<(std::ostream& out, const ReqParams& rp) { + out << "ReqParams{ EMPTY }"; + return out; + } + }; + + } +} diff --git a/src/dmclock/sim/src/ssched/ssched_server.h b/src/dmclock/sim/src/ssched/ssched_server.h new file mode 100644 index 000000000..c4e057a88 --- /dev/null +++ b/src/dmclock/sim/src/ssched/ssched_server.h @@ -0,0 +1,194 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. 
Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#pragma once + +#include <memory> +#include <mutex> +#include <deque> +#include <functional> + +#include "boost/variant.hpp" + +#include "ssched_recs.h" + +#ifdef PROFILE +#include "profile.h" +#endif + +namespace crimson { + + namespace simple_scheduler { + + template<typename C, typename R, typename Time> + class SimpleQueue { + + public: + + using RequestRef = std::unique_ptr<R>; + + // a function to see whether the server can handle another request + using CanHandleRequestFunc = std::function<bool(void)>; + + // a function to submit a request to the server; the second + // parameter is a callback when it's completed + using HandleRequestFunc = + std::function<void(const C&,RequestRef,NullData,uint64_t)>; + + struct PullReq { + enum class Type { returning, none }; + + struct Retn { + C client; + RequestRef request; + }; + + Type type; + boost::variant<Retn> data; + }; + + protected: + + enum class Mechanism { push, pull }; + + struct QRequest { + C client; + RequestRef request; + }; + + bool finishing = false; + Mechanism mechanism; + + CanHandleRequestFunc can_handle_f; + HandleRequestFunc handle_f; + + mutable std::mutex queue_mtx; + using DataGuard = std::lock_guard<decltype(queue_mtx)>; + + std::deque<QRequest> queue; + +#ifdef PROFILE + public: + ProfileTimer<std::chrono::nanoseconds> pull_request_timer; + ProfileTimer<std::chrono::nanoseconds> add_request_timer; + ProfileTimer<std::chrono::nanoseconds> request_complete_timer; + protected: +#endif + + public: + + // push full constructor + SimpleQueue(CanHandleRequestFunc _can_handle_f, + HandleRequestFunc _handle_f) : + mechanism(Mechanism::push), + can_handle_f(_can_handle_f), + handle_f(_handle_f) + { + // empty + } + + SimpleQueue() : + 
mechanism(Mechanism::pull) + { + // empty + } + + ~SimpleQueue() { + finishing = true; + } + + void add_request(R&& request, + const C& client_id, + const ReqParams& req_params, + uint64_t request_cost) { + add_request(RequestRef(new R(std::move(request))), + client_id, req_params, request_cost); + } + + void add_request(RequestRef&& request, + const C& client_id, + const ReqParams& req_params, + uint64_t request_cost) { + DataGuard g(queue_mtx); + +#ifdef PROFILE + add_request_timer.start(); +#endif + queue.emplace_back(QRequest{client_id, std::move(request)}); + + if (Mechanism::push == mechanism) { + schedule_request(); + } + +#ifdef PROFILE + add_request_timer.stop(); +#endif + } // add_request + + void request_completed() { + assert(Mechanism::push == mechanism); + DataGuard g(queue_mtx); + +#ifdef PROFILE + request_complete_timer.start(); +#endif + schedule_request(); + +#ifdef PROFILE + request_complete_timer.stop(); +#endif + } // request_completed + + PullReq pull_request() { + assert(Mechanism::pull == mechanism); + PullReq result; + DataGuard g(queue_mtx); + +#ifdef PROFILE + pull_request_timer.start(); +#endif + + if (queue.empty()) { + result.type = PullReq::Type::none; + } else { + auto front = queue.front(); + result.type = PullReq::Type::returning; + result.data = + typename PullReq::Retn{front.client, std::move(front.request)}; + queue.pop(); + } + +#ifdef PROFILE + pull_request_timer.stop(); +#endif + + return result; + } + + protected: + + // queue_mtx should be held when called; should only be called + // when mechanism is push + void schedule_request() { + if (!queue.empty() && can_handle_f()) { + auto& front = queue.front(); + static NullData null_data; + handle_f(front.client, std::move(front.request), null_data, 1u); + queue.pop_front(); + } + } + }; + }; +}; diff --git a/src/dmclock/sim/src/str_list.cc b/src/dmclock/sim/src/str_list.cc new file mode 100644 index 000000000..22109e008 --- /dev/null +++ b/src/dmclock/sim/src/str_list.cc @@ 
-0,0 +1,106 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2009-2010 Dreamhost + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "str_list.h" + +using std::string; +using std::vector; +using std::set; +using std::list; + +static bool get_next_token(const string &s, size_t& pos, const char *delims, string& token) +{ + int start = s.find_first_not_of(delims, pos); + int end; + + if (start < 0){ + pos = s.size(); + return false; + } + + end = s.find_first_of(delims, start); + if (end >= 0) + pos = end + 1; + else { + pos = end = s.size(); + } + + token = s.substr(start, end - start); + return true; +} + +void get_str_list(const string& str, const char *delims, list<string>& str_list) +{ + size_t pos = 0; + string token; + + str_list.clear(); + + while (pos < str.size()) { + if (get_next_token(str, pos, delims, token)) { + if (token.size() > 0) { + str_list.push_back(token); + } + } + } +} + +void get_str_list(const string& str, list<string>& str_list) +{ + const char *delims = ";,= \t"; + return get_str_list(str, delims, str_list); +} + +void get_str_vec(const string& str, const char *delims, vector<string>& str_vec) +{ + size_t pos = 0; + string token; + str_vec.clear(); + + while (pos < str.size()) { + if (get_next_token(str, pos, delims, token)) { + if (token.size() > 0) { + str_vec.push_back(token); + } + } + } +} + +void get_str_vec(const string& str, vector<string>& str_vec) +{ + const char *delims = ";,= \t"; + return get_str_vec(str, delims, str_vec); +} + +void get_str_set(const string& str, const char *delims, set<string>& str_set) +{ + size_t pos = 0; + string token; + + str_set.clear(); + + while (pos < str.size()) { + if (get_next_token(str, pos, 
delims, token)) { + if (token.size() > 0) { + str_set.insert(token); + } + } + } +} + +void get_str_set(const string& str, set<string>& str_set) +{ + const char *delims = ";,= \t"; + return get_str_set(str, delims, str_set); +} diff --git a/src/dmclock/sim/src/str_list.h b/src/dmclock/sim/src/str_list.h new file mode 100644 index 000000000..4a6dcc57f --- /dev/null +++ b/src/dmclock/sim/src/str_list.h @@ -0,0 +1,109 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2009 Red Hat Inc. + * + * Forked from Red Hat's Ceph project. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#ifndef CEPH_STRLIST_H +#define CEPH_STRLIST_H + +#include <list> +#include <set> +#include <sstream> +#include <string> +#include <vector> + +/** + * Split **str** into a list of strings, using the ";,= \t" delimiters and output the result in **str_list**. + * + * @param [in] str String to split and save as list + * @param [out] str_list List modified containing str after it has been split +**/ +extern void get_str_list(const std::string& str, + std::list<std::string>& str_list); + +/** + * Split **str** into a list of strings, using the **delims** delimiters and output the result in **str_list**. + * + * @param [in] str String to split and save as list + * @param [in] delims characters used to split **str** + * @param [out] str_list List modified containing str after it has been split +**/ +extern void get_str_list(const std::string& str, + const char *delims, + std::list<std::string>& str_list); + +/** + * Split **str** into a list of strings, using the ";,= \t" delimiters and output the result in **str_vec**. 
+ * + * @param [in] str String to split and save as Vector + * @param [out] str_vec Vector modified containing str after it has been split +**/ +extern void get_str_vec(const std::string& str, + std::vector<std::string>& str_vec); + +/** + * Split **str** into a list of strings, using the **delims** delimiters and output the result in **str_vec**. + * + * @param [in] str String to split and save as Vector + * @param [in] delims characters used to split **str** + * @param [out] str_vec Vector modified containing str after it has been split +**/ +extern void get_str_vec(const std::string& str, + const char *delims, + std::vector<std::string>& str_vec); + +/** + * Split **str** into a list of strings, using the ";,= \t" delimiters and output the result in **str_list**. + * + * @param [in] str String to split and save as Set + * @param [out] str_list Set modified containing str after it has been split +**/ +extern void get_str_set(const std::string& str, + std::set<std::string>& str_list); + +/** + * Split **str** into a list of strings, using the **delims** delimiters and output the result in **str_list**. 
+ * + * @param [in] str String to split and save as Set + * @param [in] delims characters used to split **str** + * @param [out] str_list Set modified containing str after it has been split +**/ +extern void get_str_set(const std::string& str, + const char *delims, + std::set<std::string>& str_list); + +/** + * Return a String containing the vector **v** joined with **sep** + * + * If **v** is empty, the function returns an empty string + * For each element in **v**, + * it will concatenate this element and **sep** with result + * + * @param [in] v Vector to join as a String + * @param [in] sep String used to join each element from **v** + * @return empty string if **v** is empty or concatenated string +**/ +inline std::string str_join(const std::vector<std::string>& v, const std::string& sep) +{ + if (v.empty()) + return std::string(); + std::vector<std::string>::const_iterator i = v.begin(); + std::string r = *i; + for (++i; i != v.end(); ++i) { + r += sep; + r += *i; + } + return r; +} + +#endif diff --git a/src/dmclock/sim/src/test_dmclock.cc b/src/dmclock/sim/src/test_dmclock.cc new file mode 100644 index 000000000..402a47335 --- /dev/null +++ b/src/dmclock/sim/src/test_dmclock.cc @@ -0,0 +1,55 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + +#include <type_traits> + +#include "dmclock_recs.h" +#include "dmclock_server.h" +#include "dmclock_client.h" + +#include "sim_recs.h" +#include "sim_server.h" +#include "sim_client.h" + +#include "test_dmclock.h" + + +namespace test = crimson::test_dmc; + + +// Note: if this static_assert fails then our two definitions of Cost +// do not match; change crimson::qos_simulation::Cost to match the +// definition of crimson::dmclock::Cost. +static_assert(std::is_same<crimson::qos_simulation::Cost,crimson::dmclock::Cost>::value, + "Please make sure the simulator type crimson::qos_simulation::Cost matches the dmclock type crimson::dmclock::Cost."); + + +void test::dmc_server_accumulate_f(test::DmcAccum& a, + const test::dmc::PhaseType& phase) { + if (test::dmc::PhaseType::reservation == phase) { + ++a.reservation_count; + } else { + ++a.proportion_count; + } +} + + +void test::dmc_client_accumulate_f(test::DmcAccum& a, + const test::dmc::PhaseType& phase) { + if (test::dmc::PhaseType::reservation == phase) { + ++a.reservation_count; + } else { + ++a.proportion_count; + } +} diff --git a/src/dmclock/sim/src/test_dmclock.h b/src/dmclock/sim/src/test_dmclock.h new file mode 100644 index 000000000..1c7a55968 --- /dev/null +++ b/src/dmclock/sim/src/test_dmclock.h @@ -0,0 +1,64 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#include "dmclock_recs.h" +#include "dmclock_server.h" +#include "dmclock_client.h" + +#include "sim_recs.h" +#include "sim_server.h" +#include "sim_client.h" + +#include "simulate.h" + + +namespace crimson { + namespace test_dmc { + + namespace dmc = crimson::dmclock; + namespace sim = crimson::qos_simulation; + + struct DmcAccum { + uint64_t reservation_count = 0; + uint64_t proportion_count = 0; + }; + + using DmcQueue = dmc::PushPriorityQueue<ClientId,sim::TestRequest>; + using DmcServiceTracker = dmc::ServiceTracker<ServerId,dmc::OrigTracker>; + + using DmcServer = sim::SimulatedServer<DmcQueue, + dmc::ReqParams, + dmc::PhaseType, + DmcAccum>; + + using DmcClient = sim::SimulatedClient<DmcServiceTracker, + dmc::ReqParams, + dmc::PhaseType, + DmcAccum>; + + using CreateQueueF = std::function<DmcQueue*(DmcQueue::CanHandleRequestFunc, + DmcQueue::HandleRequestFunc)>; + + using MySim = sim::Simulation<ServerId,ClientId,DmcServer,DmcClient>; + + using SubmitFunc = DmcClient::SubmitFunc; + + extern void dmc_server_accumulate_f(DmcAccum& a, + const dmc::PhaseType& phase); + + extern void dmc_client_accumulate_f(DmcAccum& a, + const dmc::PhaseType& phase); + } // namespace test_dmc +} // namespace crimson diff --git a/src/dmclock/sim/src/test_dmclock_main.cc b/src/dmclock/sim/src/test_dmclock_main.cc new file mode 100644 index 000000000..5cf656608 --- /dev/null +++ b/src/dmclock/sim/src/test_dmclock_main.cc @@ -0,0 +1,342 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#include "test_dmclock.h" +#include "config.h" + +#ifdef PROFILE +#include "profile.h" +#endif + + +namespace dmc = crimson::dmclock; +namespace test = crimson::test_dmc; +namespace sim = crimson::qos_simulation; + +using namespace std::placeholders; + + +namespace crimson { + namespace test_dmc { + void server_data(std::ostream& out, + test::MySim* sim, + test::MySim::ServerFilter server_disp_filter, + int head_w, int data_w, int data_prec); + + void client_data(std::ostream& out, + test::MySim* sim, + test::MySim::ClientFilter client_disp_filter, + int head_w, int data_w, int data_prec); + } +} + + +int main(int argc, char* argv[]) { + std::vector<const char*> args; + for (int i = 1; i < argc; ++i) { + args.push_back(argv[i]); + } + + std::string conf_file_list; + sim::ceph_argparse_early_args(args, &conf_file_list); + + sim::sim_config_t g_conf; + std::vector<sim::cli_group_t> &cli_group = g_conf.cli_group; + std::vector<sim::srv_group_t> &srv_group = g_conf.srv_group; + + if (!conf_file_list.empty()) { + int ret; + ret = sim::parse_config_file(conf_file_list, g_conf); + if (ret) { + // error + _exit(1); + } + } else { + // default simulation parameter + g_conf.client_groups = 2; + + sim::srv_group_t st; + srv_group.push_back(st); + + sim::cli_group_t ct1(99, 0); + cli_group.push_back(ct1); + + sim::cli_group_t ct2(1, 10); + cli_group.push_back(ct2); + } + + const unsigned server_groups = g_conf.server_groups; + const unsigned client_groups = g_conf.client_groups; + const bool server_random_selection = g_conf.server_random_selection; + const bool server_soft_limit = g_conf.server_soft_limit; + const double anticipation_timeout = g_conf.anticipation_timeout; + unsigned server_total_count = 0; + unsigned client_total_count = 0; + + for (unsigned i = 0; i < client_groups; ++i) { + client_total_count += cli_group[i].client_count; + } + + for (unsigned i = 0; i < server_groups; ++i) { + server_total_count += srv_group[i].server_count; + } + + 
std::vector<test::dmc::ClientInfo> client_info; + for (unsigned i = 0; i < client_groups; ++i) { + client_info.push_back(test::dmc::ClientInfo + { cli_group[i].client_reservation, + cli_group[i].client_weight, + cli_group[i].client_limit } ); + } + + auto ret_client_group_f = [&](const ClientId& c) -> unsigned { + unsigned group_max = 0; + unsigned i = 0; + for (; i < client_groups; ++i) { + group_max += cli_group[i].client_count; + if (c < group_max) { + break; + } + } + return i; + }; + + auto ret_server_group_f = [&](const ServerId& s) -> unsigned { + unsigned group_max = 0; + unsigned i = 0; + for (; i < server_groups; ++i) { + group_max += srv_group[i].server_count; + if (s < group_max) { + break; + } + } + return i; + }; + + auto client_info_f = + [=](const ClientId& c) -> const test::dmc::ClientInfo* { + return &client_info[ret_client_group_f(c)]; + }; + + auto client_disp_filter = [=] (const ClientId& i) -> bool { + return i < 3 || i >= (client_total_count - 3); + }; + + auto server_disp_filter = [=] (const ServerId& i) -> bool { + return i < 3 || i >= (server_total_count - 3); + }; + + + test::MySim *simulation; + + + // lambda to post a request to the identified server; called by client + test::SubmitFunc server_post_f = + [&simulation, + &cli_group, + &ret_client_group_f](const ServerId& server, + sim::TestRequest&& request, + const ClientId& client_id, + const test::dmc::ReqParams& req_params) { + test::DmcServer& s = simulation->get_server(server); + sim::Cost request_cost = cli_group[ret_client_group_f(client_id)].client_req_cost; + s.post(std::move(request), client_id, req_params, request_cost); + }; + + std::vector<std::vector<sim::CliInst>> cli_inst; + for (unsigned i = 0; i < client_groups; ++i) { + if (cli_group[i].client_wait == std::chrono::seconds(0)) { + cli_inst.push_back( + { { sim::req_op, + (uint32_t)cli_group[i].client_total_ops, + (double)cli_group[i].client_iops_goal, + (uint16_t)cli_group[i].client_outstanding_ops } } ); + } else { + 
cli_inst.push_back( + { { sim::wait_op, cli_group[i].client_wait }, + { sim::req_op, + (uint32_t)cli_group[i].client_total_ops, + (double)cli_group[i].client_iops_goal, + (uint16_t)cli_group[i].client_outstanding_ops } } ); + } + } + + simulation = new test::MySim(); + + test::DmcServer::ClientRespFunc client_response_f = + [&simulation](ClientId client_id, + const sim::TestResponse& resp, + const ServerId& server_id, + const dmc::PhaseType& phase, + const sim::Cost request_cost) { + simulation->get_client(client_id).receive_response(resp, + server_id, + phase, + request_cost); + }; + + test::CreateQueueF create_queue_f = + [&](test::DmcQueue::CanHandleRequestFunc can_f, + test::DmcQueue::HandleRequestFunc handle_f) -> test::DmcQueue* { + return new test::DmcQueue(client_info_f, + can_f, + handle_f, + server_soft_limit ? dmc::AtLimit::Allow : dmc::AtLimit::Wait, + anticipation_timeout); + }; + + + auto create_server_f = [&](ServerId id) -> test::DmcServer* { + unsigned i = ret_server_group_f(id); + return new test::DmcServer(id, + srv_group[i].server_iops, + srv_group[i].server_threads, + client_response_f, + test::dmc_server_accumulate_f, + create_queue_f); + }; + + auto create_client_f = [&](ClientId id) -> test::DmcClient* { + unsigned i = ret_client_group_f(id); + test::MySim::ClientBasedServerSelectFunc server_select_f; + unsigned client_server_select_range = cli_group[i].client_server_select_range; + if (!server_random_selection) { + server_select_f = simulation->make_server_select_alt_range(client_server_select_range); + } else { + server_select_f = simulation->make_server_select_ran_range(client_server_select_range); + } + return new test::DmcClient(id, + server_post_f, + std::bind(server_select_f, _1, id), + test::dmc_client_accumulate_f, + cli_inst[i]); + }; + +#if 1 + std::cout << "[global]" << std::endl << g_conf << std::endl; + for (unsigned i = 0; i < client_groups; ++i) { + std::cout << std::endl << "[client." 
<< i << "]" << std::endl; + std::cout << cli_group[i] << std::endl; + } + for (unsigned i = 0; i < server_groups; ++i) { + std::cout << std::endl << "[server." << i << "]" << std::endl; + std::cout << srv_group[i] << std::endl; + } + std::cout << std::endl; +#endif + + simulation->add_servers(server_total_count, create_server_f); + simulation->add_clients(client_total_count, create_client_f); + + simulation->run(); + simulation->display_stats(std::cout, + &test::server_data, &test::client_data, + server_disp_filter, client_disp_filter); + + delete simulation; +} // main + + +void test::client_data(std::ostream& out, + test::MySim* sim, + test::MySim::ClientFilter client_disp_filter, + int head_w, int data_w, int data_prec) { + // report how many ops were done by reservation and proportion for + // each client + + int total_r = 0; + out << std::setw(head_w) << "res_ops:"; + for (unsigned i = 0; i < sim->get_client_count(); ++i) { + const auto& client = sim->get_client(i); + auto r = client.get_accumulator().reservation_count; + total_r += r; + if (!client_disp_filter(i)) continue; + out << " " << std::setw(data_w) << r; + } + out << " " << std::setw(data_w) << std::setprecision(data_prec) << + std::fixed << total_r << std::endl; + + int total_p = 0; + out << std::setw(head_w) << "prop_ops:"; + for (unsigned i = 0; i < sim->get_client_count(); ++i) { + const auto& client = sim->get_client(i); + auto p = client.get_accumulator().proportion_count; + total_p += p; + if (!client_disp_filter(i)) continue; + out << " " << std::setw(data_w) << p; + } + out << " " << std::setw(data_w) << std::setprecision(data_prec) << + std::fixed << total_p << std::endl; +} + + +void test::server_data(std::ostream& out, + test::MySim* sim, + test::MySim::ServerFilter server_disp_filter, + int head_w, int data_w, int data_prec) { + out << std::setw(head_w) << "res_ops:"; + int total_r = 0; + for (unsigned i = 0; i < sim->get_server_count(); ++i) { + const auto& server = sim->get_server(i); 
+ auto rc = server.get_accumulator().reservation_count; + total_r += rc; + if (!server_disp_filter(i)) continue; + out << " " << std::setw(data_w) << rc; + } + out << " " << std::setw(data_w) << std::setprecision(data_prec) << + std::fixed << total_r << std::endl; + + out << std::setw(head_w) << "prop_ops:"; + int total_p = 0; + for (unsigned i = 0; i < sim->get_server_count(); ++i) { + const auto& server = sim->get_server(i); + auto pc = server.get_accumulator().proportion_count; + total_p += pc; + if (!server_disp_filter(i)) continue; + out << " " << std::setw(data_w) << pc; + } + out << " " << std::setw(data_w) << std::setprecision(data_prec) << + std::fixed << total_p << std::endl; + + const auto& q = sim->get_server(0).get_priority_queue(); + out << std::endl << + " k-way heap: " << q.get_heap_branching_factor() << std::endl + << std::endl; + +#ifdef PROFILE + crimson::ProfileCombiner<std::chrono::nanoseconds> art_combiner; + crimson::ProfileCombiner<std::chrono::nanoseconds> rct_combiner; + for (unsigned i = 0; i < sim->get_server_count(); ++i) { + const auto& q = sim->get_server(i).get_priority_queue(); + const auto& art = q.add_request_timer; + art_combiner.combine(art); + const auto& rct = q.request_complete_timer; + rct_combiner.combine(rct); + } + out << "Server add_request_timer: count:" << art_combiner.get_count() << + ", mean:" << art_combiner.get_mean() << + ", std_dev:" << art_combiner.get_std_dev() << + ", low:" << art_combiner.get_low() << + ", high:" << art_combiner.get_high() << std::endl; + out << "Server request_complete_timer: count:" << rct_combiner.get_count() << + ", mean:" << rct_combiner.get_mean() << + ", std_dev:" << rct_combiner.get_std_dev() << + ", low:" << rct_combiner.get_low() << + ", high:" << rct_combiner.get_high() << std::endl; + out << "Server combined mean: " << + (art_combiner.get_mean() + rct_combiner.get_mean()) << + std::endl; +#endif +} diff --git a/src/dmclock/sim/src/test_ssched.cc 
b/src/dmclock/sim/src/test_ssched.cc new file mode 100644 index 000000000..b06273dc0 --- /dev/null +++ b/src/dmclock/sim/src/test_ssched.cc @@ -0,0 +1,40 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#include "ssched_recs.h" +#include "ssched_server.h" +#include "ssched_client.h" + +#include "sim_recs.h" +#include "sim_server.h" +#include "sim_client.h" + +#include "test_ssched.h" + + +namespace test = crimson::test_simple_scheduler; +namespace ssched = crimson::simple_scheduler; + + +void test::simple_server_accumulate_f(test::SimpleAccum& a, + const ssched::NullData& add_info) { + ++a.request_count; +} + + +void test::simple_client_accumulate_f(test::SimpleAccum& a, + const ssched::NullData& ignore) { + // empty +} diff --git a/src/dmclock/sim/src/test_ssched.h b/src/dmclock/sim/src/test_ssched.h new file mode 100644 index 000000000..0d778709a --- /dev/null +++ b/src/dmclock/sim/src/test_ssched.h @@ -0,0 +1,64 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#include "ssched_server.h" +#include "ssched_client.h" + +#include "sim_recs.h" +#include "sim_server.h" +#include "sim_client.h" + +#include "simulate.h" + + +namespace crimson { + namespace test_simple_scheduler { + + namespace ssched = crimson::simple_scheduler; + namespace sim = crimson::qos_simulation; + + using Time = double; + + struct SimpleAccum { + uint32_t request_count = 0; + }; + + using SimpleQueue = ssched::SimpleQueue<ClientId,sim::TestRequest,Time>; + + using SimpleServer = sim::SimulatedServer<SimpleQueue, + ssched::ReqParams, + ssched::NullData, + SimpleAccum>; + using SimpleClient = sim::SimulatedClient<ssched::ServiceTracker<ServerId>, + ssched::ReqParams, + ssched::NullData, + SimpleAccum>; + + using CreateQueueF = + std::function<SimpleQueue*(SimpleQueue::CanHandleRequestFunc, + SimpleQueue::HandleRequestFunc)>; + + + using MySim = sim::Simulation<ServerId,ClientId,SimpleServer,SimpleClient>; + + using SubmitFunc = SimpleClient::SubmitFunc; + + extern void simple_server_accumulate_f(SimpleAccum& a, + const ssched::NullData& add_info); + + extern void simple_client_accumulate_f(SimpleAccum& a, + const ssched::NullData& ignore); + } // namespace test_simple +} // namespace crimson diff --git a/src/dmclock/sim/src/test_ssched_main.cc b/src/dmclock/sim/src/test_ssched_main.cc new file mode 100644 index 000000000..ace4f8cce --- /dev/null +++ b/src/dmclock/sim/src/test_ssched_main.cc @@ -0,0 +1,199 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#include "test_ssched.h" + + +#ifdef PROFILE +#include "profile.h" +#endif + + +namespace test = crimson::test_simple_scheduler; +namespace ssched = crimson::simple_scheduler; +namespace sim = crimson::qos_simulation; + +using namespace std::placeholders; + + +namespace crimson { + namespace test_simple_scheduler { + void client_data(std::ostream& out, + test::MySim* sim, + test::MySim::ClientFilter client_disp_filter, + int head_w, int data_w, int data_prec); + + void server_data(std::ostream& out, + test::MySim* sim, + test::MySim::ServerFilter server_disp_filter, + int head_w, int data_w, int data_prec); + } // namespace test_simple +} // namespace crimson + + +using Cost = uint32_t; + + +int main(int argc, char* argv[]) { + // server params + + const unsigned server_count = 100; + const unsigned server_iops = 40; + const unsigned server_threads = 1; + + // client params + + const unsigned client_total_ops = 1000; + const unsigned client_count = 100; + const unsigned client_server_select_range = 10; + const unsigned client_wait_count = 1; + const unsigned client_iops_goal = 50; + const unsigned client_outstanding_ops = 100; + const std::chrono::seconds client_wait(10); + + auto client_disp_filter = [=] (const ClientId& i) -> bool { + return i < 3 || i >= (client_count - 3); + }; + + auto server_disp_filter = [=] (const ServerId& i) -> bool { + return i < 3 || i >= (server_count - 3); + }; + + + test::MySim *simulation; + + // lambda to post a request to the identified server; called by client + test::SubmitFunc server_post_f = + [&simulation](const ServerId& server_id, + sim::TestRequest&& request, + const ClientId& client_id, + const ssched::ReqParams& req_params) { + auto& server = simulation->get_server(server_id); + server.post(std::move(request), client_id, req_params, 1u); + }; + + static std::vector<sim::CliInst> no_wait = + { { sim::req_op, client_total_ops, client_iops_goal, client_outstanding_ops } }; + static std::vector<sim::CliInst> wait = 
+ { { sim::wait_op, client_wait }, + { sim::req_op, client_total_ops, client_iops_goal, client_outstanding_ops } }; + + simulation = new test::MySim(); + +#if 1 + test::MySim::ClientBasedServerSelectFunc server_select_f = + simulation->make_server_select_alt_range(client_server_select_range); +#elif 0 + test::MySim::ClientBasedServerSelectFunc server_select_f = + std::bind(&test::MySim::server_select_random, simulation, _1, _2); +#else + test::MySim::ClientBasedServerSelectFunc server_select_f = + std::bind(&test::MySim::server_select_0, simulation, _1, _2); +#endif + + test::SimpleServer::ClientRespFunc client_response_f = + [&simulation](ClientId client_id, + const sim::TestResponse& resp, + const ServerId& server_id, + const ssched::NullData& resp_params, + Cost request_cost) { + simulation->get_client(client_id).receive_response(resp, + server_id, + resp_params, + request_cost); + }; + + test::CreateQueueF create_queue_f = + [&](test::SimpleQueue::CanHandleRequestFunc can_f, + test::SimpleQueue::HandleRequestFunc handle_f) -> test::SimpleQueue* { + return new test::SimpleQueue(can_f, handle_f); + }; + + auto create_server_f = [&](ServerId id) -> test::SimpleServer* { + return new test::SimpleServer(id, + server_iops, server_threads, + client_response_f, + test::simple_server_accumulate_f, + create_queue_f); + }; + + auto create_client_f = [&](ClientId id) -> test::SimpleClient* { + return new test::SimpleClient(id, + server_post_f, + std::bind(server_select_f, _1, id), + test::simple_client_accumulate_f, + id < (client_count - client_wait_count) + ? 
no_wait : wait); + }; + + simulation->add_servers(server_count, create_server_f); + simulation->add_clients(client_count, create_client_f); + + simulation->run(); + simulation->display_stats(std::cout, + &test::server_data, &test::client_data, + server_disp_filter, client_disp_filter); +} // main + + +void test::client_data(std::ostream& out, + test::MySim* sim, + test::MySim::ClientFilter client_disp_filter, + int head_w, int data_w, int data_prec) { + // empty +} + + +void test::server_data(std::ostream& out, + test::MySim* sim, + test::MySim::ServerFilter server_disp_filter, + int head_w, int data_w, int data_prec) { + out << std::setw(head_w) << "requests:"; + int total_req = 0; + for (unsigned i = 0; i < sim->get_server_count(); ++i) { + const auto& server = sim->get_server(i); + auto req_count = server.get_accumulator().request_count; + total_req += req_count; + if (!server_disp_filter(i)) continue; + out << std::setw(data_w) << req_count; + } + out << std::setw(data_w) << std::setprecision(data_prec) << + std::fixed << total_req << std::endl; + +#ifdef PROFILE + crimson::ProfileCombiner<std::chrono::nanoseconds> art_combiner; + crimson::ProfileCombiner<std::chrono::nanoseconds> rct_combiner; + for (unsigned i = 0; i < sim->get_server_count(); ++i) { + const auto& q = sim->get_server(i).get_priority_queue(); + const auto& art = q.add_request_timer; + art_combiner.combine(art); + const auto& rct = q.request_complete_timer; + rct_combiner.combine(rct); + } + out << "Server add_request_timer: count:" << art_combiner.get_count() << + ", mean:" << art_combiner.get_mean() << + ", std_dev:" << art_combiner.get_std_dev() << + ", low:" << art_combiner.get_low() << + ", high:" << art_combiner.get_high() << std::endl; + out << "Server request_complete_timer: count:" << rct_combiner.get_count() << + ", mean:" << rct_combiner.get_mean() << + ", std_dev:" << rct_combiner.get_std_dev() << + ", low:" << rct_combiner.get_low() << + ", high:" << rct_combiner.get_high() << 
std::endl; + out << "Server combined mean: " << + (art_combiner.get_mean() + rct_combiner.get_mean()) << + std::endl; +#endif +} diff --git a/src/dmclock/src/CMakeLists.txt b/src/dmclock/src/CMakeLists.txt new file mode 100644 index 000000000..d13229e40 --- /dev/null +++ b/src/dmclock/src/CMakeLists.txt @@ -0,0 +1,14 @@ +set(dmc_srcs dmclock_util.cc ../support/src/run_every.cc) + +add_library(dmclock STATIC ${dmc_srcs}) +add_library(dmclock::dmclock ALIAS dmclock) + +target_compile_options(dmclock PRIVATE + "-Wno-write-strings" "-Wall") +target_include_directories(dmclock PUBLIC + $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src> + $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/support/src>) + +target_link_libraries(dmclock + PUBLIC Boost::boost + PRIVATE Threads::Threads) diff --git a/src/dmclock/src/dmclock_client.h b/src/dmclock/src/dmclock_client.h new file mode 100644 index 000000000..1143147ce --- /dev/null +++ b/src/dmclock/src/dmclock_client.h @@ -0,0 +1,289 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2017 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#pragma once + +#include <map> +#include <deque> +#include <chrono> +#include <thread> +#include <mutex> +#include <condition_variable> + +#include "../support/src/run_every.h" +#include "dmclock_util.h" +#include "dmclock_recs.h" + + +namespace crimson { + namespace dmclock { + + // OrigTracker is a best-effort implementation of the the original + // dmClock calculations of delta and rho. It adheres to an + // interface, implemented via a template type, that allows it to + // be replaced with an alternative. 
The interface consists of the + // static create, prepare_req, resp_update, and get_last_delta + // functions. + class OrigTracker { + Counter delta_prev_req; + Counter rho_prev_req; + uint32_t my_delta; + uint32_t my_rho; + + public: + + OrigTracker(Counter global_delta, + Counter global_rho) : + delta_prev_req(global_delta), + rho_prev_req(global_rho), + my_delta(0), + my_rho(0) + { /* empty */ } + + static inline OrigTracker create(Counter the_delta, Counter the_rho) { + return OrigTracker(the_delta, the_rho); + } + + inline ReqParams prepare_req(Counter& the_delta, Counter& the_rho) { + Counter delta_out = the_delta - delta_prev_req - my_delta; + Counter rho_out = the_rho - rho_prev_req - my_rho; + delta_prev_req = the_delta; + rho_prev_req = the_rho; + my_delta = 0; + my_rho = 0; + return ReqParams(uint32_t(delta_out), uint32_t(rho_out)); + } + + inline void resp_update(PhaseType phase, + Counter& the_delta, + Counter& the_rho, + Cost cost) { + the_delta += cost; + my_delta += cost; + if (phase == PhaseType::reservation) { + the_rho += cost; + my_rho += cost; + } + } + + inline Counter get_last_delta() const { + return delta_prev_req; + } + }; // struct OrigTracker + + + // BorrowingTracker always returns a positive delta and rho. If + // not enough responses have come in to allow that, we will borrow + // a future response and repay it later. 
+ class BorrowingTracker { + Counter delta_prev_req; + Counter rho_prev_req; + Counter delta_borrow; + Counter rho_borrow; + + public: + + BorrowingTracker(Counter global_delta, Counter global_rho) : + delta_prev_req(global_delta), + rho_prev_req(global_rho), + delta_borrow(0), + rho_borrow(0) + { /* empty */ } + + static inline BorrowingTracker create(Counter the_delta, + Counter the_rho) { + return BorrowingTracker(the_delta, the_rho); + } + + inline Counter calc_with_borrow(const Counter& global, + const Counter& previous, + Counter& borrow) { + Counter result = global - previous; + if (0 == result) { + // if no replies have come in, borrow one from the future + ++borrow; + return 1; + } else if (result > borrow) { + // if we can give back all of what we borrowed, do so + result -= borrow; + borrow = 0; + return result; + } else { + // can only return part of what was borrowed in order to + // return positive + borrow = borrow - result + 1; + return 1; + } + } + + inline ReqParams prepare_req(Counter& the_delta, Counter& the_rho) { + Counter delta_out = + calc_with_borrow(the_delta, delta_prev_req, delta_borrow); + Counter rho_out = + calc_with_borrow(the_rho, rho_prev_req, rho_borrow); + delta_prev_req = the_delta; + rho_prev_req = the_rho; + return ReqParams(uint32_t(delta_out), uint32_t(rho_out)); + } + + inline void resp_update(PhaseType phase, + Counter& the_delta, + Counter& the_rho, + Counter cost) { + the_delta += cost; + if (phase == PhaseType::reservation) { + the_rho += cost; + } + } + + inline Counter get_last_delta() const { + return delta_prev_req; + } + }; // struct BorrowingTracker + + + /* + * S is server identifier type + * + * T is the server info class that adheres to ServerTrackerIfc + * interface + */ + template<typename S, typename T = OrigTracker> + class ServiceTracker { + // we don't want to include gtest.h just for FRIEND_TEST + friend class dmclock_client_server_erase_Test; + + using TimePoint = 
decltype(std::chrono::steady_clock::now()); + using Duration = std::chrono::milliseconds; + using MarkPoint = std::pair<TimePoint,Counter>; + + Counter delta_counter; // # reqs completed + Counter rho_counter; // # reqs completed via reservation + std::map<S,T> server_map; + mutable std::mutex data_mtx; // protects Counters and map + + using DataGuard = std::lock_guard<decltype(data_mtx)>; + + // clean config + + std::deque<MarkPoint> clean_mark_points; + Duration clean_age; // age at which server tracker cleaned + + // NB: All threads declared at end, so they're destructed firs! + + std::unique_ptr<RunEvery> cleaning_job; + + + public: + + // we have to start the counters at 1, as 0 is used in the + // cleaning process + template<typename Rep, typename Per> + ServiceTracker(std::chrono::duration<Rep,Per> _clean_every, + std::chrono::duration<Rep,Per> _clean_age) : + delta_counter(1), + rho_counter(1), + clean_age(std::chrono::duration_cast<Duration>(_clean_age)) + { + cleaning_job = + std::unique_ptr<RunEvery>( + new RunEvery(_clean_every, + std::bind(&ServiceTracker::do_clean, this))); + } + + + // the reason we're overloading the constructor rather than + // using default values for the arguments is so that callers + // have to either use all defaults or specify all timings; with + // default arguments they could specify some without others + ServiceTracker() : + ServiceTracker(std::chrono::minutes(5), std::chrono::minutes(10)) + { + // empty + } + + + /* + * Incorporates the response data received into the counters. 
+ */ + void track_resp(const S& server_id, + const PhaseType& phase, + Counter request_cost = 1u) { + DataGuard g(data_mtx); + + auto it = server_map.find(server_id); + if (server_map.end() == it) { + // this code can only run if a request did not precede the + // response or if the record was cleaned up b/w when + // the request was made and now + auto i = server_map.emplace(server_id, + T::create(delta_counter, rho_counter)); + it = i.first; + } + it->second.resp_update(phase, delta_counter, rho_counter, request_cost); + } + + /* + * Returns the ReqParams for the given server. + */ + ReqParams get_req_params(const S& server) { + DataGuard g(data_mtx); + auto it = server_map.find(server); + if (server_map.end() == it) { + server_map.emplace(server, + T::create(delta_counter, rho_counter)); + return ReqParams(1, 1); + } else { + return it->second.prepare_req(delta_counter, rho_counter); + } + } + + private: + + /* + * This is being called regularly by RunEvery. Every time it's + * called it notes the time and delta counter (mark point) in a + * deque. It also looks at the deque to find the most recent + * mark point that is older than clean_age. It then walks the + * map and delete all server entries that were last used before + * that mark point. 
+ */ + void do_clean() { + TimePoint now = std::chrono::steady_clock::now(); + DataGuard g(data_mtx); + clean_mark_points.emplace_back(MarkPoint(now, delta_counter)); + + Counter earliest = 0; + auto point = clean_mark_points.front(); + while (point.first <= now - clean_age) { + earliest = point.second; + clean_mark_points.pop_front(); + point = clean_mark_points.front(); + } + + if (earliest > 0) { + for (auto i = server_map.begin(); + i != server_map.end(); + /* empty */) { + auto i2 = i++; + if (i2->second.get_last_delta() <= earliest) { + server_map.erase(i2); + } + } + } + } // do_clean + }; // class ServiceTracker + } +} diff --git a/src/dmclock/src/dmclock_recs.h b/src/dmclock/src/dmclock_recs.h new file mode 100644 index 000000000..a7dc44108 --- /dev/null +++ b/src/dmclock/src/dmclock_recs.h @@ -0,0 +1,74 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2017 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#pragma once + + +#include <ostream> +#include <assert.h> + + +namespace crimson { + namespace dmclock { + using Counter = uint64_t; + + // we're abstracting cost to its own type to better allow for + // future changes; we're assuming that Cost is relatively small + // and that it would be more efficient to pass-by-value than + // by-reference. + using Cost = uint32_t; + + enum class PhaseType : uint8_t { reservation, priority }; + + inline std::ostream& operator<<(std::ostream& out, const PhaseType& phase) { + out << (PhaseType::reservation == phase ? 
"reservation" : "priority"); + return out; + } + + struct ReqParams { + // count of all replies since last request + uint32_t delta; + + // count of reservation replies since last request + uint32_t rho; + + ReqParams(uint32_t _delta, uint32_t _rho) : + delta(_delta), + rho(_rho) + { + assert(rho <= delta); + } + + ReqParams() : + ReqParams(0, 0) + { + // empty + } + + ReqParams(const ReqParams& other) : + delta(other.delta), + rho(other.rho) + { + // empty + } + + friend std::ostream& operator<<(std::ostream& out, const ReqParams& rp) { + out << "ReqParams{ delta:" << rp.delta << + ", rho:" << rp.rho << " }"; + return out; + } + }; // class ReqParams + } +} diff --git a/src/dmclock/src/dmclock_server.h b/src/dmclock/src/dmclock_server.h new file mode 100644 index 000000000..79e7abcef --- /dev/null +++ b/src/dmclock/src/dmclock_server.h @@ -0,0 +1,1799 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2017 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#pragma once + +/* COMPILATION OPTIONS + * + * The prop_heap does not seem to be necessary. The only thing it + * would help with is quickly finding the minimum proportion/prioity + * when an idle client became active. To have the code maintain the + * proportional heap, define USE_PROP_HEAP (i.e., compiler argument + * -DUSE_PROP_HEAP). 
+ */ + +#include <assert.h> + +#include <cmath> +#include <memory> +#include <map> +#include <deque> +#include <queue> +#include <atomic> +#include <mutex> +#include <condition_variable> +#include <thread> +#include <iostream> +#include <sstream> +#include <limits> + +#include <boost/variant.hpp> + +#include "indirect_intrusive_heap.h" +#include "../support/src/run_every.h" +#include "dmclock_util.h" +#include "dmclock_recs.h" + +#ifdef PROFILE +#include "profile.h" +#endif + + +namespace crimson { + + namespace dmclock { + + namespace c = crimson; + + constexpr double max_tag = std::numeric_limits<double>::is_iec559 ? + std::numeric_limits<double>::infinity() : + std::numeric_limits<double>::max(); + constexpr double min_tag = std::numeric_limits<double>::is_iec559 ? + -std::numeric_limits<double>::infinity() : + std::numeric_limits<double>::lowest(); + constexpr unsigned tag_modulo = 1000000; + + constexpr auto standard_idle_age = std::chrono::seconds(300); + constexpr auto standard_erase_age = std::chrono::seconds(600); + constexpr auto standard_check_time = std::chrono::seconds(60); + constexpr auto aggressive_check_time = std::chrono::seconds(5); + constexpr unsigned standard_erase_max = 2000; + + enum class AtLimit { + // requests are delayed until the limit is restored + Wait, + // requests are allowed to exceed their limit, if all other reservations + // are met and below their limits + Allow, + // if an incoming request would exceed its limit, add_request() will + // reject it with EAGAIN instead of adding it to the queue. cannot be used + // with DelayedTagCalc, because add_request() needs an accurate tag + Reject, + }; + + // when AtLimit::Reject is used, only start rejecting requests once their + // limit is above this threshold. 
requests under this threshold are + // enqueued and processed like AtLimit::Wait + using RejectThreshold = Time; + + // the AtLimit constructor parameter can either accept AtLimit or a value + // for RejectThreshold (which implies AtLimit::Reject) + using AtLimitParam = boost::variant<AtLimit, RejectThreshold>; + + struct ClientInfo { + double reservation; // minimum + double weight; // proportional + double limit; // maximum + + // multiplicative inverses of above, which we use in calculations + // and don't want to recalculate repeatedly + double reservation_inv; + double weight_inv; + double limit_inv; + + // order parameters -- min, "normal", max + ClientInfo(double _reservation, double _weight, double _limit) { + update(_reservation, _weight, _limit); + } + + inline void update(double _reservation, double _weight, double _limit) { + reservation = _reservation; + weight = _weight; + limit = _limit; + reservation_inv = (0.0 == reservation) ? 0.0 : 1.0 / reservation; + weight_inv = (0.0 == weight) ? 0.0 : 1.0 / weight; + limit_inv = (0.0 == limit) ? 
0.0 : 1.0 / limit; + } + + friend std::ostream& operator<<(std::ostream& out, + const ClientInfo& client) { + out << + "{ ClientInfo:: r:" << client.reservation << + " w:" << std::fixed << client.weight << + " l:" << std::fixed << client.limit << + " 1/r:" << std::fixed << client.reservation_inv << + " 1/w:" << std::fixed << client.weight_inv << + " 1/l:" << std::fixed << client.limit_inv << + " }"; + return out; + } + }; // class ClientInfo + + + struct RequestTag { + double reservation; + double proportion; + double limit; + uint32_t delta; + uint32_t rho; + Cost cost; + bool ready; // true when within limit + Time arrival; + + RequestTag(const RequestTag& prev_tag, + const ClientInfo& client, + const uint32_t _delta, + const uint32_t _rho, + const Time time, + const Cost _cost = 1u, + const double anticipation_timeout = 0.0) : + delta(_delta), + rho(_rho), + cost(_cost), + ready(false), + arrival(time) + { + assert(cost > 0); + Time max_time = time; + if (time - anticipation_timeout < prev_tag.arrival) + max_time -= anticipation_timeout; + + reservation = tag_calc(max_time, + prev_tag.reservation, + client.reservation_inv, + rho, + true, + cost); + proportion = tag_calc(max_time, + prev_tag.proportion, + client.weight_inv, + delta, + true, + cost); + limit = tag_calc(max_time, + prev_tag.limit, + client.limit_inv, + delta, + false, + cost); + + assert(reservation < max_tag || proportion < max_tag); + } + + RequestTag(const RequestTag& prev_tag, + const ClientInfo& client, + const ReqParams req_params, + const Time time, + const Cost cost = 1u, + const double anticipation_timeout = 0.0) : + RequestTag(prev_tag, client, req_params.delta, req_params.rho, time, + cost, anticipation_timeout) + { /* empty */ } + + RequestTag(const double _res, const double _prop, const double _lim, + const Time _arrival, + const uint32_t _delta = 0, + const uint32_t _rho = 0, + const Cost _cost = 1u) : + reservation(_res), + proportion(_prop), + limit(_lim), + delta(_delta), + 
rho(_rho), + cost(_cost), + ready(false), + arrival(_arrival) + { + assert(cost > 0); + assert(reservation < max_tag || proportion < max_tag); + } + + RequestTag(const RequestTag& other) : + reservation(other.reservation), + proportion(other.proportion), + limit(other.limit), + delta(other.delta), + rho(other.rho), + cost(other.cost), + ready(other.ready), + arrival(other.arrival) + { /* empty */ } + + static std::string format_tag_change(double before, double after) { + if (before == after) { + return std::string("same"); + } else { + std::stringstream ss; + ss << format_tag(before) << "=>" << format_tag(after); + return ss.str(); + } + } + + static std::string format_tag(double value) { + if (max_tag == value) { + return std::string("max"); + } else if (min_tag == value) { + return std::string("min"); + } else { + return format_time(value, tag_modulo); + } + } + + private: + + static double tag_calc(const Time time, + const double prev, + const double increment, + const uint32_t dist_req_val, + const bool extreme_is_high, + const Cost cost) { + if (0.0 == increment) { + return extreme_is_high ? max_tag : min_tag; + } else { + // insure 64-bit arithmetic before conversion to double + double tag_increment = increment * (uint64_t(dist_req_val) + cost); + return std::max(time, prev + tag_increment); + } + } + + friend std::ostream& operator<<(std::ostream& out, + const RequestTag& tag) { + out << + "{ RequestTag:: ready:" << (tag.ready ? "true" : "false") << + " r:" << format_tag(tag.reservation) << + " p:" << format_tag(tag.proportion) << + " l:" << format_tag(tag.limit) << +#if 0 // try to resolve this to make sure Time is operator<<'able. + " arrival:" << tag.arrival << +#endif + " }"; + return out; + } + }; // class RequestTag + + // C is client identifier type, R is request type, + // IsDelayed controls whether tag calculation is delayed until the request + // reaches the front of its queue. 
This is an optimization over the + // originally published dmclock algorithm, allowing it to use the most + // recent values of rho and delta. + // U1 determines whether to use client information function dynamically, + // B is heap branching factor + template<typename C, typename R, bool IsDelayed, bool U1, unsigned B> + class PriorityQueueBase { + // we don't want to include gtest.h just for FRIEND_TEST + friend class dmclock_server_client_idle_erase_Test; + + // types used for tag dispatch to select between implementations + using TagCalc = std::integral_constant<bool, IsDelayed>; + using DelayedTagCalc = std::true_type; + using ImmediateTagCalc = std::false_type; + + public: + + using RequestRef = std::unique_ptr<R>; + + protected: + + using Clock = std::chrono::steady_clock; + using TimePoint = Clock::time_point; + using Duration = std::chrono::milliseconds; + using MarkPoint = std::pair<TimePoint,Counter>; + + enum class ReadyOption {ignore, lowers, raises}; + + // forward decl for friend decls + template<double RequestTag::*, ReadyOption, bool> + struct ClientCompare; + + class ClientReq { + friend PriorityQueueBase; + + RequestTag tag; + C client_id; + RequestRef request; + + public: + + ClientReq(const RequestTag& _tag, + const C& _client_id, + RequestRef&& _request) : + tag(_tag), + client_id(_client_id), + request(std::move(_request)) + { + // empty + } + + friend std::ostream& operator<<(std::ostream& out, const ClientReq& c) { + out << "{ ClientReq:: tag:" << c.tag << " client:" << + c.client_id << " }"; + return out; + } + }; // class ClientReq + + struct RequestMeta { + C client_id; + RequestTag tag; + + RequestMeta(const C& _client_id, const RequestTag& _tag) : + client_id(_client_id), + tag(_tag) + { + // empty + } + }; + + public: + + // NOTE: ClientRec is in the "public" section for compatibility + // with g++ 4.8.4, which complains if it's not. By g++ 6.3.1 + // ClientRec could be "protected" with no issue. 
[See comments + // associated with function submit_top_request.] + class ClientRec { + friend PriorityQueueBase<C,R,IsDelayed,U1,B>; + + C client; + RequestTag prev_tag; + std::deque<ClientReq> requests; + + // amount added from the proportion tag as a result of + // an idle client becoming unidle + double prop_delta = 0.0; + + c::IndIntruHeapData reserv_heap_data {}; + c::IndIntruHeapData lim_heap_data {}; + c::IndIntruHeapData ready_heap_data {}; +#if USE_PROP_HEAP + c::IndIntruHeapData prop_heap_data {}; +#endif + + public: + + const ClientInfo* info; + bool idle; + Counter last_tick; + uint32_t cur_rho; + uint32_t cur_delta; + + ClientRec(C _client, + const ClientInfo* _info, + Counter current_tick) : + client(_client), + prev_tag(0.0, 0.0, 0.0, TimeZero), + info(_info), + idle(true), + last_tick(current_tick), + cur_rho(1), + cur_delta(1) + { + // empty + } + + inline const RequestTag& get_req_tag() const { + return prev_tag; + } + + static inline void assign_unpinned_tag(double& lhs, const double rhs) { + if (rhs != max_tag && rhs != min_tag) { + lhs = rhs; + } + } + + inline void update_req_tag(const RequestTag& _prev, + const Counter& _tick) { + assign_unpinned_tag(prev_tag.reservation, _prev.reservation); + assign_unpinned_tag(prev_tag.limit, _prev.limit); + assign_unpinned_tag(prev_tag.proportion, _prev.proportion); + prev_tag.arrival = _prev.arrival; + last_tick = _tick; + } + + inline void add_request(const RequestTag& tag, RequestRef&& request) { + requests.emplace_back(tag, client, std::move(request)); + } + + inline const ClientReq& next_request() const { + return requests.front(); + } + + inline ClientReq& next_request() { + return requests.front(); + } + + inline void pop_request() { + requests.pop_front(); + } + + inline bool has_request() const { + return !requests.empty(); + } + + inline size_t request_count() const { + return requests.size(); + } + + // NB: because a deque is the underlying structure, this + // operation might be expensive + 
bool remove_by_req_filter_fw(std::function<bool(RequestRef&&)> filter_accum) { + bool any_removed = false; + for (auto i = requests.begin(); + i != requests.end(); + /* no inc */) { + if (filter_accum(std::move(i->request))) { + any_removed = true; + i = requests.erase(i); + } else { + ++i; + } + } + return any_removed; + } + + // NB: because a deque is the underlying structure, this + // operation might be expensive + bool remove_by_req_filter_bw(std::function<bool(RequestRef&&)> filter_accum) { + bool any_removed = false; + for (auto i = requests.rbegin(); + i != requests.rend(); + /* no inc */) { + if (filter_accum(std::move(i->request))) { + any_removed = true; + i = decltype(i){ requests.erase(std::next(i).base()) }; + } else { + ++i; + } + } + return any_removed; + } + + inline bool + remove_by_req_filter(std::function<bool(RequestRef&&)> filter_accum, + bool visit_backwards) { + if (visit_backwards) { + return remove_by_req_filter_bw(filter_accum); + } else { + return remove_by_req_filter_fw(filter_accum); + } + } + + friend std::ostream& + operator<<(std::ostream& out, + const typename PriorityQueueBase::ClientRec& e) { + out << "{ ClientRec::" << + " client:" << e.client << + " prev_tag:" << e.prev_tag << + " req_count:" << e.requests.size() << + " top_req:"; + if (e.has_request()) { + out << e.next_request(); + } else { + out << "none"; + } + out << " }"; + + return out; + } + }; // class ClientRec + + using ClientRecRef = std::shared_ptr<ClientRec>; + + // when we try to get the next request, we'll be in one of three + // situations -- we'll have one to return, have one that can + // fire in the future, or not have any + enum class NextReqType { returning, future, none }; + + // specifies which queue next request will get popped from + enum class HeapId { reservation, ready }; + + // this is returned from next_req to tell the caller the situation + struct NextReq { + NextReqType type; + union { + HeapId heap_id; + Time when_ready; + }; + + inline 
explicit NextReq() : + type(NextReqType::none) + { } + + inline NextReq(HeapId _heap_id) : + type(NextReqType::returning), + heap_id(_heap_id) + { } + + inline NextReq(Time _when_ready) : + type(NextReqType::future), + when_ready(_when_ready) + { } + + // calls to this are clearer than calls to the default + // constructor + static inline NextReq none() { + return NextReq(); + } + }; + + + // a function that can be called to look up client information + using ClientInfoFunc = std::function<const ClientInfo*(const C&)>; + + + bool empty() const { + DataGuard g(data_mtx); + return (resv_heap.empty() || ! resv_heap.top().has_request()); + } + + + size_t client_count() const { + DataGuard g(data_mtx); + return resv_heap.size(); + } + + + size_t request_count() const { + DataGuard g(data_mtx); + size_t total = 0; + for (auto i = resv_heap.cbegin(); i != resv_heap.cend(); ++i) { + total += i->request_count(); + } + return total; + } + + + bool remove_by_req_filter(std::function<bool(RequestRef&&)> filter_accum, + bool visit_backwards = false) { + bool any_removed = false; + DataGuard g(data_mtx); + for (auto i : client_map) { + bool modified = + i.second->remove_by_req_filter(filter_accum, visit_backwards); + if (modified) { + resv_heap.adjust(*i.second); + limit_heap.adjust(*i.second); + ready_heap.adjust(*i.second); +#if USE_PROP_HEAP + prop_heap.adjust(*i.second); +#endif + any_removed = true; + } + } + return any_removed; + } + + + // use as a default value when no accumulator is provide + static void request_sink(RequestRef&& req) { + // do nothing + } + + + void remove_by_client(const C& client, + bool reverse = false, + std::function<void (RequestRef&&)> accum = request_sink) { + DataGuard g(data_mtx); + + auto i = client_map.find(client); + + if (i == client_map.end()) return; + + if (reverse) { + for (auto j = i->second->requests.rbegin(); + j != i->second->requests.rend(); + ++j) { + accum(std::move(j->request)); + } + } else { + for (auto j = 
i->second->requests.begin(); + j != i->second->requests.end(); + ++j) { + accum(std::move(j->request)); + } + } + + i->second->requests.clear(); + + resv_heap.adjust(*i->second); + limit_heap.adjust(*i->second); + ready_heap.adjust(*i->second); +#if USE_PROP_HEAP + prop_heap.adjust(*i->second); +#endif + } + + + unsigned get_heap_branching_factor() const { + return B; + } + + + void update_client_info(const C& client_id) { + DataGuard g(data_mtx); + auto client_it = client_map.find(client_id); + if (client_map.end() != client_it) { + ClientRec& client = (*client_it->second); + client.info = client_info_f(client_id); + } + } + + + void update_client_infos() { + DataGuard g(data_mtx); + for (auto i : client_map) { + i.second->info = client_info_f(i.second->client); + } + } + + + friend std::ostream& operator<<(std::ostream& out, + const PriorityQueueBase& q) { + std::lock_guard<decltype(q.data_mtx)> guard(q.data_mtx); + + out << "{ PriorityQueue::"; + for (const auto& c : q.client_map) { + out << " { client:" << c.first << ", record:" << *c.second << + " }"; + } + if (!q.resv_heap.empty()) { + const auto& resv = q.resv_heap.top(); + out << " { reservation_top:" << resv << " }"; + const auto& ready = q.ready_heap.top(); + out << " { ready_top:" << ready << " }"; + const auto& limit = q.limit_heap.top(); + out << " { limit_top:" << limit << " }"; + } else { + out << " HEAPS-EMPTY"; + } + out << " }"; + + return out; + } + + // for debugging + void display_queues(std::ostream& out, + bool show_res = true, + bool show_lim = true, + bool show_ready = true, + bool show_prop = true) const { + auto filter = [](const ClientRec& e)->bool { return true; }; + DataGuard g(data_mtx); + if (show_res) { + resv_heap.display_sorted(out << "RESER:", filter); + } + if (show_lim) { + limit_heap.display_sorted(out << "LIMIT:", filter); + } + if (show_ready) { + ready_heap.display_sorted(out << "READY:", filter); + } +#if USE_PROP_HEAP + if (show_prop) { + prop_heap.display_sorted(out << 
"PROPO:", filter); + } +#endif + } // display_queues + + + protected: + + // The ClientCompare functor is essentially doing a precedes? + // operator, returning true if and only if the first parameter + // must precede the second parameter. If the second must precede + // the first, or if they are equivalent, false should be + // returned. The reason for this behavior is that it will be + // called to test if two items are out of order and if true is + // returned it will reverse the items. Therefore false is the + // default return when it doesn't matter to prevent unnecessary + // re-ordering. + // + // The template is supporting variations in sorting based on the + // heap in question and allowing these variations to be handled + // at compile-time. + // + // tag_field determines which tag is being used for comparison + // + // ready_opt determines how the ready flag influences the sort + // + // use_prop_delta determines whether the proportional delta is + // added in for comparison + template<double RequestTag::*tag_field, + ReadyOption ready_opt, + bool use_prop_delta> + struct ClientCompare { + bool operator()(const ClientRec& n1, const ClientRec& n2) const { + if (n1.has_request()) { + if (n2.has_request()) { + const auto& t1 = n1.next_request().tag; + const auto& t2 = n2.next_request().tag; + if (ReadyOption::ignore == ready_opt || t1.ready == t2.ready) { + // if we don't care about ready or the ready values are the same + if (use_prop_delta) { + return (t1.*tag_field + n1.prop_delta) < + (t2.*tag_field + n2.prop_delta); + } else { + return t1.*tag_field < t2.*tag_field; + } + } else if (ReadyOption::raises == ready_opt) { + // use_ready == true && the ready fields are different + return t1.ready; + } else { + return t2.ready; + } + } else { + // n1 has request but n2 does not + return true; + } + } else if (n2.has_request()) { + // n2 has request but n1 does not + return false; + } else { + // both have none; keep stable w false + return false; + } + } + 
}; + + ClientInfoFunc client_info_f; + static constexpr bool is_dynamic_cli_info_f = U1; + + mutable std::mutex data_mtx; + using DataGuard = std::lock_guard<decltype(data_mtx)>; + + // stable mapping between client ids and client queues + std::map<C,ClientRecRef> client_map; + + c::IndIntruHeap<ClientRecRef, + ClientRec, + &ClientRec::reserv_heap_data, + ClientCompare<&RequestTag::reservation, + ReadyOption::ignore, + false>, + B> resv_heap; +#if USE_PROP_HEAP + c::IndIntruHeap<ClientRecRef, + ClientRec, + &ClientRec::prop_heap_data, + ClientCompare<&RequestTag::proportion, + ReadyOption::ignore, + true>, + B> prop_heap; +#endif + c::IndIntruHeap<ClientRecRef, + ClientRec, + &ClientRec::lim_heap_data, + ClientCompare<&RequestTag::limit, + ReadyOption::lowers, + false>, + B> limit_heap; + c::IndIntruHeap<ClientRecRef, + ClientRec, + &ClientRec::ready_heap_data, + ClientCompare<&RequestTag::proportion, + ReadyOption::raises, + true>, + B> ready_heap; + + AtLimit at_limit; + RejectThreshold reject_threshold = 0; + + double anticipation_timeout; + + std::atomic_bool finishing; + + // every request creates a tick + Counter tick = 0; + + // performance data collection + size_t reserv_sched_count = 0; + size_t prop_sched_count = 0; + size_t limit_break_sched_count = 0; + + Duration idle_age; + Duration erase_age; + Duration check_time; + std::deque<MarkPoint> clean_mark_points; + // max number of clients to erase at a time + Counter erase_max; + // unfinished last erase point + Counter last_erase_point = 0; + + // NB: All threads declared at end, so they're destructed first! + + std::unique_ptr<RunEvery> cleaning_job; + + // helper function to return the value of a variant if it matches the + // given type T, or a default value of T otherwise + template <typename T, typename Variant> + static T get_or_default(const Variant& param, T default_value) { + const T *p = boost::get<T>(¶m); + return p ? 
*p : default_value; + } + + // COMMON constructor that others feed into; we can accept three + // different variations of durations + template<typename Rep, typename Per> + PriorityQueueBase(ClientInfoFunc _client_info_f, + std::chrono::duration<Rep,Per> _idle_age, + std::chrono::duration<Rep,Per> _erase_age, + std::chrono::duration<Rep,Per> _check_time, + AtLimitParam at_limit_param, + double _anticipation_timeout) : + client_info_f(_client_info_f), + at_limit(get_or_default(at_limit_param, AtLimit::Reject)), + reject_threshold(get_or_default(at_limit_param, RejectThreshold{0})), + anticipation_timeout(_anticipation_timeout), + finishing(false), + idle_age(std::chrono::duration_cast<Duration>(_idle_age)), + erase_age(std::chrono::duration_cast<Duration>(_erase_age)), + check_time(std::chrono::duration_cast<Duration>(_check_time)), + erase_max(standard_erase_max) + { + assert(_erase_age >= _idle_age); + assert(_check_time < _idle_age); + // AtLimit::Reject depends on ImmediateTagCalc + assert(at_limit != AtLimit::Reject || !IsDelayed); + cleaning_job = + std::unique_ptr<RunEvery>( + new RunEvery(check_time, + std::bind(&PriorityQueueBase::do_clean, this))); + } + + + ~PriorityQueueBase() { + finishing = true; + } + + + inline const ClientInfo* get_cli_info(ClientRec& client) const { + if (is_dynamic_cli_info_f) { + client.info = client_info_f(client.client); + } + return client.info; + } + + // data_mtx must be held by caller + RequestTag initial_tag(DelayedTagCalc delayed, ClientRec& client, + const ReqParams& params, Time time, Cost cost) { + RequestTag tag(0, 0, 0, time, 0, 0, cost); + + // only calculate a tag if the request is going straight to the front + if (!client.has_request()) { + const ClientInfo* client_info = get_cli_info(client); + assert(client_info); + tag = RequestTag(client.get_req_tag(), *client_info, + params, time, cost, anticipation_timeout); + + // copy tag to previous tag for client + client.update_req_tag(tag, tick); + } + return tag; + } 
+ + // data_mtx must be held by caller + RequestTag initial_tag(ImmediateTagCalc imm, ClientRec& client, + const ReqParams& params, Time time, Cost cost) { + // calculate the tag unconditionally + const ClientInfo* client_info = get_cli_info(client); + assert(client_info); + RequestTag tag(client.get_req_tag(), *client_info, + params, time, cost, anticipation_timeout); + + // copy tag to previous tag for client + client.update_req_tag(tag, tick); + return tag; + } + + // data_mtx must be held by caller. returns 0 on success. when using + // AtLimit::Reject, requests that would exceed their limit are rejected + // with EAGAIN, and the queue will not take ownership of the given + // 'request' argument + int do_add_request(RequestRef&& request, + const C& client_id, + const ReqParams& req_params, + const Time time, + const Cost cost = 1u) { + ++tick; + + auto insert = client_map.emplace(client_id, ClientRecRef{}); + if (insert.second) { + // new client entry + const ClientInfo* info = client_info_f(client_id); + auto client_rec = std::make_shared<ClientRec>(client_id, info, tick); + resv_heap.push(client_rec); +#if USE_PROP_HEAP + prop_heap.push(client_rec); +#endif + limit_heap.push(client_rec); + ready_heap.push(client_rec); + insert.first->second = std::move(client_rec); + } + + // for convenience, we'll create a reference to the shared pointer + ClientRec& client = *insert.first->second; + + if (client.idle) { + // We need to do an adjustment so that idle clients compete + // fairly on proportional tags since those tags may have + // drifted from real-time. Either use the lowest existing + // proportion tag -- O(1) -- or the client with the lowest + // previous proportion tag -- O(n) where n = # clients. + // + // So we don't have to maintain a proportional queue that + // keeps the minimum on proportional tag alone (we're + // instead using a ready queue), we'll have to check each + // client. 
+ // + // The alternative would be to maintain a proportional queue + // (define USE_PROP_TAG) and do an O(1) operation here. + + // Was unable to confirm whether equality testing on + // std::numeric_limits<double>::max() is guaranteed, so + // we'll use a compile-time calculated trigger that is one + // third the max, which should be much larger than any + // expected organic value. + constexpr double lowest_prop_tag_trigger = + std::numeric_limits<double>::max() / 3.0; + + double lowest_prop_tag = std::numeric_limits<double>::max(); + for (auto const &c : client_map) { + // don't use ourselves (or anything else that might be + // listed as idle) since we're now in the map + if (!c.second->idle) { + double p; + // use either lowest proportion tag or previous proportion tag + if (c.second->has_request()) { + p = c.second->next_request().tag.proportion + + c.second->prop_delta; + } else { + p = c.second->get_req_tag().proportion + c.second->prop_delta; + } + + if (p < lowest_prop_tag) { + lowest_prop_tag = p; + } + } + } + + // if this conditional does not fire, it + if (lowest_prop_tag < lowest_prop_tag_trigger) { + client.prop_delta = lowest_prop_tag - time; + } + client.idle = false; + } // if this client was idle + + RequestTag tag = initial_tag(TagCalc{}, client, req_params, time, cost); + + if (at_limit == AtLimit::Reject && + tag.limit > time + reject_threshold) { + // if the client is over its limit, reject it here + return EAGAIN; + } + + client.add_request(tag, std::move(request)); + if (1 == client.requests.size()) { + // NB: can the following 4 calls to adjust be changed + // promote? Can adding a request ever demote a client in the + // heaps? 
+ resv_heap.adjust(client); + limit_heap.adjust(client); + ready_heap.adjust(client); +#if USE_PROP_HEAP + prop_heap.adjust(client); +#endif + } + + client.cur_rho = req_params.rho; + client.cur_delta = req_params.delta; + + resv_heap.adjust(client); + limit_heap.adjust(client); + ready_heap.adjust(client); +#if USE_PROP_HEAP + prop_heap.adjust(client); +#endif + return 0; + } // do_add_request + + // data_mtx must be held by caller + void update_next_tag(DelayedTagCalc delayed, ClientRec& top, + const RequestTag& tag) { + if (top.has_request()) { + // perform delayed tag calculation on the next request + ClientReq& next_first = top.next_request(); + const ClientInfo* client_info = get_cli_info(top); + assert(client_info); + next_first.tag = RequestTag(tag, *client_info, + top.cur_delta, top.cur_rho, + next_first.tag.arrival, + next_first.tag.cost, + anticipation_timeout); + // copy tag to previous tag for client + top.update_req_tag(next_first.tag, tick); + } + } + + void update_next_tag(ImmediateTagCalc imm, ClientRec& top, + const RequestTag& tag) { + // the next tag was already calculated on insertion + } + + // data_mtx should be held when called; top of heap should have + // a ready request + template<typename C1, IndIntruHeapData ClientRec::*C2, typename C3> + RequestTag pop_process_request(IndIntruHeap<C1, ClientRec, C2, C3, B>& heap, + std::function<void(const C& client, + const Cost cost, + RequestRef& request)> process) { + // gain access to data + ClientRec& top = heap.top(); + + Cost request_cost = top.next_request().tag.cost; + RequestRef request = std::move(top.next_request().request); + RequestTag tag = top.next_request().tag; + + // pop request and adjust heaps + top.pop_request(); + + update_next_tag(TagCalc{}, top, tag); + + resv_heap.demote(top); + limit_heap.adjust(top); +#if USE_PROP_HEAP + prop_heap.demote(top); +#endif + ready_heap.demote(top); + + // process + process(top.client, request_cost, request); + + return tag; + } // 
pop_process_request + + + // data_mtx must be held by caller + void reduce_reservation_tags(DelayedTagCalc delayed, ClientRec& client, + const RequestTag& tag) { + if (!client.requests.empty()) { + // only maintain a tag for the first request + auto& r = client.requests.front(); + r.tag.reservation -= + client.info->reservation_inv * std::max(uint32_t(1), tag.rho); + } + } + + // data_mtx should be held when called + void reduce_reservation_tags(ImmediateTagCalc imm, ClientRec& client, + const RequestTag& tag) { + double res_offset = + client.info->reservation_inv * std::max(uint32_t(1), tag.rho); + for (auto& r : client.requests) { + r.tag.reservation -= res_offset; + } + } + + // data_mtx should be held when called + void reduce_reservation_tags(const C& client_id, const RequestTag& tag) { + auto client_it = client_map.find(client_id); + + // means the client was cleaned from map; should never happen + // as long as cleaning times are long enough + assert(client_map.end() != client_it); + ClientRec& client = *client_it->second; + reduce_reservation_tags(TagCalc{}, client, tag); + + // don't forget to update previous tag + client.prev_tag.reservation -= + client.info->reservation_inv * std::max(uint32_t(1), tag.rho); + resv_heap.promote(client); + } + + + // data_mtx should be held when called + NextReq do_next_request(Time now) { + // if reservation queue is empty, all are empty (i.e., no + // active clients) + if(resv_heap.empty()) { + return NextReq::none(); + } + + // try constraint (reservation) based scheduling + + auto& reserv = resv_heap.top(); + if (reserv.has_request() && + reserv.next_request().tag.reservation <= now) { + return NextReq(HeapId::reservation); + } + + // no existing reservations before now, so try weight-based + // scheduling + + // all items that are within limit are eligible based on + // priority + auto limits = &limit_heap.top(); + while (limits->has_request() && + !limits->next_request().tag.ready && + 
limits->next_request().tag.limit <= now) { + limits->next_request().tag.ready = true; + ready_heap.promote(*limits); + limit_heap.demote(*limits); + + limits = &limit_heap.top(); + } + + auto& readys = ready_heap.top(); + if (readys.has_request() && + readys.next_request().tag.ready && + readys.next_request().tag.proportion < max_tag) { + return NextReq(HeapId::ready); + } + + // if nothing is schedulable by reservation or + // proportion/weight, and if we allow limit break, try to + // schedule something with the lowest proportion tag or + // alternatively lowest reservation tag. + if (at_limit == AtLimit::Allow) { + if (readys.has_request() && + readys.next_request().tag.proportion < max_tag) { + return NextReq(HeapId::ready); + } else if (reserv.has_request() && + reserv.next_request().tag.reservation < max_tag) { + return NextReq(HeapId::reservation); + } + } + + // nothing scheduled; make sure we re-run when next + // reservation item or next limited item comes up + + Time next_call = TimeMax; + if (resv_heap.top().has_request()) { + next_call = + min_not_0_time(next_call, + resv_heap.top().next_request().tag.reservation); + } + if (limit_heap.top().has_request()) { + const auto& next = limit_heap.top().next_request(); + assert(!next.tag.ready || max_tag == next.tag.proportion); + next_call = min_not_0_time(next_call, next.tag.limit); + } + if (next_call < TimeMax) { + return NextReq(next_call); + } else { + return NextReq::none(); + } + } // do_next_request + + + // if possible is not zero and less than current then return it; + // otherwise return current; the idea is we're trying to find + // the minimal time but ignoring zero + static inline const Time& min_not_0_time(const Time& current, + const Time& possible) { + return TimeZero == possible ? current : std::min(current, possible); + } + + + /* + * This is being called regularly by RunEvery. Every time it's + * called it notes the time and delta counter (mark point) in a + * deque. 
It also looks at the deque to find the most recent + * mark point that is older than clean_age. It then walks the + * map and delete all server entries that were last used before + * that mark point. + */ + void do_clean() { + TimePoint now = std::chrono::steady_clock::now(); + DataGuard g(data_mtx); + clean_mark_points.emplace_back(MarkPoint(now, tick)); + + // first erase the super-old client records + + Counter erase_point = last_erase_point; + auto point = clean_mark_points.front(); + while (point.first <= now - erase_age) { + last_erase_point = point.second; + erase_point = last_erase_point; + clean_mark_points.pop_front(); + point = clean_mark_points.front(); + } + + Counter idle_point = 0; + for (auto i : clean_mark_points) { + if (i.first <= now - idle_age) { + idle_point = i.second; + } else { + break; + } + } + + Counter erased_num = 0; + if (erase_point > 0 || idle_point > 0) { + for (auto i = client_map.begin(); i != client_map.end(); /* empty */) { + auto i2 = i++; + if (erase_point && + erased_num < erase_max && + i2->second->last_tick <= erase_point) { + delete_from_heaps(i2->second); + client_map.erase(i2); + erased_num++; + } else if (idle_point && i2->second->last_tick <= idle_point) { + i2->second->idle = true; + } + } // for + + auto wperiod = check_time; + if (erased_num >= erase_max) { + wperiod = duration_cast<milliseconds>(aggressive_check_time); + } else { + // clean finished, refresh + last_erase_point = 0; + } + cleaning_job->try_update(wperiod); + } // if + } // do_clean + + + // data_mtx must be held by caller + template<IndIntruHeapData ClientRec::*C1,typename C2> + void delete_from_heap(ClientRecRef& client, + c::IndIntruHeap<ClientRecRef,ClientRec,C1,C2,B>& heap) { + auto i = heap.at(client); + heap.remove(i); + } + + + // data_mtx must be held by caller + void delete_from_heaps(ClientRecRef& client) { + delete_from_heap(client, resv_heap); +#if USE_PROP_HEAP + delete_from_heap(client, prop_heap); +#endif + delete_from_heap(client, 
limit_heap); + delete_from_heap(client, ready_heap); + } + }; // class PriorityQueueBase + + + template<typename C, typename R, bool IsDelayed=false, bool U1=false, unsigned B=2> + class PullPriorityQueue : public PriorityQueueBase<C,R,IsDelayed,U1,B> { + using super = PriorityQueueBase<C,R,IsDelayed,U1,B>; + + public: + + // When a request is pulled, this is the return type. + struct PullReq { + struct Retn { + C client; + typename super::RequestRef request; + PhaseType phase; + Cost cost; + }; + + typename super::NextReqType type; + boost::variant<Retn,Time> data; + + bool is_none() const { return type == super::NextReqType::none; } + + bool is_retn() const { return type == super::NextReqType::returning; } + Retn& get_retn() { + return boost::get<Retn>(data); + } + + bool is_future() const { return type == super::NextReqType::future; } + Time getTime() const { return boost::get<Time>(data); } + }; + + +#ifdef PROFILE + ProfileTimer<std::chrono::nanoseconds> pull_request_timer; + ProfileTimer<std::chrono::nanoseconds> add_request_timer; +#endif + + template<typename Rep, typename Per> + PullPriorityQueue(typename super::ClientInfoFunc _client_info_f, + std::chrono::duration<Rep,Per> _idle_age, + std::chrono::duration<Rep,Per> _erase_age, + std::chrono::duration<Rep,Per> _check_time, + AtLimitParam at_limit_param = AtLimit::Wait, + double _anticipation_timeout = 0.0) : + super(_client_info_f, + _idle_age, _erase_age, _check_time, + at_limit_param, _anticipation_timeout) + { + // empty + } + + + // pull convenience constructor + PullPriorityQueue(typename super::ClientInfoFunc _client_info_f, + AtLimitParam at_limit_param = AtLimit::Wait, + double _anticipation_timeout = 0.0) : + PullPriorityQueue(_client_info_f, + standard_idle_age, + standard_erase_age, + standard_check_time, + at_limit_param, + _anticipation_timeout) + { + // empty + } + + + int add_request(R&& request, + const C& client_id, + const ReqParams& req_params, + const Cost cost = 1u) { + return 
add_request(typename super::RequestRef(new R(std::move(request))), + client_id, + req_params, + get_time(), + cost); + } + + + int add_request(R&& request, + const C& client_id, + const Cost cost = 1u) { + static const ReqParams null_req_params; + return add_request(typename super::RequestRef(new R(std::move(request))), + client_id, + null_req_params, + get_time(), + cost); + } + + + int add_request_time(R&& request, + const C& client_id, + const ReqParams& req_params, + const Time time, + const Cost cost = 1u) { + return add_request(typename super::RequestRef(new R(std::move(request))), + client_id, + req_params, + time, + cost); + } + + + int add_request(typename super::RequestRef&& request, + const C& client_id, + const ReqParams& req_params, + const Cost cost = 1u) { + return add_request(request, req_params, client_id, get_time(), cost); + } + + + int add_request(typename super::RequestRef&& request, + const C& client_id, + const Cost cost = 1u) { + static const ReqParams null_req_params; + return add_request(request, null_req_params, client_id, get_time(), cost); + } + + + // this does the work; the versions above provide alternate interfaces + int add_request(typename super::RequestRef&& request, + const C& client_id, + const ReqParams& req_params, + const Time time, + const Cost cost = 1u) { + typename super::DataGuard g(this->data_mtx); +#ifdef PROFILE + add_request_timer.start(); +#endif + int r = super::do_add_request(std::move(request), + client_id, + req_params, + time, + cost); + // no call to schedule_request for pull version +#ifdef PROFILE + add_request_timer.stop(); +#endif + return r; + } + + + inline PullReq pull_request() { + return pull_request(get_time()); + } + + + PullReq pull_request(const Time now) { + PullReq result; + typename super::DataGuard g(this->data_mtx); +#ifdef PROFILE + pull_request_timer.start(); +#endif + + typename super::NextReq next = super::do_next_request(now); + result.type = next.type; + switch(next.type) { + case 
super::NextReqType::none: + return result; + case super::NextReqType::future: + result.data = next.when_ready; + return result; + case super::NextReqType::returning: + // to avoid nesting, break out and let code below handle this case + break; + default: + assert(false); + } + + // we'll only get here if we're returning an entry + + auto process_f = + [&] (PullReq& pull_result, PhaseType phase) -> + std::function<void(const C&, + uint64_t, + typename super::RequestRef&)> { + return [&pull_result, phase](const C& client, + const Cost request_cost, + typename super::RequestRef& request) { + pull_result.data = typename PullReq::Retn{ client, + std::move(request), + phase, + request_cost }; + }; + }; + + switch(next.heap_id) { + case super::HeapId::reservation: + (void) super::pop_process_request(this->resv_heap, + process_f(result, + PhaseType::reservation)); + ++this->reserv_sched_count; + break; + case super::HeapId::ready: + { + auto tag = super::pop_process_request(this->ready_heap, + process_f(result, PhaseType::priority)); + // need to use retn temporarily + auto& retn = boost::get<typename PullReq::Retn>(result.data); + super::reduce_reservation_tags(retn.client, tag); + } + ++this->prop_sched_count; + break; + default: + assert(false); + } + +#ifdef PROFILE + pull_request_timer.stop(); +#endif + return result; + } // pull_request + + + protected: + + + // data_mtx should be held when called; unfortunately this + // function has to be repeated in both push & pull + // specializations + typename super::NextReq next_request() { + return next_request(get_time()); + } + }; // class PullPriorityQueue + + + // PUSH version + template<typename C, typename R, bool IsDelayed=false, bool U1=false, unsigned B=2> + class PushPriorityQueue : public PriorityQueueBase<C,R,IsDelayed,U1,B> { + + protected: + + using super = PriorityQueueBase<C,R,IsDelayed,U1,B>; + + public: + + // a function to see whether the server can handle another request + using CanHandleRequestFunc = 
std::function<bool(void)>; + + // a function to submit a request to the server; the second + // parameter is a callback when it's completed + using HandleRequestFunc = + std::function<void(const C&,typename super::RequestRef,PhaseType,uint64_t)>; + + protected: + + CanHandleRequestFunc can_handle_f; + HandleRequestFunc handle_f; + // for handling timed scheduling + std::mutex sched_ahead_mtx; + std::condition_variable sched_ahead_cv; + Time sched_ahead_when = TimeZero; + +#ifdef PROFILE + public: + ProfileTimer<std::chrono::nanoseconds> add_request_timer; + ProfileTimer<std::chrono::nanoseconds> request_complete_timer; + protected: +#endif + + // NB: threads declared last, so constructed last and destructed first + + std::thread sched_ahead_thd; + + public: + + // push full constructor + template<typename Rep, typename Per> + PushPriorityQueue(typename super::ClientInfoFunc _client_info_f, + CanHandleRequestFunc _can_handle_f, + HandleRequestFunc _handle_f, + std::chrono::duration<Rep,Per> _idle_age, + std::chrono::duration<Rep,Per> _erase_age, + std::chrono::duration<Rep,Per> _check_time, + AtLimitParam at_limit_param = AtLimit::Wait, + double anticipation_timeout = 0.0) : + super(_client_info_f, + _idle_age, _erase_age, _check_time, + at_limit_param, anticipation_timeout) + { + can_handle_f = _can_handle_f; + handle_f = _handle_f; + sched_ahead_thd = std::thread(&PushPriorityQueue::run_sched_ahead, this); + } + + + // push convenience constructor + PushPriorityQueue(typename super::ClientInfoFunc _client_info_f, + CanHandleRequestFunc _can_handle_f, + HandleRequestFunc _handle_f, + AtLimitParam at_limit_param = AtLimit::Wait, + double _anticipation_timeout = 0.0) : + PushPriorityQueue(_client_info_f, + _can_handle_f, + _handle_f, + standard_idle_age, + standard_erase_age, + standard_check_time, + at_limit_param, + _anticipation_timeout) + { + // empty + } + + + ~PushPriorityQueue() { + this->finishing = true; + { + std::lock_guard<std::mutex> l(sched_ahead_mtx); 
+ sched_ahead_cv.notify_one(); + } + sched_ahead_thd.join(); + } + + public: + + int add_request(R&& request, + const C& client_id, + const ReqParams& req_params, + const Cost cost = 1u) { + return add_request(typename super::RequestRef(new R(std::move(request))), + client_id, + req_params, + get_time(), + cost); + } + + + int add_request(typename super::RequestRef&& request, + const C& client_id, + const ReqParams& req_params, + const Cost cost = 1u) { + return add_request(request, req_params, client_id, get_time(), cost); + } + + + int add_request_time(const R& request, + const C& client_id, + const ReqParams& req_params, + const Time time, + const Cost cost = 1u) { + return add_request(typename super::RequestRef(new R(request)), + client_id, + req_params, + time, + cost); + } + + + int add_request(typename super::RequestRef&& request, + const C& client_id, + const ReqParams& req_params, + const Time time, + const Cost cost = 1u) { + typename super::DataGuard g(this->data_mtx); +#ifdef PROFILE + add_request_timer.start(); +#endif + int r = super::do_add_request(std::move(request), + client_id, + req_params, + time, + cost); + if (r == 0) { + schedule_request(); + } +#ifdef PROFILE + add_request_timer.stop(); +#endif + return r; + } + + + void request_completed() { + typename super::DataGuard g(this->data_mtx); +#ifdef PROFILE + request_complete_timer.start(); +#endif + schedule_request(); +#ifdef PROFILE + request_complete_timer.stop(); +#endif + } + + protected: + + // data_mtx should be held when called; furthermore, the heap + // should not be empty and the top element of the heap should + // not be already handled + // + // NOTE: the use of "super::ClientRec" in either the template + // construct or as a parameter to submit_top_request generated + // a compiler error in g++ 4.8.4, when ClientRec was + // "protected" rather than "public". By g++ 6.3.1 this was not + // an issue. But for backwards compatibility + // PriorityQueueBase::ClientRec is public. 
+ template<typename C1, + IndIntruHeapData super::ClientRec::*C2, + typename C3, + unsigned B4> + typename super::RequestMeta + submit_top_request(IndIntruHeap<C1,typename super::ClientRec,C2,C3,B4>& heap, + PhaseType phase) { + C client_result; + RequestTag tag = super::pop_process_request(heap, + [this, phase, &client_result] + (const C& client, + const Cost request_cost, + typename super::RequestRef& request) { + client_result = client; + handle_f(client, std::move(request), phase, request_cost); + }); + typename super::RequestMeta req(client_result, tag); + return req; + } + + + // data_mtx should be held when called + void submit_request(typename super::HeapId heap_id) { + switch(heap_id) { + case super::HeapId::reservation: + // don't need to note client + (void) submit_top_request(this->resv_heap, PhaseType::reservation); + // unlike the other two cases, we do not reduce reservation + // tags here + ++this->reserv_sched_count; + break; + case super::HeapId::ready: + { + auto req = submit_top_request(this->ready_heap, PhaseType::priority); + super::reduce_reservation_tags(req.client_id, req.tag); + } + ++this->prop_sched_count; + break; + default: + assert(false); + } + } // submit_request + + + // data_mtx should be held when called; unfortunately this + // function has to be repeated in both push & pull + // specializations + typename super::NextReq next_request() { + return next_request(get_time()); + } + + + // data_mtx should be held when called; overrides member + // function in base class to add check for whether a request can + // be pushed to the server + typename super::NextReq next_request(Time now) { + if (!can_handle_f()) { + typename super::NextReq result; + result.type = super::NextReqType::none; + return result; + } else { + return super::do_next_request(now); + } + } // next_request + + + // data_mtx should be held when called + void schedule_request() { + typename super::NextReq next_req = next_request(); + switch (next_req.type) { + case 
super::NextReqType::none: + return; + case super::NextReqType::future: + sched_at(next_req.when_ready); + break; + case super::NextReqType::returning: + submit_request(next_req.heap_id); + break; + default: + assert(false); + } + } + + + // this is the thread that handles running schedule_request at + // future times when nothing can be scheduled immediately + void run_sched_ahead() { + std::unique_lock<std::mutex> l(sched_ahead_mtx); + + while (!this->finishing) { + // predicate for cond.wait() + const auto pred = [this] () -> bool { return this->finishing; }; + + if (TimeZero == sched_ahead_when) { + sched_ahead_cv.wait(l, pred); + } else { + // cast from Time -> duration<Time> -> Duration -> TimePoint + const auto until = typename super::TimePoint{ + duration_cast<typename super::Duration>( + std::chrono::duration<Time>{sched_ahead_when})}; + sched_ahead_cv.wait_until(l, until, pred); + sched_ahead_when = TimeZero; + if (this->finishing) return; + + l.unlock(); + if (!this->finishing) { + typename super::DataGuard g(this->data_mtx); + schedule_request(); + } + l.lock(); + } + } + } + + + void sched_at(Time when) { + std::lock_guard<std::mutex> l(sched_ahead_mtx); + if (this->finishing) return; + if (TimeZero == sched_ahead_when || when < sched_ahead_when) { + sched_ahead_when = when; + sched_ahead_cv.notify_one(); + } + } + }; // class PushPriorityQueue + + } // namespace dmclock +} // namespace crimson diff --git a/src/dmclock/src/dmclock_util.cc b/src/dmclock/src/dmclock_util.cc new file mode 100644 index 000000000..75960ed74 --- /dev/null +++ b/src/dmclock/src/dmclock_util.cc @@ -0,0 +1,34 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2017 Red Hat Inc. + * + * Author: J. 
Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#include <signal.h> + +#include <iomanip> +#include <sstream> + +#include "dmclock_util.h" + + +std::string crimson::dmclock::format_time(const Time& time, unsigned modulo) { + long subtract = long(time / modulo) * modulo; + std::stringstream ss; + ss << std::fixed << std::setprecision(4) << (time - subtract); + return ss.str(); +} + + +void crimson::dmclock::debugger() { + raise(SIGCONT); +} diff --git a/src/dmclock/src/dmclock_util.h b/src/dmclock/src/dmclock_util.h new file mode 100644 index 000000000..f7e561ff7 --- /dev/null +++ b/src/dmclock/src/dmclock_util.h @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2017 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#pragma once + + +#include <unistd.h> +#include <assert.h> +#include <sys/time.h> + +#include <limits> +#include <cmath> +#include <chrono> + + +namespace crimson { + namespace dmclock { + // we're using double to represent time, but we could change it by + // changing the following declarations (and by making sure a min + // function existed) + using Time = double; + static const Time TimeZero = 0.0; + static const Time TimeMax = std::numeric_limits<Time>::max(); + static const double NaN = nan(""); + + + inline Time get_time() { +#if defined(__linux__) + struct timespec now; + auto result = clock_gettime(CLOCK_REALTIME, &now); + (void) result; // reference result in case assert is compiled out + assert(0 == result); + return now.tv_sec + (now.tv_nsec / 1.0e9); +#else + struct timeval now; + auto result = gettimeofday(&now, NULL); + (void) result; // reference result in case assert is compiled out + assert(0 == result); + return now.tv_sec + (now.tv_usec / 1.0e6); +#endif + } + + std::string format_time(const Time& time, unsigned modulo = 1000); + + void debugger(); + + } // namespace dmclock +} // namespace crimson diff --git a/src/dmclock/support/src/debug.h b/src/dmclock/support/src/debug.h new file mode 100644 index 000000000..d8e6713fd --- /dev/null +++ b/src/dmclock/support/src/debug.h @@ -0,0 +1,24 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#pragma once + + +#include <signal.h> + + +inline void debugger() { + raise(SIGCONT); +} diff --git a/src/dmclock/support/src/heap.h b/src/dmclock/support/src/heap.h new file mode 100644 index 000000000..6a1f9963a --- /dev/null +++ b/src/dmclock/support/src/heap.h @@ -0,0 +1,247 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#pragma once + + +#include <vector> +#include <ostream> + +#include "assert.h" + + +namespace crimson { + + /* + * T : type of data held in the heap. + * + * C : class that implements operator() with two arguments and + * returns a boolean when the first argument is greater than (higher + * in priority than) the second. 
+ */ + template<typename T, typename C> + class Heap { + + public: + + class iterator { + + friend Heap<T,C>; + + Heap<T,C>& heap; + int index; + + iterator(Heap<T,C>& _heap, int _index) : + heap(_heap), + index(_index) + { + // empty + } + + public: + + iterator(iterator&& other) : + heap(other.heap), + index(other.index) + { + // empty + } + + iterator& operator++() { + ++index; + return *this; + } + + bool operator==(const iterator& other) const { + return index == other.index; + } + + bool operator!=(const iterator& other) const { + return !(*this == other); + } + + T& operator*() { + return heap.data[index]; + } + + // the item this iterator refers to + void increase() { + heap.siftUp(index); + } + }; // class iterator + + friend iterator; + + protected: + + std::vector<T> data; + int count; + C comparator; + + // parent(0) should be a negative value, which it is due to + // truncating towards negative infinity + static inline int parent(int i) { return (i - 1) / 2; } + + static inline int lhs(int i) { return 2*i + 1; } + + static inline int rhs(int i) { return 2*i + 2; } + + void siftUp(int i) { + assert(i < count); + + while (i > 0) { + int pi = parent(i); + if (!comparator(data[i], data[pi])) { + break; + } + + std::swap(data[i], data[pi]); + i = pi; + } + } + + void siftDown(int i) { + while (i < count) { + int li = lhs(i); + int ri = rhs(i); + + if (li < count) { + if (comparator(data[li], data[i])) { + if (ri < count && comparator(data[ri], data[li])) { + std::swap(data[i], data[ri]); + i = ri; + } else { + std::swap(data[i], data[li]); + i = li; + } + } else if (ri < count && comparator(data[ri], data[i])) { + std::swap(data[i], data[ri]); + i = ri; + } else { + break; + } + } else { + break; + } + } + } + + + public: + + Heap() : + count(0) + { + // empty + } + + Heap(const Heap<T,C>& other) { + data.resize(other.data.size()); + for (int i = 0; i < other.count; ++i) { + data[i] = other.data[i]; + } + count = other.count; + } + + const Heap<T,C>& 
operator=(const Heap<T,C>& other) { + data.resize(other.data.size()); + for (int i = 0; i < other.count; ++i) { + data[i] = other.data[i]; + } + count = other.count; + return *this; + } + + bool empty() const { return 0 == count; } + + T& top() { return data[0]; } + + void push(T item) { + int i = count++; + data.push_back(item); + siftUp(i); + } + + void pop() { + data[0] = data[--count]; + data.resize(count); + siftDown(0); + } + + void updateTop() { + siftDown(0); + } + + void clear() { + count = 0; + data.resize(0); + } + + iterator begin() { + return iterator(*this, 0); + } + + iterator end() { + return iterator(*this, count); + } + + std::ostream& displaySorted(std::ostream& out, + std::function<bool(const T&)> filter, + bool insert_line_breaks = true) const { + Heap<T,C> temp = *this; + + bool first = true; + out << "[ "; + + while(!temp.empty()) { + const T& top = temp.top(); + if (filter(top)) { + if (!first) { + out << ", "; + } + if (insert_line_breaks) { + out << std::endl << " "; + } + out << temp.top(); + first = false; + } + temp.pop(); + } + + out << " ]"; + if (insert_line_breaks) { + out << std::endl; + } + return out; + } + + template<typename T1, typename T2> + friend std::ostream& operator<<(std::ostream&, const Heap<T1,T2>&); + }; // class Heap + + + template<typename T1, typename T2> + std::ostream& operator<<(std::ostream& out, const Heap<T1,T2>& h) { + out << "[ "; + if (h.count) { + out << h.data[0]; + } + for (int i = 1; i < h.count; i++) { + out << ", " << h.data[i]; + } + out << " ]"; + return out; + } +} // namespace diff --git a/src/dmclock/support/src/indirect_intrusive_heap.h b/src/dmclock/support/src/indirect_intrusive_heap.h new file mode 100644 index 000000000..d84a48784 --- /dev/null +++ b/src/dmclock/support/src/indirect_intrusive_heap.h @@ -0,0 +1,567 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. 
Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#pragma once + + +#include <memory> +#include <vector> +#include <string> +#include <iostream> +#include <functional> +#include <algorithm> + +#include "assert.h" + + +namespace crimson { + using IndIntruHeapData = size_t; + + /* T is the ultimate data that's being stored in the heap, although + * through indirection. + * + * I is the indirect type that will actually be stored in the heap + * and that must allow dereferencing (via operator*) to yield a + * T&. + * + * C is a functor when given two T&'s will return true if the first + * must precede the second. + * + * heap_info is a data member pointer as to where the heap data in T + * is stored. + * + * K is the branching factor of the heap, default is 2 (binary heap). + */ + template<typename I, + typename T, + IndIntruHeapData T::*heap_info, + typename C, + unsigned K = 2> + class IndIntruHeap { + + // shorthand + using HeapIndex = IndIntruHeapData; + + static_assert( + std::is_same<T,typename std::pointer_traits<I>::element_type>::value, + "class I must resolve to class T by indirection (pointer dereference)"); + + static_assert( + std::is_same<bool, + typename std::result_of<C(const T&,const T&)>::type>::value, + "class C must define operator() to take two const T& and return a bool"); + + static_assert(K >= 2, "K (degree of branching) must be at least 2"); + + class Iterator { + friend IndIntruHeap<I, T, heap_info, C, K>; + + IndIntruHeap<I, T, heap_info, C, K>* heap; + HeapIndex index; + + Iterator(IndIntruHeap<I, T, heap_info, C, K>& _heap, HeapIndex _index) : + heap(&_heap), + index(_index) + { + // empty + } + + public: + + Iterator(Iterator&& other) : + heap(other.heap), + index(other.index) + { + // empty + } + + Iterator(const Iterator& 
other) : + heap(other.heap), + index(other.index) + { + // empty + } + + Iterator& operator=(Iterator&& other) { + std::swap(heap, other.heap); + std::swap(index, other.index); + return *this; + } + + Iterator& operator=(const Iterator& other) { + heap = other.heap; + index = other.index; + } + + Iterator& operator++() { + if (index <= heap->count) { + ++index; + } + return *this; + } + + bool operator==(const Iterator& other) const { + return heap == other.heap && index == other.index; + } + + bool operator!=(const Iterator& other) const { + return !(*this == other); + } + + T& operator*() { + return *heap->data[index]; + } + + T* operator->() { + return &(*heap->data[index]); + } + +#if 0 + // the item this iterator refers to + void increase() { + heap.sift_up(index); + } +#endif + }; // class Iterator + + + class ConstIterator { + friend IndIntruHeap<I, T, heap_info, C, K>; + + const IndIntruHeap<I, T, heap_info, C, K>* heap; + HeapIndex index; + + ConstIterator(const IndIntruHeap<I, T, heap_info, C, K>& _heap, + HeapIndex _index) : + heap(&_heap), + index(_index) + { + // empty + } + + public: + + ConstIterator(ConstIterator&& other) : + heap(other.heap), + index(other.index) + { + // empty + } + + ConstIterator(const ConstIterator& other) : + heap(other.heap), + index(other.index) + { + // empty + } + + ConstIterator& operator=(ConstIterator&& other) { + std::swap(heap, other.heap); + std::swap(index, other.index); + return *this; + } + + ConstIterator& operator=(const ConstIterator& other) { + heap = other.heap; + index = other.index; + } + + ConstIterator& operator++() { + if (index <= heap->count) { + ++index; + } + return *this; + } + + bool operator==(const ConstIterator& other) const { + return heap == other.heap && index == other.index; + } + + bool operator!=(const ConstIterator& other) const { + return !(*this == other); + } + + const T& operator*() { + return *heap->data[index]; + } + + const T* operator->() { + return &(*heap->data[index]); + } + }; 
// class ConstIterator + + + protected: + + std::vector<I> data; + HeapIndex count; + C comparator; + + public: + + IndIntruHeap() : + count(0) + { + // empty + } + + IndIntruHeap(const IndIntruHeap<I,T,heap_info,C,K>& other) : + count(other.count) + { + for (HeapIndex i = 0; i < other.count; ++i) { + data.push_back(other.data[i]); + } + } + + bool empty() const { return 0 == count; } + + size_t size() const { return (size_t) count; } + + T& top() { return *data[0]; } + + const T& top() const { return *data[0]; } + + I& top_ind() { return data[0]; } + + const I& top_ind() const { return data[0]; } + + void push(I&& item) { + HeapIndex i = count++; + intru_data_of(item) = i; + data.emplace_back(std::move(item)); + sift_up(i); + } + + void push(const I& item) { + I copy(item); + push(std::move(copy)); + } + + void pop() { + remove(HeapIndex(0)); + } + + void remove(Iterator& i) { + remove(i.index); + i = end(); + } + + Iterator find(const I& ind_item) { + for (HeapIndex i = 0; i < count; ++i) { + if (data[i] == ind_item) { + return Iterator(*this, i); + } + } + return end(); + } + + // when passing in value we do a comparison via operator== + Iterator find(const T& item) { + for (HeapIndex i = 0; i < count; ++i) { + if (*data[i] == item) { + return Iterator(*this, i); + } + } + return end(); + } + + // reverse find -- start looking from bottom of heap + Iterator rfind(const I& ind_item) { + // HeapIndex is unsigned, so we can't allow to go negative; so + // we'll keep it one more than actual index + for (HeapIndex i = count; i > 0; --i) { + if (data[i-1] == ind_item) { + return Iterator(*this, i-1); + } + } + return end(); + } + + // reverse find -- start looking from bottom of heap + Iterator rfind(const T& item) { + // HeapIndex is unsigned, so we can't allow to go negative; so + // we'll keep it one more than actual index + for (HeapIndex i = count; i > 0; --i) { + if (*data[i-1] == item) { + return Iterator(*this, i-1); + } + } + return end(); + } + + 
ConstIterator find(const I& ind_item) const { + for (HeapIndex i = 0; i < count; ++i) { + if (data[i] == ind_item) { + return ConstIterator(*this, i); + } + } + return cend(); + } + + // when passing in value we do a comparison via operator== + ConstIterator find(const T& item) const { + for (HeapIndex i = 0; i < count; ++i) { + if (*data[i] == item) { + return ConstIterator(*this, i); + } + } + return cend(); + } + + // reverse find -- start looking from bottom of heap + ConstIterator rfind(const I& ind_item) const { + // HeapIndex is unsigned, so we can't allow to go negative; so + // we'll keep it one more than actual index + for (HeapIndex i = count; i > 0; --i) { + if (data[i-1] == ind_item) { + return ConstIterator(*this, i-1); + } + } + return cend(); + } + + // reverse find -- start looking from bottom of heap + ConstIterator rfind(const T& item) const { + // HeapIndex is unsigned, so we can't allow to go negative; so + // we'll keep it one more than actual index + for (HeapIndex i = count; i > 0; --i) { + if (*data[i-1] == item) { + return ConstIterator(*this, i-1); + } + } + return cend(); + } + + Iterator at(const I& ind_item) { + auto ind = intru_data_of(ind_item); + if (ind >= count) { + throw std::out_of_range( + std::to_string(ind) + " >= " + std::to_string(count)); + } + assert(data[ind] == ind_item); + return Iterator(*this, ind); + } + + void promote(T& item) { + sift_up(item.*heap_info); + } + + void demote(T& item) { + sift_down(item.*heap_info); + } + + void adjust(T& item) { + sift(item.*heap_info); + } + + Iterator begin() { + return Iterator(*this, 0); + } + + Iterator end() { + return Iterator(*this, count); + } + + ConstIterator cbegin() const { + return ConstIterator(*this, 0); + } + + ConstIterator cend() const { + return ConstIterator(*this, count); + } + + friend std::ostream& operator<<(std::ostream& out, const IndIntruHeap& h) { + auto i = h.data.cbegin(); + if (i != h.data.cend()) { + out << **i; + ++i; + while (i != h.data.cend()) 
{ + out << ", " << **i; + } + } + return out; + } + + // can only be called if I is copiable; copies heap into a vector + // and sorts it before displaying it + std::ostream& + display_sorted(std::ostream& out, + std::function<bool(const T&)> filter = all_filter) const { + static_assert(std::is_copy_constructible<I>::value, + "cannot call display_sorted when class I is not copy" + " constructible"); + auto compare = [this] (const I first, const I second) -> bool { + return this->comparator(*first, *second); + }; + std::vector<I> copy(data); + std::sort(copy.begin(), copy.end(), compare); + + bool first = true; + for (auto c = copy.begin(); c != copy.end(); ++c) { + if (filter(**c)) { + if (!first) { + out << ", "; + } else { + first = false; + } + out << **c; + } + } + + return out; + } + + + protected: + + static IndIntruHeapData& intru_data_of(const I& item) { + return (*item).*heap_info; + } + + void remove(HeapIndex i) { + std::swap(data[i], data[--count]); + intru_data_of(data[i]) = i; + + // the following needs to be sift (and not sift_down) as it can + // go up or down the heap; imagine the heap vector contains 0, + // 10, 100, 20, 30, 200, 300, 40; then 200 is removed, and 40 + // would have to be sifted upwards + // sift(i); + sift(i); + + data.pop_back(); + } + + // default value of filter parameter to display_sorted + static bool all_filter(const T& data) { return true; } + + // when i is negative? 
+ static inline HeapIndex parent(HeapIndex i) { + assert(0 != i); + return (i - 1) / K; + } + + // index of left child when K==2, index of left-most child when K>2 + static inline HeapIndex lhs(HeapIndex i) { return K*i + 1; } + + // index of right child when K==2, index of right-most child when K>2 + static inline HeapIndex rhs(HeapIndex i) { return K*i + K; } + + void sift_up(HeapIndex i) { + while (i > 0) { + HeapIndex pi = parent(i); + if (!comparator(*data[i], *data[pi])) { + break; + } + + std::swap(data[i], data[pi]); + intru_data_of(data[i]) = i; + intru_data_of(data[pi]) = pi; + i = pi; + } + } // sift_up + + // use this sift_down definition when K>2; it's more general and + // uses a loop; EnableBool insures template uses a template + // parameter + template<bool EnableBool=true> + typename std::enable_if<(K>2)&&EnableBool,void>::type sift_down(HeapIndex i) { + if (i >= count) return; + while (true) { + HeapIndex li = lhs(i); + + if (li < count) { + HeapIndex ri = std::min(rhs(i), count - 1); + + // find the index of min. 
child + HeapIndex min_i = li; + for (HeapIndex k = li + 1; k <= ri; ++k) { + if (comparator(*data[k], *data[min_i])) { + min_i = k; + } + } + + if (comparator(*data[min_i], *data[i])) { + std::swap(data[i], data[min_i]); + intru_data_of(data[i]) = i; + intru_data_of(data[min_i]) = min_i; + i = min_i; + } else { + // no child is smaller + break; + } + } else { + // no children + break; + } + } + } // sift_down + + // use this sift_down definition when K==2; EnableBool insures + // template uses a template parameter + template<bool EnableBool=true> + typename std::enable_if<K==2&&EnableBool,void>::type sift_down(HeapIndex i) { + if (i >= count) return; + while (true) { + const HeapIndex li = lhs(i); + const HeapIndex ri = 1 + li; + + if (li < count) { + if (comparator(*data[li], *data[i])) { + if (ri < count && comparator(*data[ri], *data[li])) { + std::swap(data[i], data[ri]); + intru_data_of(data[i]) = i; + intru_data_of(data[ri]) = ri; + i = ri; + } else { + std::swap(data[i], data[li]); + intru_data_of(data[i]) = i; + intru_data_of(data[li]) = li; + i = li; + } + } else if (ri < count && comparator(*data[ri], *data[i])) { + std::swap(data[i], data[ri]); + intru_data_of(data[i]) = i; + intru_data_of(data[ri]) = ri; + i = ri; + } else { + // no child is smaller + break; + } + } else { + // no children + break; + } + } // while + } // sift_down + + void sift(HeapIndex i) { + if (i == 0) { + // if we're at top, can only go down + sift_down(i); + } else { + HeapIndex pi = parent(i); + if (comparator(*data[i], *data[pi])) { + // if we can go up, we will + sift_up(i); + } else { + // otherwise we'll try to go down + sift_down(i); + } + } + } // sift + }; // class IndIntruHeap + +} // namespace crimson diff --git a/src/dmclock/support/src/intrusive_heap.h b/src/dmclock/support/src/intrusive_heap.h new file mode 100644 index 000000000..21d3ea9a0 --- /dev/null +++ b/src/dmclock/support/src/intrusive_heap.h @@ -0,0 +1,221 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; 
indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#pragma once + + +#include <vector> +#include <string> +#include <iostream> +#include <functional> + +#include "assert.h" + + +namespace crimson { + using IntruHeapData = size_t; + + // T = type of data in heap; I = functor that returns a non-const + // reference to IntruHeapData; C = functor that compares two const + // refs and return true if the first precedes the second + template<typename T, typename I, typename C> + class IntruHeap { + + static_assert( + std::is_same<IntruHeapData&,typename std::result_of<I(T&)>::type>::value, + "class I must define operator() to take T& and return a IntruHeapData&."); + + static_assert( + std::is_same<bool,typename std::result_of<C(const T&,const T&)>::type>::value, + "class C must define operator() to take two const T& and return a bool."); + + + protected: + using index_t = IntruHeapData; + + std::vector<T> data; + index_t count; + I intru_data_of; + C comparator; + + public: + + IntruHeap() : + count(0) + { + // empty + } + + IntruHeap(const IntruHeap<T,I,C>& other) : + count(other.count) + { + for (uint i = 0; i < other.count; ++i) { + data.push_back(other.data[i]); + } + } + + bool empty() const { return 0 == count; } + + T& top() { return data[0]; } + + void push(T&& item) { + index_t i = count++; + intru_data_of(item) = i; + data.emplace_back(item); + sift_up(i); + } + + void push(const T& item) { + T copy(item); + push(std::move(copy)); + } + + void pop() { + std::swap(data[0], data[--count]); + intru_data_of(data[0]) = 0; + data.pop_back(); + sift_down(0); + } + + void adjust_up(T& item) { + sift_up(intru_data_of(item)); + } + + void 
adjust_down(T& item) { + sift_down(intru_data_of(item)); + } + + void adjust(T& item) { + sift(intru_data_of(item)); + } + + friend std::ostream& operator<<(std::ostream& out, const IntruHeap& h) { + for (uint i = 0; i < h.count; ++i) { + out << h.data[i] << ", "; + } + return out; + } + + std::ostream& + display_sorted(std::ostream& out, + bool insert_line_breaks = true, + std::function<bool(const T&)> filter = all_filter) const { + IntruHeap<T,I,C> copy = *this; + + bool first = true; + out << "[ "; + + while(!copy.empty()) { + const T& top = copy.top(); + if (filter(top)) { + if (!first) { + out << ", "; + } + if (insert_line_breaks) { + out << std::endl << " "; + } + out << copy.top(); + first = false; + } + copy.pop(); + } + + out << " ]"; + if (insert_line_breaks) { + out << std::endl; + } + + return out; + } + + + protected: + + // default value of filter parameter to display_sorted + static bool all_filter(const T& data) { return true; } + + // when i is negative? + static inline index_t parent(index_t i) { + assert(0 != i); + return (i - 1) / 2; + } + + static inline index_t lhs(index_t i) { return 2*i + 1; } + + static inline index_t rhs(index_t i) { return 2*i + 2; } + + void sift_up(index_t i) { + while (i > 0) { + index_t pi = parent(i); + if (!comparator(data[i], data[pi])) { + break; + } + + std::swap(data[i], data[pi]); + intru_data_of(data[i]) = i; + intru_data_of(data[pi]) = pi; + i = pi; + } + } // sift_up + + void sift_down(index_t i) { + while (i < count) { + index_t li = lhs(i); + index_t ri = rhs(i); + + if (li < count) { + if (comparator(data[li], data[i])) { + if (ri < count && comparator(data[ri], data[li])) { + std::swap(data[i], data[ri]); + intru_data_of(data[i]) = i; + intru_data_of(data[ri]) = ri; + i = ri; + } else { + std::swap(data[i], data[li]); + intru_data_of(data[i]) = i; + intru_data_of(data[li]) = li; + i = li; + } + } else if (ri < count && comparator(data[ri], data[i])) { + std::swap(data[i], data[ri]); + 
intru_data_of(data[i]) = i; + intru_data_of(data[ri]) = ri; + i = ri; + } else { + break; + } + } else { + break; + } + } + } // sift_down + + void sift(index_t i) { + if (i == 0) { + // if we're at top, can only go down + sift_down(i); + } else { + index_t pi = parent(i); + if (comparator(data[i], data[pi])) { + // if we can go up, we will + sift_up(i); + } else { + // otherwise we'll try to go down + sift_down(i); + } + } + } // sift + }; // class IntruHeap +} // namespace crimson diff --git a/src/dmclock/support/src/profile.h b/src/dmclock/support/src/profile.h new file mode 100644 index 000000000..8b357dbfc --- /dev/null +++ b/src/dmclock/support/src/profile.h @@ -0,0 +1,121 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#pragma once + + +#include <cmath> +#include <chrono> + + +namespace crimson { + template<typename T> + class ProfileBase { + + protected: + + using clock = std::chrono::steady_clock; + + uint count = 0; + typename T::rep sum = 0; + typename T::rep sum_squares = 0; + typename T::rep low = 0; + typename T::rep high = 0; + + public: + + uint get_count() const { return count; } + typename T::rep get_sum() const { return sum; } + typename T::rep get_low() const { return low; } + typename T::rep get_high() const { return high; } + double get_mean() const { + if (0 == count) return nan(""); + return sum / double(count); } + double get_std_dev() const { + if (0 == count) return nan(""); + double variance = + (count * sum_squares - sum * sum) / double(count * count); + return sqrt(variance); + } + }; // class ProfileBase + + + // forward declaration for friend + template<typename T> + class ProfileCombiner; + + + template<typename T> + class ProfileTimer : public ProfileBase<T> { + friend ProfileCombiner<T>; + + using super = ProfileBase<T>; + + bool is_timing = false; + typename super::clock::time_point start_time; + + public: + + ProfileTimer() { + } + + void start() { + assert(!is_timing); + start_time = super::clock::now(); + is_timing = true; + } + + void stop() { + assert(is_timing); + T duration = std::chrono::duration_cast<T>(super::clock::now() - start_time); + typename T::rep duration_count = duration.count(); + this->sum += duration_count; + this->sum_squares += duration_count * duration_count; + if (0 == this->count) { + this->low = duration_count; + this->high = duration_count; + } else { + if (duration_count < this->low) this->low = duration_count; + else if (duration_count > this->high) this->high = duration_count; + } + ++this->count; + is_timing = false; + } + }; // class ProfileTimer + + + template<typename T> + class ProfileCombiner : public ProfileBase<T> { + + using super = ProfileBase<T>; + + public: + + ProfileCombiner() {} + + void 
combine(const ProfileTimer<T>& timer) { + if (0 == this->count) { + this->low = timer.low; + this->high = timer.high; + } else { + if (timer.low < this->low) this->low = timer.low; + else if (timer.high > this->high) this->high = timer.high; + } + this->count += timer.count; + this->sum += timer.sum; + this->sum_squares += timer.sum_squares; + } + }; // class ProfileCombiner +} // namespace crimson diff --git a/src/dmclock/support/src/run_every.cc b/src/dmclock/support/src/run_every.cc new file mode 100644 index 000000000..14f1452b9 --- /dev/null +++ b/src/dmclock/support/src/run_every.cc @@ -0,0 +1,94 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2017 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#include "run_every.h" + + +// can define ADD_MOVE_SEMANTICS, although not fully debugged and tested + + +namespace chrono = std::chrono; + + +#ifdef ADD_MOVE_SEMANTICS +crimson::RunEvery::RunEvery() +{ + // empty +} + + +crimson::RunEvery& crimson::RunEvery::operator=(crimson::RunEvery&& other) +{ + // finish run every thread + { + Guard g(mtx); + finishing = true; + cv.notify_one(); + } + if (thd.joinable()) { + thd.join(); + } + + // transfer info over from previous thread + finishing.store(other.finishing); + wait_period = other.wait_period; + body = other.body; + + // finish other thread + other.finishing.store(true); + other.cv.notify_one(); + + // start this thread + thd = std::thread(&RunEvery::run, this); + + return *this; +} +#endif + + +crimson::RunEvery::~RunEvery() { + join(); +} + + +void crimson::RunEvery::join() { + { + Guard l(mtx); + if (finishing) return; + finishing = true; + cv.notify_all(); + } + thd.join(); +} + +// mtx must be held by caller +void crimson::RunEvery::try_update(milliseconds _wait_period) { + if (_wait_period != wait_period) { + wait_period = _wait_period; + } +} + +void crimson::RunEvery::run() { + Lock l(mtx); + while(!finishing) { + TimePoint until = chrono::steady_clock::now() + wait_period; + while (!finishing && chrono::steady_clock::now() < until) { + cv.wait_until(l, until); + } + if (!finishing) { + body(); + } + } +} diff --git a/src/dmclock/support/src/run_every.h b/src/dmclock/support/src/run_every.h new file mode 100644 index 000000000..f93961db1 --- /dev/null +++ b/src/dmclock/support/src/run_every.h @@ -0,0 +1,81 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2017 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. 
See file + * COPYING. + */ + + +#pragma once + +#include <chrono> +#include <mutex> +#include <condition_variable> +#include <thread> +#include <functional> + + +namespace crimson { + using std::chrono::duration_cast; + using std::chrono::milliseconds; + + // runs a given simple function object waiting wait_period + // milliseconds between; the destructor stops the other thread + // immediately + class RunEvery { + using Lock = std::unique_lock<std::mutex>; + using Guard = std::lock_guard<std::mutex>; + using TimePoint = std::chrono::steady_clock::time_point; + + bool finishing = false; + std::chrono::milliseconds wait_period; + std::function<void()> body; + std::mutex mtx; + std::condition_variable cv; + + // put threads last so all other variables are initialized first + + std::thread thd; + + public: + +#ifdef ADD_MOVE_SEMANTICS + RunEvery(); +#endif + + template<typename D> + RunEvery(D _wait_period, + const std::function<void()>& _body) : + wait_period(duration_cast<milliseconds>(_wait_period)), + body(_body) + { + thd = std::thread(&RunEvery::run, this); + } + + RunEvery(const RunEvery& other) = delete; + RunEvery& operator=(const RunEvery& other) = delete; + RunEvery(RunEvery&& other) = delete; +#ifdef ADD_MOVE_SEMANTICS + RunEvery& operator=(RunEvery&& other); +#else + RunEvery& operator=(RunEvery&& other) = delete; +#endif + + ~RunEvery(); + + void join(); + // update wait period in milliseconds + void try_update(milliseconds _wait_period); + + protected: + + void run(); + }; +} diff --git a/src/dmclock/support/test/CMakeLists.txt b/src/dmclock/support/test/CMakeLists.txt new file mode 100644 index 000000000..24b352157 --- /dev/null +++ b/src/dmclock/support/test/CMakeLists.txt @@ -0,0 +1,26 @@ +include_directories(../src) + +set(local_flags "-Wall -pthread") + +# dmclock does not use intrusive heap (but it does use indirect +# intrusive heap), so we won't use this code +if(false) + set(srcs + test_intrusive_heap.cc) + add_executable(test_intru_heap 
test_intrusive_heap.cc) + set_source_files_properties(${srcs} + PROPERTIES + COMPILE_FLAGS "${local_flags}") +endif(false) + +set(test_srcs test_indirect_intrusive_heap.cc) + +set_source_files_properties(${test_srcs} + PROPERTIES + COMPILE_FLAGS "${local_flags}" + ) + +add_executable(dmclock-data-struct-tests ${test_srcs}) + +target_link_libraries(dmclock-data-struct-tests + LINK_PRIVATE GTest::GTest GTest::Main Threads::Threads) diff --git a/src/dmclock/support/test/test_ind_intru_heap.cc b/src/dmclock/support/test/test_ind_intru_heap.cc new file mode 100644 index 000000000..8e7ee6931 --- /dev/null +++ b/src/dmclock/support/test/test_ind_intru_heap.cc @@ -0,0 +1,89 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#include <memory> +#include <string> +#include <iostream> + +#include "indirect_intrusive_heap.h" + + +class TestCompare; + + +class Test1 { + friend TestCompare; + + int data; + +public: + + crimson::IndIntruHeapData heap_data; + + explicit Test1(int _data) : data(_data) {} + + friend std::ostream& operator<<(std::ostream& out, const Test1& d) { + out << d.data << " (" << d.heap_data << ")"; + return out; + } + + int& the_data() { return data; } +}; + + +struct TestCompare { + bool operator()(const Test1& d1, const Test1& d2) { + return d1.data < d2.data; + } +}; + + +int main(int argc, char** argv) { + Test1 d1(2); + Test1 d2(3); + Test1 d3(1); + Test1 d4(-5); + + crimson::IndIntruHeap<std::shared_ptr<Test1>, Test1, &Test1::heap_data, TestCompare> my_heap; + + const std::shared_ptr<Test1> d99 = std::make_shared<Test1>(99); + + my_heap.push(std::make_shared<Test1>(2)); + my_heap.push(d99); + my_heap.push(std::make_shared<Test1>(1)); + my_heap.push(std::make_shared<Test1>(-5)); + my_heap.push(std::make_shared<Test1>(12)); + my_heap.push(std::make_shared<Test1>(-12)); + my_heap.push(std::make_shared<Test1>(-7)); + + std::cout << my_heap << std::endl; + + auto& t = my_heap.top(); + t.the_data() = 17; + my_heap.adjust_down(t); + + std::cout << my_heap << std::endl; + + my_heap.display_sorted(std::cout); + + while (!my_heap.empty()) { + auto& top = my_heap.top(); + std::cout << top << std::endl; + my_heap.pop(); + std::cout << my_heap << std::endl; + } + + return 0; +} diff --git a/src/dmclock/support/test/test_indirect_intrusive_heap.cc b/src/dmclock/support/test/test_indirect_intrusive_heap.cc new file mode 100644 index 000000000..e74c3181e --- /dev/null +++ b/src/dmclock/support/test/test_indirect_intrusive_heap.cc @@ -0,0 +1,1022 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. 
Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#include <iostream> +#include <memory> +#include <set> +#include <algorithm> +#include <random> + +#include "gtest/gtest.h" + +#include "indirect_intrusive_heap.h" + + +struct Elem { + int data; + + crimson::IndIntruHeapData heap_data; + crimson::IndIntruHeapData heap_data_alt; + + explicit Elem(int _data) : data(_data) { } + + bool operator==(const Elem& other) const { + return data == other.data; + } + + bool operator<(const Elem& other) const { + return data < other.data; + } + + friend std::ostream& operator<<(std::ostream& out, const Elem& d) { + out << d.data; + return out; + } +}; + + +// sorted low to high +struct ElemCompare { + bool operator()(const Elem& d1, const Elem& d2) const { + return d1.data < d2.data; + } +}; + + +// first all evens precede all odds, then they're sorted high to low +struct ElemCompareAlt { + bool operator()(const Elem& d1, const Elem& d2) { + if (0 == d1.data % 2) { + if (0 == d2.data % 2) { + return d1.data > d2.data; + } else { + return true; + } + } else if (0 == d2.data % 2) { + return false; + } else { + return d1.data > d2.data; + } + } +}; + + +class HeapFixture1: public ::testing::Test { + +public: + + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare> heap; + + std::shared_ptr<Elem> data1, data2, data3, data4, data5, data6, data7; + + void SetUp() { + data1 = std::make_shared<Elem>(2); + data2 = std::make_shared<Elem>(99); + data3 = std::make_shared<Elem>(1); + data4 = std::make_shared<Elem>(-5); + data5 = std::make_shared<Elem>(12); + data6 = std::make_shared<Elem>(-12); + data7 = std::make_shared<Elem>(-7); + + heap.push(data1); + heap.push(data2); + heap.push(data3); + heap.push(data4); + heap.push(data5); + 
heap.push(data6); + heap.push(data7); + } + + void TearDown() { + // nothing to do + } +}; // class HeapFixture1 + +TEST(IndIntruHeap, shared_ptr) { + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare> heap; + + EXPECT_TRUE(heap.empty()); + + heap.push(std::make_shared<Elem>(2)); + + EXPECT_FALSE(heap.empty()); + + heap.push(std::make_shared<Elem>(99)); + heap.push(std::make_shared<Elem>(1)); + heap.push(std::make_shared<Elem>(-5)); + heap.push(std::make_shared<Elem>(12)); + heap.push(std::make_shared<Elem>(-12)); + heap.push(std::make_shared<Elem>(-7)); + + // std::cout << heap << std::endl; + + EXPECT_FALSE(heap.empty()); + + EXPECT_EQ(-12, heap.top().data); + heap.pop(); + EXPECT_EQ(-7, heap.top().data); + heap.pop(); + EXPECT_EQ(-5, heap.top().data); + heap.pop(); + EXPECT_EQ(1, heap.top().data); + heap.pop(); + EXPECT_EQ(2, heap.top().data); + heap.pop(); + EXPECT_EQ(12, heap.top().data); + heap.pop(); + EXPECT_EQ(99, heap.top().data); + + EXPECT_FALSE(heap.empty()); + heap.pop(); + EXPECT_TRUE(heap.empty()); +} + + +TEST(IndIntruHeap, unique_ptr) { + crimson::IndIntruHeap<std::unique_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare> heap; + + EXPECT_TRUE(heap.empty()); + + heap.push(std::unique_ptr<Elem>(new Elem(2))); + + EXPECT_FALSE(heap.empty()); + + heap.push(std::unique_ptr<Elem>(new Elem(99))); + heap.push(std::unique_ptr<Elem>(new Elem(1))); + heap.push(std::unique_ptr<Elem>(new Elem(-5))); + heap.push(std::unique_ptr<Elem>(new Elem(12))); + heap.push(std::unique_ptr<Elem>(new Elem(-12))); + heap.push(std::unique_ptr<Elem>(new Elem(-7))); + + EXPECT_FALSE(heap.empty()); + + EXPECT_EQ(-12, heap.top().data); + heap.pop(); + EXPECT_EQ(-7, heap.top().data); + heap.pop(); + EXPECT_EQ(-5, heap.top().data); + heap.pop(); + EXPECT_EQ(1, heap.top().data); + heap.pop(); + EXPECT_EQ(2, heap.top().data); + heap.pop(); + EXPECT_EQ(12, heap.top().data); + heap.pop(); + EXPECT_EQ(99, heap.top().data); + + 
EXPECT_FALSE(heap.empty()); + heap.pop(); + EXPECT_TRUE(heap.empty()); +} + + +TEST(IndIntruHeap, regular_ptr) { + crimson::IndIntruHeap<Elem*, Elem, &Elem::heap_data, ElemCompare> heap; + + EXPECT_TRUE(heap.empty()); + + heap.push(new Elem(2)); + + EXPECT_FALSE(heap.empty()); + + heap.push(new Elem(99)); + heap.push(new Elem(1)); + heap.push(new Elem(-5)); + heap.push(new Elem(12)); + heap.push(new Elem(-12)); + heap.push(new Elem(-7)); + + EXPECT_FALSE(heap.empty()); + + EXPECT_EQ(-12, heap.top().data); + { + auto i = &heap.top(); + heap.pop(); + delete i; + } + + EXPECT_EQ(-7, heap.top().data); + { + auto i = &heap.top(); + heap.pop(); + delete i; + } + + EXPECT_EQ(-5, heap.top().data); + { + auto i = &heap.top(); + heap.pop(); + delete i; + } + + EXPECT_EQ(1, heap.top().data); + { + auto i = &heap.top(); + heap.pop(); + delete i; + } + + EXPECT_EQ(2, heap.top().data); + { + auto i = &heap.top(); + heap.pop(); + delete i; + } + + EXPECT_EQ(12, heap.top().data); + { + auto i = &heap.top(); + heap.pop(); + delete i; + } + + EXPECT_EQ(99, heap.top().data); + { + auto i = &heap.top(); + EXPECT_FALSE(heap.empty()); + heap.pop(); + delete i; + } + + EXPECT_TRUE(heap.empty()); +} + + +TEST(IndIntruHeap, K_3) { + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare, + 3> heap; + + EXPECT_TRUE(heap.empty()); + + heap.push(std::make_shared<Elem>(2)); + + EXPECT_FALSE(heap.empty()); + + heap.push(std::make_shared<Elem>(99)); + heap.push(std::make_shared<Elem>(1)); + heap.push(std::make_shared<Elem>(-5)); + heap.push(std::make_shared<Elem>(12)); + heap.push(std::make_shared<Elem>(-12)); + heap.push(std::make_shared<Elem>(-7)); + + // std::cout << heap << std::endl; + + EXPECT_FALSE(heap.empty()); + + EXPECT_EQ(-12, heap.top().data); + heap.pop(); + EXPECT_EQ(-7, heap.top().data); + heap.pop(); + EXPECT_EQ(-5, heap.top().data); + heap.pop(); + EXPECT_EQ(1, heap.top().data); + heap.pop(); + EXPECT_EQ(2, heap.top().data); + heap.pop(); + 
EXPECT_EQ(12, heap.top().data); + heap.pop(); + EXPECT_EQ(99, heap.top().data); + + EXPECT_FALSE(heap.empty()); + heap.pop(); + EXPECT_TRUE(heap.empty()); +} + + +TEST(IndIntruHeap, K_4) { + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare, + 4> heap; + + EXPECT_TRUE(heap.empty()); + + heap.push(std::make_shared<Elem>(2)); + + EXPECT_FALSE(heap.empty()); + + heap.push(std::make_shared<Elem>(99)); + heap.push(std::make_shared<Elem>(1)); + heap.push(std::make_shared<Elem>(-5)); + heap.push(std::make_shared<Elem>(12)); + heap.push(std::make_shared<Elem>(-12)); + heap.push(std::make_shared<Elem>(-7)); + + // std::cout << heap << std::endl; + + EXPECT_FALSE(heap.empty()); + + EXPECT_EQ(-12, heap.top().data); + heap.pop(); + EXPECT_EQ(-7, heap.top().data); + heap.pop(); + EXPECT_EQ(-5, heap.top().data); + heap.pop(); + EXPECT_EQ(1, heap.top().data); + heap.pop(); + EXPECT_EQ(2, heap.top().data); + heap.pop(); + EXPECT_EQ(12, heap.top().data); + heap.pop(); + EXPECT_EQ(99, heap.top().data); + + EXPECT_FALSE(heap.empty()); + heap.pop(); + EXPECT_TRUE(heap.empty()); +} + + +TEST(IndIntruHeap, K_10) { + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare, + 10> heap; + + EXPECT_TRUE(heap.empty()); + + heap.push(std::make_shared<Elem>(2)); + + EXPECT_FALSE(heap.empty()); + + heap.push(std::make_shared<Elem>(99)); + heap.push(std::make_shared<Elem>(1)); + heap.push(std::make_shared<Elem>(-5)); + heap.push(std::make_shared<Elem>(12)); + heap.push(std::make_shared<Elem>(-12)); + heap.push(std::make_shared<Elem>(-7)); + + // std::cout << heap << std::endl; + + EXPECT_FALSE(heap.empty()); + + EXPECT_EQ(-12, heap.top().data); + heap.pop(); + EXPECT_EQ(-7, heap.top().data); + heap.pop(); + EXPECT_EQ(-5, heap.top().data); + heap.pop(); + EXPECT_EQ(1, heap.top().data); + heap.pop(); + EXPECT_EQ(2, heap.top().data); + heap.pop(); + EXPECT_EQ(12, heap.top().data); + heap.pop(); + EXPECT_EQ(99, heap.top().data); + + 
EXPECT_FALSE(heap.empty()); + heap.pop(); + EXPECT_TRUE(heap.empty()); +} + + +TEST(IndIntruHeap, multi_K) { + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare, + 2> heap2; + + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare, + 3> heap3; + + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare, + 4> heap4; + + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare, + 10> heap10; + + // 250 should give us at least 4 levels on all heaps + constexpr size_t count = 250; + + std::srand(std::time(0)); // use current time as seed for random generator + + // insert same set of random values into the four heaps + for (size_t i = 0; i < count; ++i) { + int value = std::rand() % 201 - 100; // -100...+100 + auto data = std::make_shared<Elem>(value); + heap2.push(data); + heap3.push(data); + heap4.push(data); + heap10.push(data); + } + + auto bound = std::numeric_limits<decltype(Elem::data)>::min(); + + for (size_t i = 0; i < count; ++i) { + auto current = heap2.top().data; + + EXPECT_GE(current, bound) << + "we should never go down, only increase or remain the same"; + EXPECT_EQ(current, heap3.top().data) << + "heap1's data and heap3's data should match"; + EXPECT_EQ(current, heap4.top().data) << + "heap1's data and heap4's data should match"; + EXPECT_EQ(current, heap10.top().data) << + "heap1's data and heap10's data should match"; + + heap2.pop(); + heap3.pop(); + heap4.pop(); + heap10.pop(); + + bound = current; + } + + EXPECT_TRUE(heap2.empty()) << "should be empty after all elements popped"; + EXPECT_TRUE(heap3.empty()) << "should be empty after all elements popped"; + EXPECT_TRUE(heap4.empty()) << "should be empty after all elements popped"; + EXPECT_TRUE(heap10.empty()) << "should be empty after all elements popped"; +} + + +TEST(IndIntruHeap, demote) { + crimson::IndIntruHeap<std::unique_ptr<Elem>, + Elem, + &Elem::heap_data, + 
ElemCompare> heap; + + heap.push(std::unique_ptr<Elem>(new Elem(2))); + heap.push(std::unique_ptr<Elem>(new Elem(99))); + heap.push(std::unique_ptr<Elem>(new Elem(1))); + heap.push(std::unique_ptr<Elem>(new Elem(-5))); + heap.push(std::unique_ptr<Elem>(new Elem(12))); + heap.push(std::unique_ptr<Elem>(new Elem(-12))); + heap.push(std::unique_ptr<Elem>(new Elem(-7))); + + heap.top().data = 24; + + heap.demote(heap.top()); + + EXPECT_EQ(-7, heap.top().data); + + heap.pop(); + heap.pop(); + heap.pop(); + heap.pop(); + heap.pop(); + + EXPECT_EQ(24, heap.top().data); +} + + +TEST(IndIntruHeap, demote_not) { + crimson::IndIntruHeap<std::unique_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare> heap; + + heap.push(std::unique_ptr<Elem>(new Elem(2))); + heap.push(std::unique_ptr<Elem>(new Elem(99))); + heap.push(std::unique_ptr<Elem>(new Elem(1))); + heap.push(std::unique_ptr<Elem>(new Elem(-5))); + heap.push(std::unique_ptr<Elem>(new Elem(12))); + heap.push(std::unique_ptr<Elem>(new Elem(-12))); + heap.push(std::unique_ptr<Elem>(new Elem(-7))); + + heap.top().data = -99; + + heap.demote(heap.top()); + + EXPECT_EQ(-99, heap.top().data); + + heap.pop(); + + EXPECT_EQ(-7, heap.top().data); +} + + +TEST(IndIntruHeap, promote_and_demote) { + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare> heap; + + auto data1 = std::make_shared<Elem>(1); + + heap.push(std::make_shared<Elem>(2)); + heap.push(std::make_shared<Elem>(99)); + heap.push(data1); + heap.push(std::make_shared<Elem>(-5)); + heap.push(std::make_shared<Elem>(12)); + heap.push(std::make_shared<Elem>(-12)); + heap.push(std::make_shared<Elem>(-7)); + + EXPECT_EQ(-12, heap.top().data); + + data1->data = -99; + heap.promote(*data1); + + EXPECT_EQ(-99, heap.top().data); + + data1->data = 999; + heap.demote(*data1); + + EXPECT_EQ(-12, heap.top().data); + + data1->data = 9; + heap.promote(*data1); + + heap.pop(); // remove -12 + heap.pop(); // remove -7 + heap.pop(); // remove -5 + 
heap.pop(); // remove 2 + + EXPECT_EQ(9, heap.top().data); +} + + +TEST(IndIntruHeap, adjust) { + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare> heap; + + auto data1 = std::make_shared<Elem>(1); + + heap.push(std::make_shared<Elem>(2)); + heap.push(std::make_shared<Elem>(99)); + heap.push(data1); + heap.push(std::make_shared<Elem>(-5)); + heap.push(std::make_shared<Elem>(12)); + heap.push(std::make_shared<Elem>(-12)); + heap.push(std::make_shared<Elem>(-7)); + + // heap.display_sorted(std::cout); + + EXPECT_EQ(-12, heap.top().data); + + data1->data = 999; + heap.adjust(*data1); + + EXPECT_EQ(-12, heap.top().data); + + data1->data = -99; + heap.adjust(*data1); + + EXPECT_EQ(-99, heap.top().data); + + data1->data = 9; + heap.adjust(*data1); + + EXPECT_EQ(-12, heap.top().data); + + heap.pop(); // remove -12 + heap.pop(); // remove -7 + heap.pop(); // remove -5 + heap.pop(); // remove 2 + + EXPECT_EQ(9, heap.top().data); +} + + +TEST(IndIntruHeap, remove_careful) { + // here we test whether a common mistake in implementing remove is + // done; if after we remove an item and move the last element of the + // heap to the position of the removed element, we need to sift it + // rather than sift_down it. 
+ + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare, + 2> heap; + + heap.push(std::make_shared<Elem>(0)); + heap.push(std::make_shared<Elem>(10)); + heap.push(std::make_shared<Elem>(100)); + heap.push(std::make_shared<Elem>(20)); + heap.push(std::make_shared<Elem>(30)); + heap.push(std::make_shared<Elem>(200)); + heap.push(std::make_shared<Elem>(300)); + heap.push(std::make_shared<Elem>(40)); + + auto k = heap.find(Elem(200)); + EXPECT_NE(heap.end(), k) << + "we should have found an element with the value 200, which we'll remove"; + heap.remove(k); + + auto i = heap.cbegin(); + EXPECT_EQ(0, i->data); + ++i; + EXPECT_EQ(10, i->data); + ++i; + EXPECT_EQ(40, i->data) << + "this needs to be 40 or there's a mistake in implementation"; + ++i; + EXPECT_EQ(20, i->data); + ++i; + EXPECT_EQ(30, i->data); + ++i; + EXPECT_EQ(100, i->data) << + "this needs to be 100 or there's a mistake in implementation"; +} + + +TEST(IndIntruHeap, remove_greatest) { + // See bug #43376 -- removing the greatest element causes an oob + // vector reference + + crimson::IndIntruHeap<std::shared_ptr<Elem>, + Elem, + &Elem::heap_data, + ElemCompare, + 2> heap; + + const int num = 4096; + std::vector<int> toinsert; + toinsert.reserve(num); + std::vector<int> toremove; + toremove.reserve(num - (num/4)); + std::vector<int> tocheck; + tocheck.reserve(num/4); + for (int i = 0; i < num; ++i) { + toinsert.push_back(i); + if (i < (num/2)) { + tocheck.push_back(i); + } else { + toremove.push_back(i); + } + } + + std::default_random_engine generator(0); + std::shuffle( + toinsert.begin(), + toinsert.end(), + generator); + + for (auto i: toinsert) { + heap.push(std::make_shared<Elem>(i)); + } + + for (auto i: toremove) { + auto k = heap.find(Elem(i)); + EXPECT_NE(heap.end(), k) << + "we should have found an element with the value 300, which we'll remove"; + heap.remove(k); + } + + for (auto i: tocheck) { + EXPECT_FALSE(heap.empty()); + EXPECT_EQ(Elem(i), heap.top()); + 
heap.pop(); + } + EXPECT_TRUE(heap.empty()); +} + + +TEST_F(HeapFixture1, shared_data) { + + crimson::IndIntruHeap<std::shared_ptr<Elem>,Elem,&Elem::heap_data_alt,ElemCompareAlt> heap2; + + heap2.push(data1); + heap2.push(data2); + heap2.push(data3); + heap2.push(data4); + heap2.push(data5); + heap2.push(data6); + heap2.push(data7); + + data3->data = 32; + heap.adjust(*data3); + heap2.adjust(*data3); + + EXPECT_EQ(-12, heap.top().data); + heap.pop(); + EXPECT_EQ(-7, heap.top().data); + heap.pop(); + EXPECT_EQ(-5, heap.top().data); + heap.pop(); + EXPECT_EQ(2, heap.top().data); + heap.pop(); + EXPECT_EQ(12, heap.top().data); + heap.pop(); + EXPECT_EQ(32, heap.top().data); + heap.pop(); + EXPECT_EQ(99, heap.top().data); + + EXPECT_EQ(32, heap2.top().data); + heap2.pop(); + EXPECT_EQ(12, heap2.top().data); + heap2.pop(); + EXPECT_EQ(2, heap2.top().data); + heap2.pop(); + EXPECT_EQ(-12, heap2.top().data); + heap2.pop(); + EXPECT_EQ(99, heap2.top().data); + heap2.pop(); + EXPECT_EQ(-5, heap2.top().data); + heap2.pop(); + EXPECT_EQ(-7, heap2.top().data); +} + + +TEST_F(HeapFixture1, iterator_basics) { + { + unsigned count = 0; + for(auto i = heap.begin(); i != heap.end(); ++i) { + ++count; + } + + EXPECT_EQ(7u, count) << "count should be 7"; + } + + auto i1 = heap.begin(); + + EXPECT_EQ(-12, i1->data) << + "first member with * operator must be smallest"; + + EXPECT_EQ(-12, (*i1).data) << + "first member with -> operator must be smallest"; + + Elem& e1 = *i1; + EXPECT_EQ(-12, e1.data) << + "first member with -> operator must be smallest"; + + { + std::set<int> values; + values.insert(2); + values.insert(99); + values.insert(1); + values.insert(-5); + values.insert(12); + values.insert(-12); + values.insert(-7); + + for(auto i = heap.begin(); i != heap.end(); ++i) { + auto v = *i; + EXPECT_NE(values.end(), values.find(v.data)) << + "value in heap must be part of original set"; + values.erase(v.data); + } + EXPECT_EQ(0u, values.size()) << "all values must have been seen"; + 
} +} + + +TEST_F(HeapFixture1, const_iterator_basics) { + const auto& cheap = heap; + + { + unsigned count = 0; + for(auto i = cheap.cbegin(); i != cheap.cend(); ++i) { + ++count; + } + + EXPECT_EQ(7u, count) << "count should be 7"; + } + + auto i1 = heap.cbegin(); + + EXPECT_EQ(-12, i1->data) << + "first member with * operator must be smallest"; + + EXPECT_EQ(-12, (*i1).data) << + "first member with -> operator must be smallest"; + + const Elem& e1 = *i1; + EXPECT_EQ(-12, e1.data) << + "first member with -> operator must be smallest"; + + { + std::set<int> values; + values.insert(2); + values.insert(99); + values.insert(1); + values.insert(-5); + values.insert(12); + values.insert(-12); + values.insert(-7); + + for(auto i = heap.cbegin(); i != heap.cend(); ++i) { + auto v = *i; + EXPECT_NE(values.end(), values.find(v.data)) << + "value in heap must be part of original set"; + values.erase(v.data); + } + EXPECT_EQ(0u, values.size()) << "all values must have been seen"; + } +} + + +TEST_F(HeapFixture1, iterator_find_rfind) { + { + auto it1 = heap.find(data7); + EXPECT_NE(heap.end(), it1) << + "find by indirection for included element should succeed"; + EXPECT_EQ(-7, it1->data) << + "find by indirection for included element should result in right value"; + + auto fake_data = std::make_shared<Elem>(-7); + auto it2 = heap.find(fake_data); + EXPECT_EQ(heap.end(), it2) << + "find by indirection for not included element should fail"; + } + + { + auto it1 = heap.find(Elem(-7)); + EXPECT_NE(heap.end(), it1) << + "find by value for included element should succeed"; + EXPECT_EQ(-7, it1->data) << + "find by value for included element should result in right value"; + + auto it2 = heap.find(Elem(7)); + EXPECT_EQ(heap.end(), it2) << + "find by value for not included element should fail"; + } + + { + auto it1 = heap.rfind(data7); + EXPECT_NE(heap.end(), it1) << + "reverse find by indirecton for included element should succeed"; + EXPECT_EQ(-7, it1->data) << + "reverse find by 
indirection for included element should result " + "in right value"; + + auto fake_data = std::make_shared<Elem>(-7); + auto it2 = heap.rfind(fake_data); + EXPECT_EQ(heap.end(), it2) << + "reverse find by indirection for not included element should fail"; + } + + { + auto it1 = heap.rfind(Elem(-7)); + EXPECT_NE(heap.end(), it1) << + "reverse find by value for included element should succeed"; + EXPECT_EQ(-7, it1->data) << + "reverse find by value for included element should result " + "in right value"; + + auto it2 = heap.rfind(Elem(7)); + EXPECT_EQ(heap.end(), it2) << + "reverse find by value for not included element should fail"; + } +} + + +TEST_F(HeapFixture1, const_iterator_find_rfind) { + const auto& c_heap = heap; + + { + auto it1 = c_heap.find(data7); + EXPECT_NE(c_heap.cend(), it1) << + "find by indirection for included element should succeed"; + EXPECT_EQ(-7, it1->data) << + "find by indirection for included element should result in right value"; + + auto fake_data = std::make_shared<Elem>(-7); + auto it2 = c_heap.find(fake_data); + EXPECT_EQ(c_heap.cend(), it2) << + "find by indirection for not included element should fail"; + } + + { + auto it1 = c_heap.find(Elem(-7)); + EXPECT_NE(c_heap.cend(), it1) << + "find by value for included element should succeed"; + EXPECT_EQ(-7, it1->data) << + "find by value for included element should result in right value"; + + auto it2 = c_heap.find(Elem(7)); + EXPECT_EQ(c_heap.cend(), it2) << + "find by value for not included element should fail"; + } + + { + auto it1 = c_heap.rfind(data7); + EXPECT_NE(c_heap.cend(), it1) << + "reverse find by indirecton for included element should succeed"; + EXPECT_EQ(-7, it1->data) << + "reverse find by indirection for included element should result " + "in right value"; + + auto fake_data = std::make_shared<Elem>(-7); + auto it2 = c_heap.rfind(fake_data); + EXPECT_EQ(c_heap.cend(), it2) << + "reverse find by indirection for not included element should fail"; + } + + { + auto it1 = 
c_heap.rfind(Elem(-7)); + EXPECT_NE(c_heap.cend(), it1) << + "reverse find by value for included element should succeed"; + EXPECT_EQ(-7, it1->data) << + "reverse find by value for included element should result " + "in right value"; + + auto it2 = c_heap.rfind(Elem(7)); + EXPECT_EQ(c_heap.cend(), it2) << + "reverse find by value for not included element should fail"; + } +} + + +TEST_F(HeapFixture1, iterator_remove) { + auto it1 = heap.find(data7); + EXPECT_NE(heap.end(), it1) << "find for included element should succeed"; + + heap.remove(it1); + + auto it2 = heap.find(data7); + EXPECT_EQ(heap.end(), it2) << "find for removed element should fail"; + + for (auto it3 = heap.begin(); it3 != heap.end(); ++it3) { + EXPECT_NE(-7, it3->data) << + "iterating through heap should not find removed value"; + } + + // move through heap without -7 + EXPECT_EQ(-12, heap.top().data); + heap.pop(); + EXPECT_EQ(-5, heap.top().data); + heap.pop(); + EXPECT_EQ(1, heap.top().data); + heap.pop(); + EXPECT_EQ(2, heap.top().data); + heap.pop(); + EXPECT_EQ(12, heap.top().data); + heap.pop(); + EXPECT_EQ(99, heap.top().data); + heap.pop(); +} + + +TEST_F(HeapFixture1, four_tops) { + Elem& top1 = heap.top(); + EXPECT_EQ(-12, top1.data); + + const Elem& top2 = heap.top(); + EXPECT_EQ(-12, top2.data); + + std::shared_ptr<Elem> top3 = heap.top_ind(); + EXPECT_EQ(-12, top3->data); + + const std::shared_ptr<Elem> top4 = heap.top_ind(); + EXPECT_EQ(-12, top4->data); + + const auto& c_heap = heap; + + const Elem& top5 = c_heap.top(); + EXPECT_EQ(-12, top5.data); + + const std::shared_ptr<Elem> top6 = c_heap.top_ind(); + EXPECT_EQ(-12, top6->data); +} + + +TEST_F(HeapFixture1, display_sorted) { + std::stringstream ss; + + heap.display_sorted(ss); + + std::string s = ss.str(); + + EXPECT_GT(s.length(), 0u); + + auto negseven = s.find("-7"); + EXPECT_NE(negseven, std::string::npos); + + auto ninetynine = s.find("99"); + EXPECT_NE(ninetynine, std::string::npos); + + // index of -7 should be less than 
index of 99 + EXPECT_LT(negseven, ninetynine); + +#if 0 + std::cout << s << std::endl; +#endif +} diff --git a/src/dmclock/support/test/test_intrusive_heap.cc b/src/dmclock/support/test/test_intrusive_heap.cc new file mode 100644 index 000000000..18d770f31 --- /dev/null +++ b/src/dmclock/support/test/test_intrusive_heap.cc @@ -0,0 +1,93 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#include <string> +#include <iostream> + +#include "intrusive_heap.h" + + +struct TestCompare; +struct TestIntruData; + + +class Test1 { + friend TestCompare; + friend TestIntruData; + + int data; + crimson::IntruHeapData heap_data; + +public: + explicit Test1(int _data) : data(_data) {} + + friend std::ostream& operator<<(std::ostream& out, const Test1& d) { + out << d.data << " (" << d.heap_data << ")"; + return out; + } + + int& the_data() { return data; } +}; + + +struct TestCompare { + bool operator()(const Test1& d1, const Test1& d2) { + return d1.data < d2.data; + } +}; + + +struct TestIntruData { + crimson::IntruHeapData& operator()(Test1& d) { + return d.heap_data; + } +}; + + +int main(int argc, char** argv) { + Test1 d1(2); + Test1 d2(3); + Test1 d3(1); + Test1 d4(-5); + + crimson::IntruHeap<Test1, TestIntruData, TestCompare> my_heap; + + my_heap.push(d1); + my_heap.push(d2); + my_heap.push(d3); + my_heap.push(d4); + my_heap.push(Test1(-9)); + my_heap.push(Test1(99)); + my_heap.push(Test1(0)); + + std::cout << my_heap << std::endl; + + auto& t = my_heap.top(); + t.the_data() = 17; + my_heap.adjust_down(t); + + std::cout << my_heap << std::endl; + + my_heap.display_sorted(std::cout); + + while 
(!my_heap.empty()) { + auto& top = my_heap.top(); + std::cout << top << std::endl; + my_heap.pop(); + std::cout << my_heap << std::endl; + } + + return 0; +} diff --git a/src/dmclock/test/CMakeLists.txt b/src/dmclock/test/CMakeLists.txt new file mode 100644 index 000000000..52ab6bfea --- /dev/null +++ b/src/dmclock/test/CMakeLists.txt @@ -0,0 +1,34 @@ +include(CheckIncludeFileCXX) +check_include_file_cxx("sys/prctl.h" HAVE_SYS_PRCTL_H) + +set(support_srcs ../sim/src/test_dmclock.cc) +set(test_srcs + test_test_client.cc + test_dmclock_server.cc + test_dmclock_client.cc + ) + +set_source_files_properties(${core_srcs} ${test_srcs} + PROPERTIES + COMPILE_FLAGS "${local_flags}" + ) + +add_executable(dmclock-tests ${test_srcs} ${support_srcs}) +if(HAVE_SYS_PRCTL_H) + target_compile_definitions(dmclock-tests PRIVATE "HAVE_SYS_PRCTL_H") +endif() +target_include_directories(dmclock-tests PRIVATE + ../sim/src ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(dmclock-tests PRIVATE SYSTEM + "${GTEST_INCLUDE_DIRS}") + +target_link_libraries(dmclock-tests LINK_PRIVATE + dmclock + Threads::Threads + GTest::GTest + GTest::Main) + +add_test(NAME dmclock-tests + COMMAND $<TARGET_FILE:dmclock-tests>) +add_test(NAME dmclock-data-struct-tests + COMMAND $<TARGET_FILE:dmclock-data-struct-tests>) diff --git a/src/dmclock/test/dmcPrCtl.h b/src/dmclock/test/dmcPrCtl.h new file mode 100644 index 000000000..e0b2216f2 --- /dev/null +++ b/src/dmclock/test/dmcPrCtl.h @@ -0,0 +1,56 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2017 Red Hat Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +// essentially the same as ceph's PrCtl.h, copied into the dmclock library + +#ifdef HAVE_SYS_PRCTL_H +#include <iostream> +#include <sys/prctl.h> +#include <errno.h> + +struct PrCtl { + int saved_state = -1; + int set_dumpable(int new_state) { + int r = prctl(PR_SET_DUMPABLE, new_state); + if (r) { + r = -errno; + std::cerr << "warning: unable to " << (new_state ? "set" : "unset") + << " dumpable flag: " << strerror(r) + << std::endl; + } + return r; + } + PrCtl(int new_state = 0) { + int r = prctl(PR_GET_DUMPABLE); + if (r == -1) { + r = errno; + std::cerr << "warning: unable to get dumpable flag: " << strerror(r) + << std::endl; + } else if (r != new_state) { + if (!set_dumpable(new_state)) { + saved_state = r; + } + } + } + ~PrCtl() { + if (saved_state < 0) { + return; + } + set_dumpable(saved_state); + } +}; +#else +struct PrCtl {}; +#endif diff --git a/src/dmclock/test/test_dmclock_client.cc b/src/dmclock/test/test_dmclock_client.cc new file mode 100644 index 000000000..58840457b --- /dev/null +++ b/src/dmclock/test/test_dmclock_client.cc @@ -0,0 +1,307 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#include <chrono> +#include <mutex> +#include <functional> +#include <iostream> + + +#include "dmclock_client.h" +#include "dmclock_util.h" +#include "gtest/gtest.h" + + +namespace dmc = crimson::dmclock; + + +namespace crimson { + namespace dmclock { + + /* + * Allows us to test the code provided with the mutex provided locked. 
+ */ + static void test_locked(std::mutex& mtx, std::function<void()> code) { + std::lock_guard<std::mutex> l(mtx); + code(); + } + + + TEST(dmclock_client, server_erase) { + using ServerId = int; + // using ClientId = int; + + ServerId server = 101; + // ClientId client = 3; + + // dmc::PhaseType resp_params = dmc::PhaseType::reservation; + + dmc::ServiceTracker<ServerId> st(std::chrono::seconds(2), + std::chrono::seconds(3)); + + auto lock_st = [&](std::function<void()> code) { + test_locked(st.data_mtx, code); + }; + + /* The timeline should be as follows: + * + * 0 seconds : request created + * + * 1 seconds : map is size 1 + * + * 2 seconds : clean notes first mark; +2 is base for further calcs + * + * 4 seconds : clean does nothing except makes another mark + * + * 5 seconds : when we're secheduled to erase (+2 + 3) + * + * 5 seconds : since the clean job hasn't run yet, map still size 1 + * + * 6 seconds : clean erases server + * + * 7 seconds : verified server is gone (map size 0) + */ + + lock_st([&] () { + EXPECT_EQ(0u, st.server_map.size()) << + "server map initially has size 0"; + }); + + std::this_thread::sleep_for(std::chrono::seconds(1)); + + // call for side effects + (void) st.get_req_params(server); + + lock_st([&] () { + EXPECT_EQ(1u, st.server_map.size()) << + "server map has size 1 after first request"; + }); + + std::this_thread::sleep_for(std::chrono::seconds(4)); + + lock_st([&] () { + EXPECT_EQ(1u, st.server_map.size()) << + "server map has size 1 just before erase"; + }); + + std::this_thread::sleep_for(std::chrono::seconds(2)); + + lock_st([&] () { + EXPECT_EQ(0u, st.server_map.size()) << + "server map has size 0 just after erase"; + }); + } // TEST + + + TEST(dmclock_client, delta_rho_values_borrowing_tracker) { + using ServerId = int; + // using ClientId = int; + + ServerId server1 = 101; + ServerId server2 = 7; + // ClientId client = 3; + + // RespParams<ServerId> resp_params(server, dmc::PhaseType::reservation); + + 
dmc::ServiceTracker<ServerId,dmc::BorrowingTracker> st(std::chrono::seconds(2), + std::chrono::seconds(3)); + auto rp1 = st.get_req_params(server1); + + EXPECT_EQ(1u, rp1.delta) << + "delta should be 1 with no intervening responses by" << + "other servers"; + EXPECT_EQ(1u, rp1.rho) << + "rho should be 1 with no intervening reservation responses by" << + "other servers"; + + auto rp2 = st.get_req_params(server1); + + EXPECT_EQ(1u, rp2.delta) << + "delta should be 1 with no intervening responses by" << + "other servers"; + EXPECT_EQ(1u, rp2.rho) << + "rho should be 1 with no intervening reservation responses by" << + "other servers"; + + // RESPONSE + st.track_resp(server1, dmc::PhaseType::priority, 1u); + + auto rp3 = st.get_req_params(server1); + + EXPECT_EQ(1u, rp3.delta) << + "delta should be 1 with no intervening responses by" << + "other servers"; + EXPECT_EQ(1u, rp3.rho) << + "rho should be 1 with no intervening reservation responses by" << + "other servers"; + + // RESPONSE + st.track_resp(server2, dmc::PhaseType::priority, 1u); + + auto rp4 = st.get_req_params(server1); + + EXPECT_EQ(1u, rp4.delta) << + "delta should be 2 with one intervening priority response by " << + "another server"; + EXPECT_EQ(1u, rp4.rho) << + "rho should be 1 with one intervening priority responses by " << + "another server"; + + auto rp5 = st.get_req_params(server1); + + EXPECT_EQ(1u, rp5.delta) << + "delta should be 1 with no intervening responses by" << + "other servers"; + EXPECT_EQ(1u, rp5.rho) << + "rho should be 1 with no intervening reservation responses by" << + "other servers"; + + // RESPONSE + st.track_resp(server2, dmc::PhaseType::reservation, 1u); + + auto rp6 = st.get_req_params(server1); + + EXPECT_EQ(1u, rp6.delta) << + "delta should be 2 with one intervening reservation response by " << + "another server"; + EXPECT_EQ(1u, rp6.rho) << + "rho should be 2 with one intervening reservation responses by " << + "another server"; + + st.track_resp(server2, 
dmc::PhaseType::reservation, 1u); + st.track_resp(server1, dmc::PhaseType::priority, 1u); + st.track_resp(server2, dmc::PhaseType::priority, 1u); + st.track_resp(server2, dmc::PhaseType::reservation, 1u); + st.track_resp(server1, dmc::PhaseType::reservation, 1u); + st.track_resp(server1, dmc::PhaseType::priority, 1u); + st.track_resp(server2, dmc::PhaseType::priority, 1u); + + auto rp7 = st.get_req_params(server1); + + EXPECT_EQ(5u, rp7.delta) << + "delta should be 5 with four intervening responses by " << + "another server"; + EXPECT_EQ(1u, rp7.rho) << + "rho should be 1 with two intervening reservation responses by " << + "another server"; + + auto rp7b = st.get_req_params(server2); + + EXPECT_EQ(9u, rp7b.delta) << + "delta should be 9 with three intervening responses by " << + "another server"; + EXPECT_EQ(4u, rp7b.rho) << + "rho should be 4 with one intervening reservation responses by " << + "another server"; + + auto rp8 = st.get_req_params(server1); + + EXPECT_EQ(1u, rp8.delta) << + "delta should be 1 with no intervening responses by " << + "another server"; + EXPECT_EQ(1u, rp8.rho) << + "rho should be 1 with no intervening reservation responses by " << + "another server"; + + auto rp8b = st.get_req_params(server2); + EXPECT_EQ(1u, rp8b.delta) << + "delta should be 1 with no intervening responses by " << + "another server"; + EXPECT_EQ(1u, rp8b.rho) << + "rho should be 1 with no intervening reservation responses by " << + "another server"; + } // TEST + + + // NB: the BorrowingTracker has not been fully tested and the + // expected values below have not yet been compared with the + // theoretically correct values. 
+ TEST(dmclock_client, delta_rho_values_orig_tracker) { + using ServerId = int; + + ServerId server1 = 101; + ServerId server2 = 7; + + dmc::ServiceTracker<ServerId,OrigTracker> + st(std::chrono::seconds(2), std::chrono::seconds(3)); + + auto rp1 = st.get_req_params(server1); + + EXPECT_EQ(1u, rp1.delta); + EXPECT_EQ(1u, rp1.rho); + + auto rp2 = st.get_req_params(server1); + + EXPECT_EQ(0u, rp2.delta); + EXPECT_EQ(0u, rp2.rho); + + st.track_resp(server1, dmc::PhaseType::priority, 1u); + + auto rp3 = st.get_req_params(server1); + + EXPECT_EQ(0u, rp3.delta); + EXPECT_EQ(0u, rp3.rho); + + st.track_resp(server2, dmc::PhaseType::priority, 1u); + + auto rp4 = st.get_req_params(server1); + + EXPECT_EQ(1u, rp4.delta); + EXPECT_EQ(0u, rp4.rho); + + auto rp5 = st.get_req_params(server1); + + EXPECT_EQ(0u, rp5.delta); + EXPECT_EQ(0u, rp5.rho); + + st.track_resp(server2, dmc::PhaseType::reservation, 1u); + + auto rp6 = st.get_req_params(server1); + + EXPECT_EQ(1u, rp6.delta); + EXPECT_EQ(1u, rp6.rho); + + // auto rp6_b = st.get_req_params(server2); + + st.track_resp(server2, dmc::PhaseType::reservation, 1u); + st.track_resp(server1, dmc::PhaseType::priority, 1u); + st.track_resp(server2, dmc::PhaseType::priority, 1u); + st.track_resp(server2, dmc::PhaseType::reservation, 1u); + st.track_resp(server1, dmc::PhaseType::reservation, 1u); + st.track_resp(server1, dmc::PhaseType::priority, 1u); + st.track_resp(server2, dmc::PhaseType::priority, 1u); + + auto rp7 = st.get_req_params(server1); + + EXPECT_EQ(4u, rp7.delta); + EXPECT_EQ(2u, rp7.rho); + + auto rp7b = st.get_req_params(server2); + + EXPECT_EQ(3u, rp7b.delta); + EXPECT_EQ(1u, rp7b.rho); + + auto rp8 = st.get_req_params(server1); + + EXPECT_EQ(0u, rp8.delta); + EXPECT_EQ(0u, rp8.rho); + + auto rp8b = st.get_req_params(server2); + EXPECT_EQ(0u, rp8b.delta); + EXPECT_EQ(0u, rp8b.rho); + } // TEST + + } // namespace dmclock +} // namespace crimson diff --git a/src/dmclock/test/test_dmclock_server.cc 
b/src/dmclock/test/test_dmclock_server.cc new file mode 100644 index 000000000..f591e2e40 --- /dev/null +++ b/src/dmclock/test/test_dmclock_server.cc @@ -0,0 +1,1140 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. + */ + + +#include <memory> +#include <chrono> +#include <iostream> +#include <list> +#include <vector> + + +#include "dmclock_server.h" +#include "dmclock_util.h" +#include "gtest/gtest.h" + +// process control to prevent core dumps during gtest death tests +#include "dmcPrCtl.h" + + +namespace dmc = crimson::dmclock; + + +// we need a request object; an empty one will do +struct Request { +}; + + +namespace crimson { + namespace dmclock { + + /* + * Allows us to test the code provided with the mutex provided locked. 
+ */ + static void test_locked(std::mutex& mtx, std::function<void()> code) { + std::unique_lock<std::mutex> l(mtx); + code(); + } + + + TEST(dmclock_server, bad_tag_deathtest) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,Request,true>; + using QueueRef = std::unique_ptr<Queue>; + + ClientId client1 = 17; + ClientId client2 = 18; + + double reservation = 0.0; + double weight = 0.0; + + dmc::ClientInfo ci1(reservation, weight, 0.0); + dmc::ClientInfo ci2(reservation, weight, 1.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + if (client1 == c) return &ci1; + else if (client2 == c) return &ci2; + else { + ADD_FAILURE() << "got request from neither of two clients"; + return nullptr; + } + }; + + QueueRef pq(new Queue(client_info_f, AtLimit::Wait)); + ReqParams req_params(1,1); + + // Disable coredumps + PrCtl unset_dumpable; + + EXPECT_DEATH_IF_SUPPORTED(pq->add_request(Request{}, client1, req_params), + "Assertion.*reservation.*max_tag.*" + "proportion.*max_tag") << + "we should fail if a client tries to generate a reservation tag " + "where reservation and proportion are both 0"; + + + EXPECT_DEATH_IF_SUPPORTED(pq->add_request(Request{}, client2, req_params), + "Assertion.*reservation.*max_tag.*" + "proportion.*max_tag") << + "we should fail if a client tries to generate a reservation tag " + "where reservation and proportion are both 0"; + + EXPECT_DEATH_IF_SUPPORTED(Queue(client_info_f, AtLimit::Reject), + "Assertion.*Reject.*Delayed") << + "we should fail if a client tries to construct a queue with both " + "DelayedTagCalc and AtLimit::Reject"; + } + + + TEST(dmclock_server, client_idle_erase) { + using ClientId = int; + using Queue = dmc::PushPriorityQueue<ClientId,Request>; + ClientId client = 17; + double reservation = 100.0; + + dmc::ClientInfo ci(reservation, 1.0, 0.0); + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &ci; + }; + auto server_ready_f = [] () -> bool { return 
true; }; + auto submit_req_f = [] (const ClientId& c, + std::unique_ptr<Request> req, + dmc::PhaseType phase, + uint64_t req_cost) { + // empty; do nothing + }; + + Queue pq(client_info_f, + server_ready_f, + submit_req_f, + std::chrono::seconds(3), + std::chrono::seconds(5), + std::chrono::seconds(2), + AtLimit::Wait); + + auto lock_pq = [&](std::function<void()> code) { + test_locked(pq.data_mtx, code); + }; + + + /* The timeline should be as follows: + * + * 0 seconds : request created + * + * 1 seconds : map is size 1, idle is false + * + * 2 seconds : clean notes first mark; +2 is base for further calcs + * + * 4 seconds : clean does nothing except makes another mark + * + * 5 seconds : when we're secheduled to idle (+2 + 3) + * + * 6 seconds : clean idles client + * + * 7 seconds : when we're secheduled to erase (+2 + 5) + * + * 7 seconds : verified client is idle + * + * 8 seconds : clean erases client info + * + * 9 seconds : verified client is erased + */ + + lock_pq([&] () { + EXPECT_EQ(0u, pq.client_map.size()) << + "client map initially has size 0"; + }); + + Request req; + dmc::ReqParams req_params(1, 1); + EXPECT_EQ(0, pq.add_request_time(req, client, req_params, dmc::get_time())); + + std::this_thread::sleep_for(std::chrono::seconds(1)); + + lock_pq([&] () { + EXPECT_EQ(1u, pq.client_map.size()) << + "client map has 1 after 1 client"; + EXPECT_FALSE(pq.client_map.at(client)->idle) << + "initially client map entry shows not idle."; + }); + + std::this_thread::sleep_for(std::chrono::seconds(6)); + + lock_pq([&] () { + EXPECT_TRUE(pq.client_map.at(client)->idle) << + "after idle age client map entry shows idle."; + }); + + std::this_thread::sleep_for(std::chrono::seconds(2)); + + lock_pq([&] () { + EXPECT_EQ(0u, pq.client_map.size()) << + "client map loses its entry after erase age"; + }); + } // TEST + + + TEST(dmclock_server, delayed_tag_calc) { + using ClientId = int; + constexpr ClientId client1 = 17; + + using DelayedQueue = 
PullPriorityQueue<ClientId, Request, true>; + using ImmediateQueue = PullPriorityQueue<ClientId, Request, false>; + + ClientInfo info(0.0, 1.0, 1.0); + auto client_info_f = [&] (ClientId c) -> const ClientInfo* { + return &info; + }; + + Time t{1}; + { + DelayedQueue queue(client_info_f); + + queue.add_request_time({}, client1, {0,0}, t); + queue.add_request_time({}, client1, {0,0}, t + 1); + queue.add_request_time({}, client1, {10,10}, t + 2); + + auto pr1 = queue.pull_request(t); + ASSERT_TRUE(pr1.is_retn()); + auto pr2 = queue.pull_request(t + 1); + // ReqParams{10,10} from request #3 pushes request #2 over limit by 10s + ASSERT_TRUE(pr2.is_future()); + EXPECT_DOUBLE_EQ(t + 11, pr2.getTime()); + } + { + ImmediateQueue queue(client_info_f); + + queue.add_request_time({}, client1, {0,0}, t); + queue.add_request_time({}, client1, {0,0}, t + 1); + queue.add_request_time({}, client1, {10,10}, t + 2); + + auto pr1 = queue.pull_request(t); + ASSERT_TRUE(pr1.is_retn()); + auto pr2 = queue.pull_request(t + 1); + // ReqParams{10,10} from request #3 has no effect on request #2 + ASSERT_TRUE(pr2.is_retn()); + auto pr3 = queue.pull_request(t + 2); + ASSERT_TRUE(pr3.is_future()); + EXPECT_DOUBLE_EQ(t + 12, pr3.getTime()); + } + } + +#if 0 + TEST(dmclock_server, reservation_timing) { + using ClientId = int; + // NB? 
PUSH OR PULL + using Queue = std::unique_ptr<dmc::PriorityQueue<ClientId,Request>>; + using std::chrono::steady_clock; + + int client = 17; + + std::vector<dmc::Time> times; + std::mutex times_mtx; + using Guard = std::lock_guard<decltype(times_mtx)>; + + // reservation every second + dmc::ClientInfo ci(1.0, 0.0, 0.0); + Queue pq; + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &ci; + }; + auto server_ready_f = [] () -> bool { return true; }; + auto submit_req_f = [&] (const ClientId& c, + std::unique_ptr<Request> req, + dmc::PhaseType phase) { + { + Guard g(times_mtx); + times.emplace_back(dmc::get_time()); + } + std::thread complete([&](){ pq->request_completed(); }); + complete.detach(); + }; + + // NB? PUSH OR PULL + pq = Queue(new dmc::PriorityQueue<ClientId,Request>(client_info_f, + server_ready_f, + submit_req_f, + false)); + + Request req; + ReqParams<ClientId> req_params(client, 1, 1); + + for (int i = 0; i < 5; ++i) { + pq->add_request_time(req, req_params, dmc::get_time()); + } + + { + Guard g(times_mtx); + std::this_thread::sleep_for(std::chrono::milliseconds(5500)); + EXPECT_EQ(5, times.size()) << + "after 5.5 seconds, we should have 5 requests times at 1 second apart"; + } + } // TEST +#endif + + + TEST(dmclock_server, remove_by_req_filter) { + struct MyReq { + int id; + + MyReq(int _id) : + id(_id) + { + // empty + } + }; // MyReq + + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,MyReq>; + using MyReqRef = typename Queue::RequestRef; + + ClientId client1 = 17; + ClientId client2 = 98; + + dmc::ClientInfo info1(0.0, 1.0, 0.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &info1; + }; + + Queue pq(client_info_f, AtLimit::Allow); + + EXPECT_EQ(0u, pq.client_count()); + EXPECT_EQ(0u, pq.request_count()); + + ReqParams req_params(1,1); + + EXPECT_EQ(0, pq.add_request(MyReq(1), client1, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(11), client1, req_params)); + 
EXPECT_EQ(0, pq.add_request(MyReq(2), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(0), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(13), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(2), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(13), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(98), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(44), client1, req_params)); + + EXPECT_EQ(2u, pq.client_count()); + EXPECT_EQ(9u, pq.request_count()); + + pq.remove_by_req_filter([](MyReqRef&& r) -> bool {return 1 == r->id % 2;}); + + EXPECT_EQ(5u, pq.request_count()); + + std::list<MyReq> capture; + pq.remove_by_req_filter( + [&capture] (MyReqRef&& r) -> bool { + if (0 == r->id % 2) { + capture.push_front(*r); + return true; + } else { + return false; + } + }, + true); + + EXPECT_EQ(0u, pq.request_count()); + EXPECT_EQ(5u, capture.size()); + int total = 0; + for (auto i : capture) { + total += i.id; + } + EXPECT_EQ(146, total) << " sum of captured items should be 146"; + } // TEST + + + TEST(dmclock_server, remove_by_req_filter_ordering_forwards_visit) { + struct MyReq { + int id; + + MyReq(int _id) : + id(_id) + { + // empty + } + }; // MyReq + + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,MyReq>; + using MyReqRef = typename Queue::RequestRef; + + ClientId client1 = 17; + + dmc::ClientInfo info1(0.0, 1.0, 0.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &info1; + }; + + Queue pq(client_info_f, AtLimit::Allow); + + EXPECT_EQ(0u, pq.client_count()); + EXPECT_EQ(0u, pq.request_count()); + + ReqParams req_params(1,1); + + EXPECT_EQ(0, pq.add_request(MyReq(1), client1, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(2), client1, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(3), client1, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(4), client1, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(5), client1, req_params)); + EXPECT_EQ(0, 
pq.add_request(MyReq(6), client1, req_params)); + + EXPECT_EQ(1u, pq.client_count()); + EXPECT_EQ(6u, pq.request_count()); + + // remove odd ids in forward order and append to end + + std::vector<MyReq> capture; + pq.remove_by_req_filter( + [&capture] (MyReqRef&& r) -> bool { + if (1 == r->id % 2) { + capture.push_back(*r); + return true; + } else { + return false; + } + }, + false); + + EXPECT_EQ(3u, pq.request_count()); + EXPECT_EQ(3u, capture.size()); + EXPECT_EQ(1, capture[0].id) << "items should come out in forward order"; + EXPECT_EQ(3, capture[1].id) << "items should come out in forward order"; + EXPECT_EQ(5, capture[2].id) << "items should come out in forward order"; + + // remove even ids in reverse order but insert at front so comes + // out forwards + + std::vector<MyReq> capture2; + pq.remove_by_req_filter( + [&capture2] (MyReqRef&& r) -> bool { + if (0 == r->id % 2) { + capture2.insert(capture2.begin(), *r); + return true; + } else { + return false; + } + }, + false); + + EXPECT_EQ(0u, pq.request_count()); + EXPECT_EQ(3u, capture2.size()); + EXPECT_EQ(6, capture2[0].id) << "items should come out in reverse order"; + EXPECT_EQ(4, capture2[1].id) << "items should come out in reverse order"; + EXPECT_EQ(2, capture2[2].id) << "items should come out in reverse order"; + } // TEST + + + TEST(dmclock_server, remove_by_req_filter_ordering_backwards_visit) { + struct MyReq { + int id; + + MyReq(int _id) : + id(_id) + { + // empty + } + }; // MyReq + + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,MyReq>; + using MyReqRef = typename Queue::RequestRef; + + ClientId client1 = 17; + + dmc::ClientInfo info1(0.0, 1.0, 0.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &info1; + }; + + Queue pq(client_info_f, AtLimit::Allow); + + EXPECT_EQ(0u, pq.client_count()); + EXPECT_EQ(0u, pq.request_count()); + + ReqParams req_params(1,1); + + EXPECT_EQ(0, pq.add_request(MyReq(1), client1, req_params)); + EXPECT_EQ(0, 
pq.add_request(MyReq(2), client1, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(3), client1, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(4), client1, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(5), client1, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(6), client1, req_params)); + + EXPECT_EQ(1u, pq.client_count()); + EXPECT_EQ(6u, pq.request_count()); + + // now remove odd ids in forward order + + std::vector<MyReq> capture; + pq.remove_by_req_filter( + [&capture] (MyReqRef&& r) -> bool { + if (1 == r->id % 2) { + capture.insert(capture.begin(), *r); + return true; + } else { + return false; + } + }, + true); + + EXPECT_EQ(3u, pq.request_count()); + EXPECT_EQ(3u, capture.size()); + EXPECT_EQ(1, capture[0].id) << "items should come out in forward order"; + EXPECT_EQ(3, capture[1].id) << "items should come out in forward order"; + EXPECT_EQ(5, capture[2].id) << "items should come out in forward order"; + + // now remove even ids in reverse order + + std::vector<MyReq> capture2; + pq.remove_by_req_filter( + [&capture2] (MyReqRef&& r) -> bool { + if (0 == r->id % 2) { + capture2.push_back(*r); + return true; + } else { + return false; + } + }, + true); + + EXPECT_EQ(0u, pq.request_count()); + EXPECT_EQ(3u, capture2.size()); + EXPECT_EQ(6, capture2[0].id) << "items should come out in reverse order"; + EXPECT_EQ(4, capture2[1].id) << "items should come out in reverse order"; + EXPECT_EQ(2, capture2[2].id) << "items should come out in reverse order"; + } // TEST + + + TEST(dmclock_server, remove_by_client) { + struct MyReq { + int id; + + MyReq(int _id) : + id(_id) + { + // empty + } + }; // MyReq + + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,MyReq>; + using MyReqRef = typename Queue::RequestRef; + + ClientId client1 = 17; + ClientId client2 = 98; + + dmc::ClientInfo info1(0.0, 1.0, 0.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &info1; + }; + + Queue pq(client_info_f, AtLimit::Allow); 
+ + EXPECT_EQ(0u, pq.client_count()); + EXPECT_EQ(0u, pq.request_count()); + + ReqParams req_params(1,1); + + EXPECT_EQ(0, pq.add_request(MyReq(1), client1, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(11), client1, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(2), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(0), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(13), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(2), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(13), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(98), client2, req_params)); + EXPECT_EQ(0, pq.add_request(MyReq(44), client1, req_params)); + + EXPECT_EQ(2u, pq.client_count()); + EXPECT_EQ(9u, pq.request_count()); + + std::list<MyReq> removed; + + pq.remove_by_client(client1, + true, + [&removed] (MyReqRef&& r) { + removed.push_front(*r); + }); + + EXPECT_EQ(3u, removed.size()); + EXPECT_EQ(1, removed.front().id); + removed.pop_front(); + EXPECT_EQ(11, removed.front().id); + removed.pop_front(); + EXPECT_EQ(44, removed.front().id); + removed.pop_front(); + + EXPECT_EQ(6u, pq.request_count()); + + Queue::PullReq pr = pq.pull_request(); + EXPECT_TRUE(pr.is_retn()); + EXPECT_EQ(2, pr.get_retn().request->id); + + pr = pq.pull_request(); + EXPECT_TRUE(pr.is_retn()); + EXPECT_EQ(0, pr.get_retn().request->id); + + pq.remove_by_client(client2); + EXPECT_EQ(0u, pq.request_count()) << + "after second client removed, none left"; + } // TEST + + + TEST(dmclock_server_pull, pull_weight) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,Request>; + using QueueRef = std::unique_ptr<Queue>; + + ClientId client1 = 17; + ClientId client2 = 98; + + dmc::ClientInfo info1(0.0, 1.0, 0.0); + dmc::ClientInfo info2(0.0, 2.0, 0.0); + + QueueRef pq; + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + if (client1 == c) return &info1; + else if (client2 == c) return &info2; + else { + ADD_FAILURE() << "client info 
looked up for non-existent client"; + return nullptr; + } + }; + + pq = QueueRef(new Queue(client_info_f, AtLimit::Wait)); + + ReqParams req_params(1,1); + + auto now = dmc::get_time(); + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(0, pq->add_request(Request{}, client1, req_params)); + EXPECT_EQ(0, pq->add_request(Request{}, client2, req_params)); + now += 0.0001; + } + + int c1_count = 0; + int c2_count = 0; + for (int i = 0; i < 6; ++i) { + Queue::PullReq pr = pq->pull_request(); + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + auto& retn = boost::get<Queue::PullReq::Retn>(pr.data); + + if (client1 == retn.client) ++c1_count; + else if (client2 == retn.client) ++c2_count; + else ADD_FAILURE() << "got request from neither of two clients"; + + EXPECT_EQ(PhaseType::priority, retn.phase); + } + + EXPECT_EQ(2, c1_count) << + "one-third of request should have come from first client"; + EXPECT_EQ(4, c2_count) << + "two-thirds of request should have come from second client"; + } + + + TEST(dmclock_server_pull, pull_reservation) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,Request>; + using QueueRef = std::unique_ptr<Queue>; + + ClientId client1 = 52; + ClientId client2 = 8; + + dmc::ClientInfo info1(2.0, 0.0, 0.0); + dmc::ClientInfo info2(1.0, 0.0, 0.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + if (client1 == c) return &info1; + else if (client2 == c) return &info2; + else { + ADD_FAILURE() << "client info looked up for non-existent client"; + return nullptr; + } + }; + + QueueRef pq(new Queue(client_info_f, AtLimit::Wait)); + + ReqParams req_params(1,1); + + // make sure all times are well before now + auto old_time = dmc::get_time() - 100.0; + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(0, pq->add_request_time(Request{}, client1, req_params, old_time)); + EXPECT_EQ(0, pq->add_request_time(Request{}, client2, req_params, old_time)); + old_time += 0.001; + } + + int c1_count = 0; + int c2_count = 0; + + 
for (int i = 0; i < 6; ++i) { + Queue::PullReq pr = pq->pull_request(); + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + auto& retn = boost::get<Queue::PullReq::Retn>(pr.data); + + if (client1 == retn.client) ++c1_count; + else if (client2 == retn.client) ++c2_count; + else ADD_FAILURE() << "got request from neither of two clients"; + + EXPECT_EQ(PhaseType::reservation, retn.phase); + } + + EXPECT_EQ(4, c1_count) << + "two-thirds of request should have come from first client"; + EXPECT_EQ(2, c2_count) << + "one-third of request should have come from second client"; + } // dmclock_server_pull.pull_reservation + + + TEST(dmclock_server_pull, update_client_info) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,Request,false>; + using QueueRef = std::unique_ptr<Queue>; + + ClientId client1 = 17; + ClientId client2 = 98; + + dmc::ClientInfo info1(0.0, 100.0, 0.0); + dmc::ClientInfo info2(0.0, 200.0, 0.0); + + QueueRef pq; + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + if (client1 == c) return &info1; + else if (client2 == c) return &info2; + else { + ADD_FAILURE() << "client info looked up for non-existent client"; + return nullptr; + } + }; + + pq = QueueRef(new Queue(client_info_f, AtLimit::Wait)); + + ReqParams req_params(1,1); + + auto now = dmc::get_time(); + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(0, pq->add_request(Request{}, client1, req_params)); + EXPECT_EQ(0, pq->add_request(Request{}, client2, req_params)); + now += 0.0001; + } + + int c1_count = 0; + int c2_count = 0; + for (int i = 0; i < 10; ++i) { + Queue::PullReq pr = pq->pull_request(); + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + auto& retn = boost::get<Queue::PullReq::Retn>(pr.data); + + if (i > 5) continue; + if (client1 == retn.client) ++c1_count; + else if (client2 == retn.client) ++c2_count; + else ADD_FAILURE() << "got request from neither of two clients"; + + EXPECT_EQ(PhaseType::priority, retn.phase); + } + + EXPECT_EQ(2, 
c1_count) << + "before: one-third of request should have come from first client"; + EXPECT_EQ(4, c2_count) << + "before: two-thirds of request should have come from second client"; + + std::chrono::seconds dura(1); + std::this_thread::sleep_for(dura); + + info1 = dmc::ClientInfo(0.0, 200.0, 0.0); + pq->update_client_info(17); + + now = dmc::get_time(); + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(0, pq->add_request(Request{}, client1, req_params)); + EXPECT_EQ(0, pq->add_request(Request{}, client2, req_params)); + now += 0.0001; + } + + c1_count = 0; + c2_count = 0; + for (int i = 0; i < 6; ++i) { + Queue::PullReq pr = pq->pull_request(); + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + auto& retn = boost::get<Queue::PullReq::Retn>(pr.data); + + if (client1 == retn.client) ++c1_count; + else if (client2 == retn.client) ++c2_count; + else ADD_FAILURE() << "got request from neither of two clients"; + + EXPECT_EQ(PhaseType::priority, retn.phase); + } + + EXPECT_EQ(3, c1_count) << + "after: one-third of request should have come from first client"; + EXPECT_EQ(3, c2_count) << + "after: two-thirds of request should have come from second client"; + } + + + TEST(dmclock_server_pull, dynamic_cli_info_f) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,Request,true,true>; + using QueueRef = std::unique_ptr<Queue>; + + ClientId client1 = 17; + ClientId client2 = 98; + + std::vector<dmc::ClientInfo> info1; + std::vector<dmc::ClientInfo> info2; + + info1.push_back(dmc::ClientInfo(0.0, 100.0, 0.0)); + info1.push_back(dmc::ClientInfo(0.0, 150.0, 0.0)); + + info2.push_back(dmc::ClientInfo(0.0, 200.0, 0.0)); + info2.push_back(dmc::ClientInfo(0.0, 50.0, 0.0)); + + size_t cli_info_group = 0; + + QueueRef pq; + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + if (client1 == c) return &info1[cli_info_group]; + else if (client2 == c) return &info2[cli_info_group]; + else { + ADD_FAILURE() << "client info looked up for non-existent 
client"; + return nullptr; + } + }; + + pq = QueueRef(new Queue(client_info_f, AtLimit::Wait)); + + ReqParams req_params(1,1); + + auto now = dmc::get_time(); + + for (int i = 0; i < 5; ++i) { + EXPECT_EQ(0, pq->add_request(Request{}, client1, req_params)); + EXPECT_EQ(0, pq->add_request(Request{}, client2, req_params)); + now += 0.0001; + } + + int c1_count = 0; + int c2_count = 0; + for (int i = 0; i < 10; ++i) { + Queue::PullReq pr = pq->pull_request(); + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + auto& retn = boost::get<Queue::PullReq::Retn>(pr.data); + + if (i > 5) continue; + if (client1 == retn.client) ++c1_count; + else if (client2 == retn.client) ++c2_count; + else ADD_FAILURE() << "got request from neither of two clients"; + + EXPECT_EQ(PhaseType::priority, retn.phase); + } + + EXPECT_EQ(2, c1_count) << + "before: one-third of request should have come from first client"; + EXPECT_EQ(4, c2_count) << + "before: two-thirds of request should have come from second client"; + + std::chrono::seconds dura(1); + std::this_thread::sleep_for(dura); + + cli_info_group = 1; + + now = dmc::get_time(); + + for (int i = 0; i < 6; ++i) { + EXPECT_EQ(0, pq->add_request(Request{}, client1, req_params)); + EXPECT_EQ(0, pq->add_request(Request{}, client2, req_params)); + now += 0.0001; + } + + c1_count = 0; + c2_count = 0; + for (int i = 0; i < 8; ++i) { + Queue::PullReq pr = pq->pull_request(); + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + auto& retn = boost::get<Queue::PullReq::Retn>(pr.data); + + if (client1 == retn.client) ++c1_count; + else if (client2 == retn.client) ++c2_count; + else ADD_FAILURE() << "got request from neither of two clients"; + + EXPECT_EQ(PhaseType::priority, retn.phase); + } + + EXPECT_EQ(6, c1_count) << + "after: one-third of request should have come from first client"; + EXPECT_EQ(2, c2_count) << + "after: two-thirds of request should have come from second client"; + } + + + // This test shows what happens when a request can be 
ready (under + // limit) but not schedulable since proportion tag is 0. We expect + // to get some future and none responses. + TEST(dmclock_server_pull, ready_and_under_limit) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,Request>; + using QueueRef = std::unique_ptr<Queue>; + + ClientId client1 = 52; + ClientId client2 = 8; + + dmc::ClientInfo info1(1.0, 0.0, 0.0); + dmc::ClientInfo info2(1.0, 0.0, 0.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + if (client1 == c) return &info1; + else if (client2 == c) return &info2; + else { + ADD_FAILURE() << "client info looked up for non-existent client"; + return nullptr; + } + }; + + QueueRef pq(new Queue(client_info_f, AtLimit::Wait)); + + ReqParams req_params(0, 0); + + // make sure all times are well before now + auto start_time = dmc::get_time() - 100.0; + + // add six requests; for same client reservations spaced one apart + for (int i = 0; i < 3; ++i) { + EXPECT_EQ(0, pq->add_request_time(Request{}, client1, req_params, start_time)); + EXPECT_EQ(0, pq->add_request_time(Request{}, client2, req_params, start_time)); + } + + Queue::PullReq pr = pq->pull_request(start_time + 0.5); + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + + pr = pq->pull_request(start_time + 0.5); + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + + pr = pq->pull_request(start_time + 0.5); + EXPECT_EQ(Queue::NextReqType::future, pr.type) << + "too soon for next reservation"; + + pr = pq->pull_request(start_time + 1.5); + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + + pr = pq->pull_request(start_time + 1.5); + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + + pr = pq->pull_request(start_time + 1.5); + EXPECT_EQ(Queue::NextReqType::future, pr.type) << + "too soon for next reservation"; + + pr = pq->pull_request(start_time + 2.5); + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + + pr = pq->pull_request(start_time + 2.5); + EXPECT_EQ(Queue::NextReqType::returning, 
pr.type); + + pr = pq->pull_request(start_time + 2.5); + EXPECT_EQ(Queue::NextReqType::none, pr.type) << "no more requests left"; + } + + + TEST(dmclock_server_pull, pull_none) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,Request>; + using QueueRef = std::unique_ptr<Queue>; + + dmc::ClientInfo info(1.0, 1.0, 1.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &info; + }; + + QueueRef pq(new Queue(client_info_f, AtLimit::Wait)); + + // Request req; + ReqParams req_params(1,1); + + auto now = dmc::get_time(); + + Queue::PullReq pr = pq->pull_request(now + 100); + + EXPECT_EQ(Queue::NextReqType::none, pr.type); + } + + + TEST(dmclock_server_pull, pull_future) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,Request>; + using QueueRef = std::unique_ptr<Queue>; + + ClientId client1 = 52; + // ClientId client2 = 8; + + dmc::ClientInfo info(1.0, 0.0, 1.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &info; + }; + + QueueRef pq(new Queue(client_info_f, AtLimit::Wait)); + + ReqParams req_params(1,1); + + // make sure all times are well before now + auto now = dmc::get_time(); + + EXPECT_EQ(0, pq->add_request_time(Request{}, client1, req_params, now + 100)); + Queue::PullReq pr = pq->pull_request(now); + + EXPECT_EQ(Queue::NextReqType::future, pr.type); + + Time when = boost::get<Time>(pr.data); + EXPECT_EQ(now + 100, when); + } + + + TEST(dmclock_server_pull, pull_future_limit_break_weight) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,Request>; + using QueueRef = std::unique_ptr<Queue>; + + ClientId client1 = 52; + // ClientId client2 = 8; + + dmc::ClientInfo info(0.0, 1.0, 1.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &info; + }; + + QueueRef pq(new Queue(client_info_f, AtLimit::Allow)); + + ReqParams req_params(1,1); + + // make sure all times are well before now + auto now = 
dmc::get_time(); + + EXPECT_EQ(0, pq->add_request_time(Request{}, client1, req_params, now + 100)); + Queue::PullReq pr = pq->pull_request(now); + + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + + auto& retn = boost::get<Queue::PullReq::Retn>(pr.data); + EXPECT_EQ(client1, retn.client); + } + + + TEST(dmclock_server_pull, pull_future_limit_break_reservation) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId,Request>; + using QueueRef = std::unique_ptr<Queue>; + + ClientId client1 = 52; + // ClientId client2 = 8; + + dmc::ClientInfo info(1.0, 0.0, 1.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &info; + }; + + QueueRef pq(new Queue(client_info_f, AtLimit::Allow)); + + ReqParams req_params(1,1); + + // make sure all times are well before now + auto now = dmc::get_time(); + + EXPECT_EQ(0, pq->add_request_time(Request{}, client1, req_params, now + 100)); + Queue::PullReq pr = pq->pull_request(now); + + EXPECT_EQ(Queue::NextReqType::returning, pr.type); + + auto& retn = boost::get<Queue::PullReq::Retn>(pr.data); + EXPECT_EQ(client1, retn.client); + } + + + TEST(dmclock_server_pull, pull_reject_at_limit) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId, Request, false>; + using MyReqRef = typename Queue::RequestRef; + + ClientId client1 = 52; + ClientId client2 = 53; + + dmc::ClientInfo info(0.0, 1.0, 1.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &info; + }; + + Queue pq(client_info_f, AtLimit::Reject); + + { + // success at 1 request per second + EXPECT_EQ(0, pq.add_request_time({}, client1, {}, Time{1})); + EXPECT_EQ(0, pq.add_request_time({}, client1, {}, Time{2})); + EXPECT_EQ(0, pq.add_request_time({}, client1, {}, Time{3})); + // request too soon + EXPECT_EQ(EAGAIN, pq.add_request_time({}, client1, {}, Time{3.9})); + // previous rejected request counts against limit + EXPECT_EQ(EAGAIN, pq.add_request_time({}, client1, {}, Time{4})); + 
EXPECT_EQ(0, pq.add_request_time({}, client1, {}, Time{6})); + } + { + auto r1 = MyReqRef{new Request}; + ASSERT_EQ(0, pq.add_request(std::move(r1), client2, {}, Time{1})); + EXPECT_EQ(nullptr, r1); // add_request takes r1 on success + auto r2 = MyReqRef{new Request}; + ASSERT_EQ(EAGAIN, pq.add_request(std::move(r2), client2, {}, Time{1})); + EXPECT_NE(nullptr, r2); // add_request does not take r2 on failure + } + } + + + TEST(dmclock_server_pull, pull_reject_threshold) { + using ClientId = int; + using Queue = dmc::PullPriorityQueue<ClientId, Request, false>; + + ClientId client1 = 52; + + dmc::ClientInfo info(0.0, 1.0, 1.0); + + auto client_info_f = [&] (ClientId c) -> const dmc::ClientInfo* { + return &info; + }; + + // allow up to 3 seconds worth of limit before rejecting + Queue pq(client_info_f, RejectThreshold{3.0}); + + EXPECT_EQ(0, pq.add_request_time({}, client1, {}, Time{1})); // at limit=1 + EXPECT_EQ(0, pq.add_request_time({}, client1, {}, Time{1})); // 1 over + EXPECT_EQ(0, pq.add_request_time({}, client1, {}, Time{1})); // 2 over + EXPECT_EQ(0, pq.add_request_time({}, client1, {}, Time{1})); // 3 over + EXPECT_EQ(EAGAIN, pq.add_request_time({}, client1, {}, Time{1})); // reject + EXPECT_EQ(0, pq.add_request_time({}, client1, {}, Time{3})); // 3 over + } + + } // namespace dmclock +} // namespace crimson diff --git a/src/dmclock/test/test_test_client.cc b/src/dmclock/test/test_test_client.cc new file mode 100644 index 000000000..11cbd74b1 --- /dev/null +++ b/src/dmclock/test/test_test_client.cc @@ -0,0 +1,138 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +/* + * Copyright (C) 2016 Red Hat Inc. + * + * Author: J. Eric Ivancich <ivancich@redhat.com> + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License version + * 2.1, as published by the Free Software Foundation. See file + * COPYING. 
+ */ + + +#include <atomic> +#include <thread> +#include <chrono> +#include <iostream> + +#include "gtest/gtest.h" + +#include "sim_recs.h" +#include "sim_client.h" + +#include "test_dmclock.h" + + +using namespace std::placeholders; + +namespace dmc = crimson::dmclock; +namespace test = crimson::test_dmc; +namespace sim = crimson::qos_simulation; + +using TimePoint = std::chrono::time_point<std::chrono::system_clock>; + +static TimePoint now() { return std::chrono::system_clock::now(); } + + +TEST(test_client, full_bore_timing) { + std::atomic_ulong count(0); + + ServerId server_id = 3; + + sim::TestResponse resp(0); + dmc::PhaseType resp_params = dmc::PhaseType::priority; + test::DmcClient* client; + const sim::Cost request_cost = 1u; + + auto start = now(); + client = + new test::DmcClient(ClientId(0), + [&] (const ServerId& server, + const sim::TestRequest& req, + const ClientId& client_id, + const dmc::ReqParams& req_params) { + ++count; + client->receive_response(resp, client_id, resp_params, request_cost); + }, + [&] (const uint64_t seed) -> ServerId& { + return server_id; + }, + test::dmc_client_accumulate_f, + 1000, // ops to run + 100, // iops goal + 5); // outstanding ops allowed + client->wait_until_done(); + auto end = now(); + EXPECT_EQ(1000u, count) << "didn't get right number of ops"; + + int milliseconds = (end - start) / std::chrono::milliseconds(1); + EXPECT_LT(10000, milliseconds) << "timing too fast to be correct"; + EXPECT_GT(12000, milliseconds) << "timing suspiciously slow"; + + delete client; +} + + +TEST(test_client, paused_timing) { + std::atomic_ulong count(0); + std::atomic_ulong unresponded_count(0); + std::atomic_bool auto_respond(false); + + ClientId my_client_id = 0; + ServerId server_id = 3; + + sim::TestResponse resp(0); + dmc::PhaseType resp_params = dmc::PhaseType::priority; + const uint64_t request_cost = 1u; + test::DmcClient* client; + + auto start = now(); + client = + new test::DmcClient(my_client_id, + [&] (const ServerId& 
server, + const sim::TestRequest& req, + const ClientId& client_id, + const dmc::ReqParams& req_params) { + ++count; + if (auto_respond.load()) { + client->receive_response(resp, client_id, resp_params, request_cost); + } else { + ++unresponded_count; + } + }, + [&] (const uint64_t seed) -> ServerId& { + return server_id; + }, + test::dmc_client_accumulate_f, + + 1000, // ops to run + 100, // iops goal + 50); // outstanding ops allowed + std::thread t([&]() { + std::this_thread::sleep_for(std::chrono::seconds(5)); + EXPECT_EQ(50u, unresponded_count.load()) << + "should have 50 unresponded calls"; + auto_respond = true; + // respond to those 50 calls + for(int i = 0; i < 50; ++i) { + client->receive_response(resp, my_client_id, resp_params, 1); + --unresponded_count; + } + }); + + client->wait_until_done(); + auto end = now(); + int milliseconds = (end - start) / std::chrono::milliseconds(1); + + // the 50 outstanding ops allowed means the first half-second of + // requests get responded to during the 5 second pause. So we have + // to adjust our expectations by a half-second. + EXPECT_LT(15000 - 500, milliseconds) << "timing too fast to be correct"; + EXPECT_GT(17000 - 500, milliseconds) << "timing suspiciously slow"; + t.join(); + + delete client; +} |