99 files changed, 6762 insertions, 914 deletions
diff --git a/.cirrus.yml b/.cirrus.yml new file mode 100644 index 0000000..3960a06 --- /dev/null +++ b/.cirrus.yml @@ -0,0 +1,14 @@ +env: + CIRRUS_CLONE_DEPTH: 1 + ARCH: amd64 + +task: + freebsd_instance: + matrix: + image: freebsd-12-0-release-amd64 + image: freebsd-11-2-release-amd64 + script: + - cc --version + - export CFLAGS="-DITERATE=400 -DPAIRS_S=100 -DITERATIONS=24" + - ./tools/ci-build.sh --cores=$(sysctl -n hw.ncpu) + - make check diff --git a/.drone.yml b/.drone.yml new file mode 100644 index 0000000..c0001db --- /dev/null +++ b/.drone.yml @@ -0,0 +1,29 @@ +kind: pipeline +name: gcc/amd64/linux + +platform: + arch: amd64 + +steps: +- name: build + image: gcc + pull: true + commands: + - ./tools/ci-build.sh --cores=4 + - make check + +--- +kind: pipeline +name: gcc/arm64/linux + +platform: + arch: arm64 + +steps: +- name: build + image: gcc + pull: true + commands: + - ./tools/ci-build.sh --cores=4 + - make check + diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 0000000..bdb6193 --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,71 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +name: "CodeQL" + +on: + push: + branches: [master] + pull_request: + # The branches below must be a subset of the branches above + branches: [master] + schedule: + - cron: '0 11 * * 5' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + # Override automatic language detection by changing the below list + # Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python'] + language: ['cpp'] + # Learn more... + # https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + # We must fetch at least the immediate parents so that if this is + # a pull request then we can checkout the head. + fetch-depth: 2 + + # If this run was triggered by a pull request event, then checkout + # the head of the pull request instead of the merge commit. + - run: git checkout HEAD^2 + if: ${{ github.event_name == 'pull_request' }} + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + # queries: ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v1 + + # âšī¸ Command-line programs to run using the OS shell. 
+ # đ https://git.io/JvXDl + + # âī¸ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 @@ -1,27 +1,25 @@ -/Makefile +*.a build/ck.build build/ck.pc -build/regressions.build build/ck.spec -include/ck_md.h -src/Makefile -doc/Makefile -doc/*.3 build/Makefile +build/regressions.build +doc/*.3 +doc/Makefile .DS_Store -LOG -*.log -*.html -*.gz -*.o -*.a -*.so *.dSYM -.*.sw[op] GPATH GRTAGS GTAGS +*.gz +*.html ID +include/ck_md.h +include/freebsd/ck_md.h +*.log +LOG +/Makefile +*.o regressions/ck_array/validate/serial regressions/ck_backoff/validate/validate regressions/ck_bag/validate/order @@ -37,17 +35,28 @@ regressions/ck_brlock/benchmark/throughput regressions/ck_brlock/validate/validate regressions/ck_bytelock/benchmark/latency regressions/ck_bytelock/validate/validate +regressions/ck_cc/validate/ck_cc +regressions/ck_cc/validate/ck_cc_nobuiltin regressions/ck_cohort/benchmark/ck_cohort.LATENCY regressions/ck_cohort/benchmark/ck_cohort.THROUGHPUT regressions/ck_cohort/validate/validate +regressions/ck_ec/benchmark/ck_ec +regressions/ck_ec/validate/ck_ec_smoke_test +regressions/ck_ec/validate/prop_test_slow_wakeup +regressions/ck_ec/validate/prop_test_timeutil_add +regressions/ck_ec/validate/prop_test_timeutil_add_ns +regressions/ck_ec/validate/prop_test_timeutil_cmp +regressions/ck_ec/validate/prop_test_timeutil_scale +regressions/ck_ec/validate/prop_test_value +regressions/ck_ec/validate/prop_test_wakeup regressions/ck_epoch/validate/ck_epoch_call regressions/ck_epoch/validate/ck_epoch_poll regressions/ck_epoch/validate/ck_epoch_section regressions/ck_epoch/validate/ck_epoch_section_2 -regressions/ck_epoch/validate/torture regressions/ck_epoch/validate/ck_epoch_synchronize regressions/ck_epoch/validate/ck_stack regressions/ck_epoch/validate/ck_stack_read +regressions/ck_epoch/validate/torture regressions/ck_fifo/benchmark/latency regressions/ck_fifo/validate/ck_fifo_mpmc regressions/ck_fifo/validate/ck_fifo_mpmc_iterator @@ -75,11 +84,15 @@ regressions/ck_ht/validate/serial.delete regressions/ck_pflock/benchmark/latency regressions/ck_pflock/benchmark/throughput regressions/ck_pflock/validate/validate +regressions/ck_pr/benchmark/ck_pr_add_64 regressions/ck_pr/benchmark/ck_pr_cas_64 regressions/ck_pr/benchmark/ck_pr_cas_64_2 +regressions/ck_pr/benchmark/ck_pr_faa_64 regressions/ck_pr/benchmark/ck_pr_fas_64 +regressions/ck_pr/benchmark/ck_pr_neg_64 regressions/ck_pr/benchmark/fp regressions/ck_pr/validate/ck_pr_add +regressions/ck_pr/validate/ck_pr_add regressions/ck_pr/validate/ck_pr_and regressions/ck_pr/validate/ck_pr_bin regressions/ck_pr/validate/ck_pr_btc @@ -88,10 +101,13 @@ regressions/ck_pr/validate/ck_pr_bts regressions/ck_pr/validate/ck_pr_btx regressions/ck_pr/validate/ck_pr_cas regressions/ck_pr/validate/ck_pr_dec +regressions/ck_pr/validate/ck_pr_dec_zero regressions/ck_pr/validate/ck_pr_faa regressions/ck_pr/validate/ck_pr_fas regressions/ck_pr/validate/ck_pr_fax +regressions/ck_pr/validate/ck_pr_fence regressions/ck_pr/validate/ck_pr_inc +regressions/ck_pr/validate/ck_pr_inc_zero regressions/ck_pr/validate/ck_pr_load regressions/ck_pr/validate/ck_pr_n regressions/ck_pr/validate/ck_pr_or @@ -106,12 +122,12 @@ regressions/ck_rhs/benchmark/parallel_bytestring regressions/ck_rhs/benchmark/serial regressions/ck_rhs/validate/serial 
regressions/ck_ring/benchmark/latency +regressions/ck_ring/validate/ck_ring_mpmc +regressions/ck_ring/validate/ck_ring_mpmc_template regressions/ck_ring/validate/ck_ring_spmc regressions/ck_ring/validate/ck_ring_spmc_template regressions/ck_ring/validate/ck_ring_spsc regressions/ck_ring/validate/ck_ring_spsc_template -regressions/ck_ring/validate/ck_ring_mpmc -regressions/ck_ring/validate/ck_ring_mpmc_template regressions/ck_rwcohort/benchmark/ck_neutral.LATENCY regressions/ck_rwcohort/benchmark/ck_neutral.THROUGHPUT regressions/ck_rwcohort/benchmark/ck_rp.LATENCY @@ -143,9 +159,9 @@ regressions/ck_spinlock/benchmark/ck_mcs.THROUGHPUT regressions/ck_spinlock/benchmark/ck_spinlock.LATENCY regressions/ck_spinlock/benchmark/ck_spinlock.THROUGHPUT regressions/ck_spinlock/benchmark/ck_ticket.LATENCY -regressions/ck_spinlock/benchmark/ck_ticket.THROUGHPUT regressions/ck_spinlock/benchmark/ck_ticket_pb.LATENCY regressions/ck_spinlock/benchmark/ck_ticket_pb.THROUGHPUT +regressions/ck_spinlock/benchmark/ck_ticket.THROUGHPUT regressions/ck_spinlock/benchmark/linux_spinlock.LATENCY regressions/ck_spinlock/benchmark/linux_spinlock.THROUGHPUT regressions/ck_spinlock/validate/ck_anderson @@ -185,3 +201,6 @@ regressions/ck_swlock/validate/validate regressions/ck_tflock/benchmark/latency regressions/ck_tflock/benchmark/throughput regressions/ck_tflock/validate/validate +*.so +src/Makefile +.*.sw[op] diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..32d4c29 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,39 @@ +# sudo required as tests set cpu affinity +sudo: false + +os: + - linux + - linux-ppc64le + - osx + +language: + - c + +compiler: + - gcc + - clang + +matrix: + exclude: + - os: osx + compiler: gcc +addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - gcc-8 + - clang-6.0 + +script: + - > + if [[ $TRAVIS_OS_NAME == linux ]]; then + case "$CC" in + gcc) export CC=gcc-8 ;; + clang) export CC=clang-6.0 ;; + esac + fi + - ${CC} --version + - export CFLAGS="-DITERATE=400 -DPAIRS_S=100 -DITERATIONS=24" + - ./tools/ci-build.sh --cores=4 + - make check @@ -0,0 +1 @@ +concurrencykit.org
\ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..70a5568 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at sbahra@repnop.org. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/Makefile.in b/Makefile.in index 7e73f70..6f17a12 100644 --- a/Makefile.in +++ b/Makefile.in @@ -36,12 +36,12 @@ install-headers: mkdir -p $(DESTDIR)/$(HEADERS) || exit cp $(SRC_DIR)/include/*.h $(DESTDIR)/$(HEADERS) || exit chmod 644 $(DESTDIR)/$(HEADERS)/ck_*.h || exit - mkdir -p $(DESTDIR)$(HEADERS)/gcc || exit + mkdir -p $(DESTDIR)/$(HEADERS)/gcc || exit cp -r $(SRC_DIR)/include/gcc/* $(DESTDIR)/$(HEADERS)/gcc || exit cp include/ck_md.h $(DESTDIR)/$(HEADERS)/ck_md.h || exit chmod 755 $(DESTDIR)/$(HEADERS)/gcc chmod 644 $(DESTDIR)/$(HEADERS)/gcc/ck_*.h $(DESTDIR)/$(HEADERS)/gcc/*/ck_*.h || exit - mkdir -p $(DESTDIR)$(HEADERS)/spinlock || exit + mkdir -p $(DESTDIR)/$(HEADERS)/spinlock || exit cp -r $(SRC_DIR)/include/spinlock/* $(DESTDIR)/$(HEADERS)/spinlock || exit chmod 755 $(DESTDIR)/$(HEADERS)/spinlock chmod 644 $(DESTDIR)/$(HEADERS)/spinlock/*.h || exit @@ -1,21 +0,0 @@ - ____ _ ___ _ - / ___|___ _ __ ___ _ _ _ __ _ __ ___ _ __ ___ _ _ | |/ (_) |_ -| | / _ \| '_ \ / __| | | | '__| '__/ _ \ '_ \ / __| | | | | ' /| | __| -| |__| (_) | | | | (__| |_| | | | | | __/ | | | (__| |_| | | . \| | |_ - \____\___/|_| |_|\___|\__,_|_| |_| \___|_| |_|\___|\__, | |_|\_\_|\__| - |___/ - -Step 1. - ./configure - For additional options try ./configure --help - -Step 2. - In order to compile regressions (requires POSIX threads) use - "make regressions". In order to compile libck use "make all" or "make". - -Step 3. - In order to install use "make install" - To uninstall use "make uninstall". - -See http://concurrencykit.org/ for more information. - diff --git a/README.md b/README.md new file mode 100644 index 0000000..dbfc0e0 --- /dev/null +++ b/README.md @@ -0,0 +1,204 @@ +### Continuous Integration + +| Drone | Travis | Cirrus | +| -------- | ------ | ------- | +| [![Build Status](https://cloud.drone.io/api/badges/concurrencykit/ck/status.svg)](https://cloud.drone.io/concurrencykit/ck) | [![Build Status](https://travis-ci.org/concurrencykit/ck.svg)](https://travis-ci.org/concurrencykit/ck) | [![Build Status](https://api.cirrus-ci.com/github/concurrencykit/ck.svg?branch=master)](https://cirrus-ci.com/github/concurrencykit/ck) | + +Compilers tested in the past include gcc, clang, cygwin, icc, mingw32, mingw64 and suncc across all supported architectures. All new architectures are required to pass the integration test and under-go extensive code review. + +Continuous integration is currently enabled for the following targets: + * `darwin/clang/x86-64` + * `freebsd/clang/x86-64` + * `linux/gcc/arm64` + * `linux/gcc/x86-64` + * `linux/clang/x86-64` + * `linux/clang/ppc64le` + +### Compile and Build + +* Step 1. + `./configure` + For additional options try `./configure --help` + +* Step 2. + In order to compile regressions (requires POSIX threads) use + `make regressions`. In order to compile libck use `make all` or `make`. + +* Step 3. + In order to install use `make install` + To uninstall use `make uninstall`. + +See http://concurrencykit.org/ for more information. + +### Supported Architectures + +Concurrency Kit supports any architecture using compiler built-ins as a fallback. There is usually a performance degradation associated with this. 
+ +Concurrency Kit has specialized assembly for the following architectures: + * `aarch64` + * `arm` + * `ppc` + * `ppc64` + * `s390x` + * `sparcv9+` + * `x86` + * `x86_64` + +### Features + +#### Concurrency Primitives + +##### ck_pr + +Concurrency primitives as made available by the underlying architecture, includes support for all atomic operations (natively), transactional memory, pipeline control, read-for-ownership and more. + +##### ck_backoff + +A simple and efficient (minimal noise) backoff function. + +##### ck_cc + +Abstracted compiler builtins when writing efficient concurrent data structures. + +#### Safe Memory Reclamation + +##### ck_epoch + +A scalable safe memory reclamation mechanism with support idle threads and various optimizations that make it better than or competitive with many state-of-the-art solutions. + +##### ck_hp + +Implements support for hazard pointers, a simple and efficient lock-free safe memory reclamation mechanism. + +#### Data Structures + +##### ck_array + +A simple concurrently-readable pointer array structure. + +##### ck_bitmap + +An efficient multi-reader and multi-writer concurrent bitmap structure. + +##### ck_ring + +Efficient concurrent bounded FIFO data structures with various performance trade-off. This includes specialization for single-reader, many-reader, single-writer and many-writer. + +##### ck_fifo + +A reference implementation of the first published lock-free FIFO algorithm, with specialization for single-enqueuer-single-dequeuer and many-enqueuer-single-dequeuer and extensions to allow for node re-use. + +##### ck_hp_fifo + +A reference implementation of the above algorithm, implemented with safe memory reclamation using hazard pointers. + +##### ck_hp_stack + +A reference implementation of a Treiber stack with support for hazard pointers. + +##### ck_stack + +A reference implementation of an efficient lock-free stack, with specialized variants for a variety of memory management strategies and bounded concurrency. + +##### ck_queue + +A concurrently readable friendly derivative of the BSD-queue interface. Coupled with a safe memory reclamation mechanism, implement scalable read-side queues with a simple search and replace. + +##### ck_hs + +An extremely efficient single-writer-many-reader hash set, that satisfies lock-freedom with bounded concurrency without any usage of atomic operations and allows for recycling of unused or deleted slots. This data structure is recommended for use as a general hash-set if it is possible to compute values from keys. Learn more at https://engineering.backtrace.io/workload-specialization/ and http://concurrencykit.org/articles/ck_hs.html. + +##### ck_ht + +A specialization of the `ck_hs` algorithm allowing for disjunct key-value pairs. + +##### ck_rhs + +A variant of `ck_hs` that utilizes robin-hood hashing to allow for improved performance with higher load factors and high deletion rates. + +#### Synchronization Primitives + +##### ck_ec + +An extremely efficient event counter implementation, a better alternative to condition variables. + +##### ck_barrier + +A plethora of execution barriers including: centralized barriers, combining barriers, dissemination barriers, MCS barriers, tournament barriers. + +##### ck_brlock + +A simple big-reader lock implementation, write-biased reader-writer lock with scalable read-side locking. + +##### ck_bytelock + +An implementation of bytelocks, for research purposes, allowing for (in theory), fast read-side acquisition without the use of atomic operations. 
In reality, memory barriers are required on the fast path. + +##### ck_cohort + +A generic lock cohorting interface, allows you to turn any lock into a NUMA-friendly scalable NUMA lock. There is a significant trade-off in fast path acquisition cost. Specialization is included for all relevant lock implementations in Concurrency Kit. Learn more by reading "Lock Cohorting: A General Technique for Designing NUMA Locks". + +##### ck_elide + +A generic lock elision framework, allows you to turn any lock implementation into an elision-aware implementation. This requires support for restricted transactional memory by the underlying hardware. + +##### ck_pflock + +Phase-fair reader-writer mutex that provides strong fairness guarantees between readers and writers. Learn more by reading "Spin-Based Reader-Writer Synchronization for Multiprocessor Real-Time Systems". + +##### ck_rwcohort + +A generic read-write lock cohorting interface, allows you to turn any read-write lock into a NUMA-friendly scalable NUMA lock. There is a significant trade-off in fast path acquisition cost. Specialization is included for all relevant lock implementations in Concurrency Kit. Learn more by reading "Lock Cohorting: A General Technique for Designing NUMA Locks". + +##### ck_rwlock + +A simple centralized write-biased read-write lock. + +##### ck_sequence + +A sequence counter lock, popularized by the Linux kernel, allows for very fast read and write synchronization for simple data structures where deep copy is permitted. + +##### ck_swlock + +A single-writer specialized read-lock that is copy-safe, useful for data structures that must remain small, be copied and contain in-band mutexes. + +##### ck_tflock + +Task-fair locks are fair read-write locks, derived from "Scalable reader-writer synchronization for shared-memory multiprocessors". + +##### ck_spinlock + +A basic but very fast spinlock implementation. + +##### ck_spinlock_anderson + +Scalable and fast anderson spinlocks. This is here for reference, one of the earliest scalable and fair lock implementations. + +##### ck_spinlock_cas + +A basic spinlock utilizing compare_and_swap. + +##### ck_spinlock_dec + +A basic spinlock, a C adaption of the older optimized Linux kernel spinlock for x86. Primarily here for reference. + +##### ck_spinlock_fas + +A basic spinlock utilizing atomic exchange. + +##### ck_spinlock_clh + +An efficient implementation of the scalable CLH lock, providing many of the same performance properties of MCS with a better fast-path. + +##### ck_spinlock_hclh + +A NUMA-friendly CLH lock. + +##### ck_spinlock_mcs + +An implementation of the seminal scalable and fair MCS lock. + +##### ck_spinlock_ticket + +An implementation of fair centralized locks. + diff --git a/_config.yml b/_config.yml new file mode 100644 index 0000000..c419263 --- /dev/null +++ b/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-cayman
\ No newline at end of file diff --git a/build/ck.build.s390x b/build/ck.build.s390x new file mode 100644 index 0000000..2a91187 --- /dev/null +++ b/build/ck.build.s390x @@ -0,0 +1 @@ +CFLAGS+=-O2 -D__s390x__ diff --git a/build/ck.build.unknown b/build/ck.build.unknown new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/build/ck.build.unknown @@ -34,7 +34,7 @@ WANT_PIC=yes P_PWD=`pwd` MAINTAINER='sbahra@repnop.org' -VERSION=${VERSION:-'0.6.0'} +VERSION=${VERSION:-'0.7.1'} VERSION_MAJOR='0' BUILD="$PWD/build/ck.build" PREFIX=${PREFIX:-"/usr/local"} @@ -119,6 +119,9 @@ generate() -e "s#@GZIP_SUFFIX@#$GZIP_SUFFIX#g" \ -e "s#@POINTER_PACK_ENABLE@#$POINTER_PACK_ENABLE#g" \ -e "s#@DISABLE_DOUBLE@#$DISABLE_DOUBLE#g" \ + -e "s#@DISABLE_STATIC@#$DISABLE_STATIC#g" \ + -e "s#@SSE_DISABLE@#$SSE_DISABLE#g" \ + -e "s#@PPC32_LWSYNC_ENABLE@#$PPC32_LWSYNC_ENABLE#g" \ -e "s#@RTM_ENABLE@#$RTM_ENABLE#g" \ -e "s#@LSE_ENABLE@#$LSE_ENABLE#g" \ -e "s#@VMA_BITS@#$VMA_BITS_R#g" \ @@ -144,6 +147,7 @@ generate_stdout() echo " SRC_DIR = $BUILD_DIR" echo " SYSTEM = $SYSTEM" echo " PROFILE = $PROFILE" + echo " AR = $AR" echo " CC = $CC" echo " COMPILER = $COMPILER" echo " CFLAGS = $CFLAGS" @@ -153,13 +157,16 @@ generate_stdout() echo " LDNAME_VERSION = $LDNAME_VERSION" echo " LDNAME_MAJOR = $LDNAME_MAJOR" echo " LDFLAGS = $LDFLAGS" + echo " STATIC_LIB = $DISABLE_STATIC" echo " GZIP = $GZIP" echo " CORES = $CORES" echo " POINTER_PACK = $POINTER_PACK_ENABLE" + echo " PPC32_LWSYNC = $PPC32_LWSYNC_ENABLE" echo " VMA_BITS = $VMA_BITS" echo " MEMORY_MODEL = $MM" echo " RTM = $RTM_ENABLE" echo " LSE = $LSE_ENABLE" + echo " SSE = $SSE_DISABLE" echo echo "Headers will be installed in $HEADERS" echo "Libraries will be installed in $LIBRARY" @@ -169,7 +176,8 @@ generate_stdout() for option; do case "$option" in *=?*) - value=`expr -- "$option" : '[^=]*=\(.*\)'` + optname=`echo $option|cut -c 3-` + value=`expr "$optname" : '[^=]*=\(.*\)'` ;; *=) value= @@ -194,18 +202,24 @@ for option; do echo echo "The following options will affect generated code." echo " --enable-pointer-packing Assumes address encoding is subset of pointer range" - echo " --enable-rtm Enable restricted transactional memory (power, x86_64)" - echo " --enable-lse Enable large system extensions (arm64)" echo " --memory-model=N Specify memory model (currently tso, pso or rmo)" echo " --vma-bits=N Specify valid number of VMA bits" echo " --platform=N Force the platform type, instead of relying on autodetection" - echo " --use-cc-builtins Use the compiler atomic bultin functions, instead of the CK implementation" + echo " --use-cc-builtins Use the compiler atomic builtin functions, instead of the CK implementation" echo " --disable-double Don't generate any of the functions using the \"double\" type" + echo " --disable-static Don't compile a static version of the ck lib" + echo + echo "The following options will affect specific platform-dependent generated code." + echo " --disable-sse Do not use any SSE instructions (x86)" + echo " --enable-lse Enable large system extensions (arm64)" + echo " --enable-ppc32-lwsync Enable lwsync instruction usage (32-bit Power ISA)" + echo " --enable-rtm Enable restricted transactional memory (Power ISA, x86_64)" echo echo "The following options affect regression testing." 
echo " --cores=N Specify number of cores available on target machine" echo echo "The following environment variables may be used:" + echo " AR AR archiver command" echo " CC C compiler command" echo " CFLAGS C compiler flags" echo " LDFLAGS Linker flags" @@ -237,12 +251,18 @@ for option; do --enable-pointer-packing) POINTER_PACK_ENABLE="CK_MD_POINTER_PACK_ENABLE" ;; + --enable-ppc32-lwsync) + PPC32_LWSYNC_ENABLE="CK_MD_PPC32_LWSYNC" + ;; --enable-rtm) RTM_ENABLE_SET="CK_MD_RTM_ENABLE" ;; --enable-lse) LSE_ENABLE_SET="CK_MD_LSE_ENABLE" ;; + --disable-sse) + SSE_DISABLE="CK_MD_SSE_DISABLE" + ;; --cores=*) CORES=$value ;; @@ -276,6 +296,9 @@ for option; do --disable-double) DISABLE_DOUBLE="CK_PR_DISABLE_DOUBLE" ;; + --disable-static) + DISABLE_STATIC=1 + ;; --platform=*) PLATFORM=$value ;; @@ -294,7 +317,8 @@ for option; do fi ;; *=*) - NAME=`expr -- "$option" : '\([^=]*\)='` + optname=`echo $option|cut -c 3-` + NAME=`expr "$optname" : '\([^=]*\)='` eval "$NAME='$value'" export $NAME ;; @@ -309,10 +333,13 @@ done HEADERS=${HEADERS:-"${PREFIX}/include"} LIBRARY=${LIBRARY:-"${PREFIX}/lib"} MANDIR=${MANDIR:-"${PREFIX}/share/man"} -GZIP=${GZIP:-"gzip -c"} +GZIP=${GZIP-"gzip -c"} POINTER_PACK_ENABLE=${POINTER_PACK_ENABLE:-"CK_MD_POINTER_PACK_DISABLE"} DISABLE_DOUBLE=${DISABLE_DOUBLE:-"CK_PR_ENABLE_DOUBLE"} +DISABLE_STATIC=${DISABLE_STATIC:-"0"} +PPC32_LWSYNC_ENABLE=${PPC32_LWSYNC_ENABLE:-"CK_MD_PPC32_LWSYNC_DISABLE"} RTM_ENABLE=${RTM_ENABLE_SET:-"CK_MD_RTM_DISABLE"} +SSE_DISABLE=${SSE_DISABLE:-"CK_MD_SSE_ENABLE"} LSE_ENABLE=${LSE_ENABLE_SET:-"CK_MD_LSE_DISABLE"} VMA_BITS=${VMA_BITS:-"unknown"} @@ -347,14 +374,18 @@ case "$SYSTEM" in DCORES=`sysctl -n hw.ncpu` SYSTEM=darwin ;; - MINGW32*) + MINGW32*|MSYS_NT*) SYSTEM=mingw32 LDFLAGS="-mthreads $LDFLAGS" ;; - CYGWIN_NT*) - SYSTEM=cygwin - LDFLAGS="-mthreads $LDFLAGS" - ;; + MINGW64*) + SYSTEM=mingw64 + LDFLAGS="-mthreads $LDFLAGS" + ;; + CYGWIN_NT*) + SYSTEM=cygwin + LDFLAGS="-mthreads $LDFLAGS" + ;; *) SYSTEM= ;; @@ -365,11 +396,18 @@ assert "$SYSTEM" "$SYSTEM" "unsupported" CORES=${CORES:-${DCORES}} printf "Detecting machine architecture..." 
if test "x$PLATFORM" = "x"; then - PLATFORM=`uname -m 2> /dev/null` + case $SYSTEM in + "freebsd") + PLATFORM=`uname -p 2> /dev/null` + ;; + *) + PLATFORM=`uname -m 2> /dev/null` + ;; + esac fi case $PLATFORM in - "macppc"|"Power Macintosh"|"powerpc") + "macppc"|"Power Macintosh"|"powerpc"|"powerpcspe") RTM_ENABLE="CK_MD_RTM_DISABLE" LSE_ENABLE="CK_MD_LSE_DISABLE" MM="${MM:-"CK_MD_RMO"}" @@ -457,19 +495,22 @@ case $PLATFORM in ;; esac ;; - "ppc64"|"ppc64le") + "ppc64"|"ppc64le"|"powerpc64") RTM_ENABLE="CK_MD_RTM_DISABLE" LSE_ENABLE="CK_MD_LSE_DISABLE" MM="${MM:-"CK_MD_RMO"}" PLATFORM=ppc64 ENVIRONMENT=64 ;; - arm|armv6l|armv7l) - if test "$PLATFORM" = "armv6l"; then - CFLAGS="$CFLAGS -march=armv6k"; - elif test "$PLATFORM" = "armv7l"; then - CFLAGS="$CFLAGS -march=armv7-a"; - fi + arm|armv6|armv6l|armv7|armv7l) + case "$PLATFORM" in + "armv6"|"armv6l") + CFLAGS="$CFLAGS -march=armv6k"; + ;; + "armv7"|"armv7l") + CFLAGS="$CFLAGS -march=armv7-a"; + ;; + esac RTM_ENABLE="CK_MD_RTM_DISABLE" LSE_ENABLE="CK_MD_LSE_DISABLE" MM="${MM:-"CK_MD_RMO"}" @@ -482,11 +523,19 @@ case $PLATFORM in PLATFORM=aarch64 ENVIRONMENT=64 ;; + "s390x") + RTM_ENABLE="CK_MD_RTM_DISABLE" + LSE_ENABLE="CK_MD_LSE_DISABLE" + MM="${MM:-"CK_MD_RMO"}" + PLATFORM=s390x + ENVIRONMENT=64 + ;; *) RTM_ENABLE="CK_MD_RTM_DISABLE" LSE_ENABLE="CK_MD_LSE_DISABLE" - PLATFORM= + PLATFORM=unknown MM="${MM:-"CK_MD_RMO"}" + USE_CC_BUILTINS=1 ;; esac @@ -543,27 +592,65 @@ else echo "success [$BUILD_DIR]" fi -printf "Finding gzip tool................" -GZIP=`pathsearch "${GZIP:-gzip}"` -if test -z "$GZIP" -o ! -x "$GZIP"; then +if test -n "$GZIP"; then + printf "Finding gzip tool................" GZIP=`pathsearch "${GZIP:-gzip}"` - GZIP="$GZIP" + if test -z "$GZIP" -o ! -x "$GZIP"; then + GZIP=`pathsearch "${GZIP:-gzip}"` + GZIP="$GZIP" + fi + + if test -z "$GZIP"; then + echo "not found" + else + echo "success [$GZIP]" + GZIP="$GZIP -c" + GZIP_SUFFIX=".gz" + fi fi if test -z "$GZIP"; then - echo "not found" GZIP=cat GZIP_SUFFIX="" -else - echo "success [$GZIP]" - GZIP="$GZIP -c" - GZIP_SUFFIX=".gz" +fi + +if test "$PROFILE"; then + printf "Using user-specified profile....." + + if test -z "$CC"; then + echo "failed [specify compiler]" + exit $EXIT_FAILURE + fi + + if test ! -f build/ck.build.$PROFILE; then + echo "failed [$PROFILE]" + exit $EXIT_FAILURE + fi + + echo "success [$PROFILE]" + printf "Generating header files.........." + generate include/ck_md.h.in include/ck_md.h + generate include/freebsd/ck_md.h.in include/freebsd/ck_md.h + echo "success" + printf "Generating build files..........." + generate src/Makefile.in src/Makefile + generate doc/Makefile.in doc/Makefile + generate build/ck.build.in build/ck.build + generate build/regressions.build.in build/regressions.build + generate build/ck.pc.in build/ck.pc + generate build/ck.spec.in build/ck.spec + generate Makefile.in Makefile + echo "success" + generate_stdout + exit $EXIT_SUCCESS fi printf "Finding suitable compiler........" -CC=`pathsearch "${CC:-cc}"` -if test -z "$CC" -o ! -x "$CC"; then - CC=`pathsearch "${CC:-gcc}"` +if test ! -x "${CC}"; then + CC=`pathsearch "${CC:-cc}"` + if test -z "$CC" -o ! -x "$CC"; then + CC=`pathsearch "${CC:-gcc}"` + fi fi assert "$CC" "not found" @@ -596,7 +683,7 @@ int main(void) { EOF $CC -o .1 .1.c -COMPILER=`./.1` +COMPILER=`./.1 2> /dev/null` r=$? 
rm -f .1.c .1 @@ -628,13 +715,24 @@ elif test "$COMPILER" = "gcc" || test "$COMPILER" = "clang" || test "$COMPILER" if test "$WANT_PIC" = "yes"; then LDFLAGS="$LDFLAGS -shared -fPIC" CFLAGS="$CFLAGS -fPIC" - ALL_LIBS="libck.so libck.a" - INSTALL_LIBS="install-so install-lib" + + if [ "$DISABLE_STATIC" -eq 1 ]; then + ALL_LIBS="libck.so" + INSTALL_LIBS="install-so" + else + ALL_LIBS="libck.so libck.a" + INSTALL_LIBS="install-so install-lib" + fi else LDFLAGS="$LDFLAGS -fno-PIC" CFLAGS="$CFLAGS -fno-PIC" - ALL_LIBS="libck.a" - INSTALL_LIBS="install-lib" + if [ "$DISABLE_STATIC" -eq 1 ]; then + echo "Error: You have choosen to disable PIC, yet you also disabled the static lib." 1>&2 + exit $EXIT_FAILURE + else + ALL_LIBS="libck.a" + INSTALL_LIBS="install-lib" + fi fi CFLAGS="-D_XOPEN_SOURCE=600 -D_BSD_SOURCE -D_DEFAULT_SOURCE -std=gnu99 -pedantic -Wall -W -Wundef -Wendif-labels -Wshadow -Wpointer-arith -Wcast-align -Wcast-qual -Wwrite-strings -Wstrict-prototypes -Wmissing-prototypes -Wnested-externs -Winline -Wdisabled-optimization -fstrict-aliasing -O2 -pipe -Wno-parentheses $CFLAGS" @@ -647,6 +745,17 @@ else assert "" "unknown compiler" fi +printf "Finding suitable archiver........" +if test ! -x "${AR}"; then + AR=`pathsearch "${AR:-ar}"` + if test -z "$AR" -o ! -x "$AR"; then + AR=`pathsearch "${AR:-ar}"` + else + echo "success [$AR]" + fi +fi +assert "$AR" "not found" + printf "Detecting VMA bits..............." VMA="unknown" if test "$VMA_BITS" = "unknown"; then @@ -732,42 +841,13 @@ printf "Detecting git SHA................" get_git_sha echo "$GIT_MSG [$GIT_SHA]" -if test "$PROFILE"; then - printf "Using user-specified profile....." - - if test -z "$CC"; then - echo "failed [specify compiler]" - exit $EXIT_FAILURE - fi - - if test ! -f build/ck.build.$PROFILE; then - echo "failed [$PROFILE]" - exit $EXIT_FAILURE - fi - - echo "success [$PROFILE]" - printf "Generating header files.........." - generate include/ck_md.h.in include/ck_md.h - echo "success" - printf "Generating build files..........." - generate src/Makefile.in src/Makefile - generate doc/Makefile.in doc/Makefile - generate build/ck.build.in build/ck.build - generate build/regressions.build.in build/regressions.build - generate build/ck.pc.in build/ck.pc - generate build/ck.spec.in build/ck.spec - generate Makefile.in Makefile - echo "success" - generate_stdout - exit $EXIT_SUCCESS -fi - # Platform will be used as a macro. PROFILE="${PROFILE:-$PLATFORM}" PLATFORM="__${PLATFORM}__" printf "Generating header files.........." generate include/ck_md.h.in include/ck_md.h +generate include/freebsd/ck_md.h.in include/freebsd/ck_md.h echo "success" printf "Generating build files..........." @@ -794,3 +874,12 @@ generate Makefile.in $P_PWD/Makefile touch src/*.c echo "success" generate_stdout + +if test "$PROFILE" = "unknown"; then + echo + echo "WARNING: your target architecture is not a first-class citizen." + echo + echo "The test suite may not work as intended. Consider reaching out " + echo "to the mailing list about having the project add first-class " + echo "support for your architecture." +fi diff --git a/doc/ck_epoch_poll b/doc/ck_epoch_poll index 68c4a4e..704b04a 100644 --- a/doc/ck_epoch_poll +++ b/doc/ck_epoch_poll @@ -47,10 +47,9 @@ if deemed safe. This function is meant to be used in cases epoch reclamation cost must be amortized over time in a manner that does not affect caller progress. .Sh RETURN VALUES -This function will return true if at least one function was dispatched. 
-This function will return false if it has determined not all threads have -observed the latest generation of epoch-protected objects. Neither value -indicates an error. +This functions returns false if the following conditions are met: +no memory was reclaimed, the records are not in a grace period and no forward +progress was made. .Sh ERRORS Behavior is undefined if the object pointed to by .Fa record diff --git a/doc/ck_epoch_register b/doc/ck_epoch_register index 85ea461..90055d9 100644 --- a/doc/ck_epoch_register +++ b/doc/ck_epoch_register @@ -34,7 +34,7 @@ Concurrency Kit (libck, \-lck) .Sh SYNOPSIS .In ck_epoch.h .Ft void -.Fn ck_epoch_register "ck_epoch_t *epoch" "ck_epoch_record_t *record" +.Fn ck_epoch_register "ck_epoch_t *epoch" "ck_epoch_record_t *record" "void *cl" .Sh DESCRIPTION The .Fn ck_epoch_register 3 @@ -49,7 +49,11 @@ object pointed to by the argument will have lifetime managed by the underlying epoch sub-system. The record object must not be destroyed after it is associated with a .Fn ck_epoch_register 3 -call. +call. An optional context pointer +.Fa cl +may be passed that is retrievable with the +.Fn ck_epoch_record_ct 3 +function. .Sh RETURN VALUES This function has no return value. .Sh SEE ALSO diff --git a/doc/ck_epoch_synchronize b/doc/ck_epoch_synchronize index 6c9a698..6e5f73d 100644 --- a/doc/ck_epoch_synchronize +++ b/doc/ck_epoch_synchronize @@ -40,6 +40,8 @@ The .Fn ck_epoch_synchronize 3 function will block the caller until a grace period has been detected, according to the semantics of epoch reclamation. +It is not safe to call this function on a record that is +in an active section. Any objects requiring safe memory reclamation which are logically deleted are safe for physical deletion following a call to .Fn ck_epoch_synchronize 3 . 
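To make the two man-page changes above concrete, here is a minimal sketch of the register-then-synchronize pattern. The names `global_epoch`, `record`, `thread_setup`, and `reclaim` are illustrative only; the new `cl` argument is passed as NULL, and ck_epoch_synchronize(3) is called only while `record` is outside of any active section, as the added wording requires.

```c
#include <ck_epoch.h>
#include <stddef.h>
#include <stdlib.h>

static ck_epoch_t global_epoch;          /* ck_epoch_init()ed elsewhere */
static ck_epoch_record_t record;

/* Called once per participating thread. */
static void
thread_setup(void)
{

        /*
         * The third argument is the new optional context pointer,
         * retrievable later through ck_epoch_record_ct(3); NULL if unused.
         */
        ck_epoch_register(&global_epoch, &record, NULL);
        return;
}

/* Called by a writer, outside of any active section on `record`. */
static void
reclaim(void *node)
{

        /* `node` has already been logically deleted from the structure. */
        ck_epoch_synchronize(&record);

        /* A grace period has elapsed; physical deletion is now safe. */
        free(node);
        return;
}
```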
diff --git a/doc/ck_pr_dec b/doc/ck_pr_dec index f3d34dd..b33ec19 100644 --- a/doc/ck_pr_dec +++ b/doc/ck_pr_dec @@ -29,22 +29,31 @@ .Sh NAME .Nm ck_pr_dec_ptr , .Nm ck_pr_dec_ptr_zero , +.Nm ck_pr_dec_ptr_is_zero , .Nm ck_pr_dec_double , .Nm ck_pr_dec_double_zero , +.Nm ck_pr_dec_double_is_zero , .Nm ck_pr_dec_char , .Nm ck_pr_dec_char_zero , +.Nm ck_pr_dec_char_is_zero , .Nm ck_pr_dec_uint , .Nm ck_pr_dec_uint_zero , +.Nm ck_pr_dec_char_is_zero , .Nm ck_pr_dec_int , .Nm ck_pr_dec_int_zero , +.Nm ck_pr_dec_int_is_zero , .Nm ck_pr_dec_64 , .Nm ck_pr_dec_64_zero , +.Nm ck_pr_dec_64_is_zero , .Nm ck_pr_dec_32 , .Nm ck_pr_dec_32_zero , +.Nm ck_pr_dec_32_is_zero , .Nm ck_pr_dec_16 , .Nm ck_pr_dec_16_zero , +.Nm ck_pr_dec_32_is_zero , .Nm ck_pr_dec_8 , -.Nm ck_pr_dec_8_zero +.Nm ck_pr_dec_8_zero , +.Nm ck_pr_dec_8_is_zero .Nd atomic decrement operations .Sh LIBRARY Concurrency Kit (libck, \-lck) @@ -54,38 +63,56 @@ Concurrency Kit (libck, \-lck) .Fn ck_pr_dec_ptr "void *target" .Ft void .Fn ck_pr_dec_ptr_zero "void *target" "bool *z" +.Ft bool +.Fn ck_pr_dec_ptr_is_zero "void *target" .Ft void .Fn ck_pr_dec_double "double *target" .Ft void .Fn ck_pr_dec_double_zero "double *target" "bool *z" +.Ft bool +.Fn ck_pr_dec_double_is_zero "double *target" .Ft void .Fn ck_pr_dec_char "char *target" .Ft void .Fn ck_pr_dec_char_zero "char *target" "bool *z" +.Ft bool +.Fn ck_pr_dec_char_is_zero "char *target" .Ft void .Fn ck_pr_dec_uint "unsigned int *target" .Ft void .Fn ck_pr_dec_uint_zero "unsigned int *target" "bool *z" +.Ft bool +.Fn ck_pr_dec_uint_is_zero "unsigned int *target" .Ft void .Fn ck_pr_dec_int "int *target" .Ft void .Fn ck_pr_dec_int_zero "int *target" "bool *z" +.Ft bool +.Fn ck_pr_dec_int_is_zero "int *target" .Ft void .Fn ck_pr_dec_64 "uint64_t *target" .Ft void .Fn ck_pr_dec_64_zero "uint64_t *target" "bool *z" +.Ft bool +.Fn ck_pr_dec_64_is_zero "uint64_t *target" .Ft void .Fn ck_pr_dec_32 "uint32_t *target" .Ft void .Fn ck_pr_dec_32_zero "uint32_t *target" "bool *z" +.Ft bool +.Fn ck_pr_dec_32_is_zero "uint32_t *target" .Ft void .Fn ck_pr_dec_16 "uint16_t *target" .Ft void .Fn ck_pr_dec_16_zero "uint16_t *target" "bool *z" +.Ft bool +.Fn ck_pr_dec_16_is_zero "uint16_t *target" .Ft void .Fn ck_pr_dec_8 "uint8_t *target" .Ft void .Fn ck_pr_dec_8_zero "uint8_t *target" "bool *z" +.Ft bool +.Fn ck_pr_dec_8_is_zero "uint8_t *target" .Sh DESCRIPTION The .Fn ck_pr_dec 3 @@ -99,6 +126,8 @@ to true if the result of the decrement operation was 0. They set the value pointed to by .Fa z to false otherwise. +The ck_pr_dec_is_zero family of function return true if the result +of the decrement operation was 0 and false otherwise. 
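As a hedged illustration of the new `_is_zero` variants documented in this page, a reference count can be released by branching directly on the decrement result. `struct obj` and `obj_release` are hypothetical names used only for this sketch; production code would typically pair the decrement with the fence operations listed under SEE ALSO.

```c
#include <ck_pr.h>
#include <stdint.h>
#include <stdlib.h>

struct obj {
        uint32_t refcnt;
        /* ... payload ... */
};

static void
obj_release(struct obj *o)
{

        /*
         * Atomically decrement the reference count; true is returned
         * only when the count reaches zero, so exactly one caller
         * performs the free.
         */
        if (ck_pr_dec_32_is_zero(&o->refcnt))
                free(o);

        return;
}
```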
.Sh SEE ALSO .Xr ck_pr_fence_load 3 , .Xr ck_pr_fence_load_depends 3 , diff --git a/doc/ck_pr_inc b/doc/ck_pr_inc index 72a3e70..337e488 100644 --- a/doc/ck_pr_inc +++ b/doc/ck_pr_inc @@ -29,22 +29,31 @@ .Sh NAME .Nm ck_pr_inc_ptr , .Nm ck_pr_inc_ptr_zero , +.Nm ck_pr_inc_ptr_is_zero , .Nm ck_pr_inc_double , .Nm ck_pr_inc_double_zero , +.Nm ck_pr_inc_double_is_zero , .Nm ck_pr_inc_char , .Nm ck_pr_inc_char_zero , +.Nm ck_pr_inc_char_is_zero , .Nm ck_pr_inc_uint , .Nm ck_pr_inc_uint_zero , +.Nm ck_pr_inc_uint_is_zero , .Nm ck_pr_inc_int , .Nm ck_pr_inc_int_zero , +.Nm ck_pr_inc_int_is_zero , .Nm ck_pr_inc_64 , .Nm ck_pr_inc_64_zero , +.Nm ck_pr_inc_64_is_zero , .Nm ck_pr_inc_32 , .Nm ck_pr_inc_32_zero , +.Nm ck_pr_inc_32_is_zero , .Nm ck_pr_inc_16 , .Nm ck_pr_inc_16_zero , +.Nm ck_pr_inc_16_is_zero , .Nm ck_pr_inc_8 , -.Nm ck_pr_inc_8_zero +.Nm ck_pr_inc_8_zero , +.Nm ck_pr_inc_8_is_zero .Nd atomic increment operations .Sh LIBRARY Concurrency Kit (libck, \-lck) @@ -54,38 +63,56 @@ Concurrency Kit (libck, \-lck) .Fn ck_pr_inc_ptr "void *target" .Ft void .Fn ck_pr_inc_ptr_zero "void *target" "bool *z" +.Ft bool +.Fn ck_pr_inc_ptr_is_zero "void *target" .Ft void .Fn ck_pr_inc_double "double *target" .Ft void .Fn ck_pr_inc_double_zero "double *target" "bool *z" +.Ft bool +.Fn ck_pr_inc_double_is_zero "double *target" .Ft void .Fn ck_pr_inc_char "char *target" .Ft void .Fn ck_pr_inc_char_zero "char *target" "bool *z" +.Ft bool +.Fn ck_pr_inc_char_is_zero "char *target" .Ft void .Fn ck_pr_inc_uint "unsigned int *target" .Ft void .Fn ck_pr_inc_uint_zero "unsigned int *target" "bool *z" +.Ft bool +.Fn ck_pr_inc_uint_is_zero "unsigned int *target" .Ft void .Fn ck_pr_inc_int "int *target" .Ft void .Fn ck_pr_inc_int_zero "int *target" "bool *z" +.Ft bool +.Fn ck_pr_inc_int_is_zero "int *target" .Ft void .Fn ck_pr_inc_64 "uint64_t *target" .Ft void .Fn ck_pr_inc_64_zero "uint64_t *target" "bool *z" +.Ft bool +.Fn ck_pr_inc_64_is_zero "uint64_t *target" .Ft void .Fn ck_pr_inc_32 "uint32_t *target" .Ft void .Fn ck_pr_inc_32_zero "uint32_t *target" "bool *z" +.Ft bool +.Fn ck_pr_inc_32_is_zero "uint32_t *target" .Ft void .Fn ck_pr_inc_16 "uint16_t *target" .Ft void .Fn ck_pr_inc_16_zero "uint16_t *target" "bool *z" +.Ft bool +.Fn ck_pr_inc_16_is_zero "uint16_t *target" .Ft void .Fn ck_pr_inc_8 "uint8_t *target" .Ft void .Fn ck_pr_inc_8_zero "uint8_t *target" "bool *z" +.Ft bool +.Fn ck_pr_inc_8_is_zero "uint8_t *target" .Sh DESCRIPTION The .Fn ck_pr_inc 3 @@ -99,6 +126,8 @@ to true if the result of the increment operation was 0. The functions set the value pointed to by .Fa z false otherwise. +The ck_pr_inc_is_zero family of function return true if the result +of the decrement operation was 0 and false otherwise. .Sh SEE ALSO .Xr ck_pr_fence_load 3 , .Xr ck_pr_fence_load_depends 3 , diff --git a/include/ck_backoff.h b/include/ck_backoff.h index 82a4f21..a1f7616 100644 --- a/include/ck_backoff.h +++ b/include/ck_backoff.h @@ -50,7 +50,7 @@ ck_backoff_eb(unsigned int *c) for (i = 0; i < ceiling; i++) ck_pr_barrier(); - *c = ceiling <<= ceiling < CK_BACKOFF_CEILING; + *c = ceiling << (ceiling < CK_BACKOFF_CEILING); return; } diff --git a/include/ck_cc.h b/include/ck_cc.h index e17dc7b..1b4ff46 100644 --- a/include/ck_cc.h +++ b/include/ck_cc.h @@ -50,6 +50,7 @@ * Container function. * This relies on (compiler) implementation-defined behavior. 
*/ +#ifndef CK_CC_CONTAINER #define CK_CC_CONTAINER(F, T, M, N) \ CK_CC_INLINE static T * \ N(F *p) \ @@ -57,6 +58,7 @@ F *n = p; \ return (T *)(void *)(((char *)n) - ((size_t)&((T *)0)->M)); \ } +#endif #define CK_CC_PAD(x) union { char pad[x]; } @@ -104,41 +106,35 @@ #define CK_CC_TYPEOF(X, DEFAULT) (DEFAULT) #endif -#ifndef CK_F_CC_FFS -#define CK_F_CC_FFS -CK_CC_INLINE static int -ck_cc_ffs(unsigned int x) -{ - unsigned int i; - - if (x == 0) - return 0; - - for (i = 1; (x & 1) == 0; i++, x >>= 1); - - return i; +#define CK_F_CC_FFS_G(L, T) \ +CK_CC_INLINE static int \ +ck_cc_##L(T v) \ +{ \ + unsigned int i; \ + \ + if (v == 0) \ + return 0; \ + \ + for (i = 1; (v & 1) == 0; i++, v >>= 1); \ + return i; \ } -#endif - -#ifndef CK_F_CC_CLZ -#define CK_F_CC_CLZ -#include <ck_limits.h> -CK_CC_INLINE static int -ck_cc_clz(unsigned int x) -{ - unsigned int count, i; +#ifndef CK_F_CC_FFS +#define CK_F_CC_FFS +CK_F_CC_FFS_G(ffs, unsigned int) +#endif /* CK_F_CC_FFS */ - for (count = 0, i = sizeof(unsigned int) * CHAR_BIT; i > 0; count++) { - unsigned int bit = 1U << --i; +#ifndef CK_F_CC_FFSL +#define CK_F_CC_FFSL +CK_F_CC_FFS_G(ffsl, unsigned long) +#endif /* CK_F_CC_FFSL */ - if (x & bit) - break; - } +#ifndef CK_F_CC_FFSLL +#define CK_F_CC_FFSLL +CK_F_CC_FFS_G(ffsll, unsigned long long) +#endif /* CK_F_CC_FFSLL */ - return count; -} -#endif +#undef CK_F_CC_FFS_G #ifndef CK_F_CC_CTZ #define CK_F_CC_CTZ @@ -151,7 +147,6 @@ ck_cc_ctz(unsigned int x) return 0; for (i = 0; (x & 1) == 0; i++, x >>= 1); - return i; } #endif diff --git a/include/ck_ec.h b/include/ck_ec.h new file mode 100644 index 0000000..cd2a368 --- /dev/null +++ b/include/ck_ec.h @@ -0,0 +1,945 @@ +/* + * Copyright 2018 Paul Khuong, Google LLC. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Overview + * ======== + * + * ck_ec implements 32- and 64- bit event counts. Event counts let us + * easily integrate OS-level blocking (e.g., futexes) in lock-free + * protocols. Waiters block conditionally, if the event count's value + * is still equal to some old value. + * + * Event counts come in four variants: 32 and 64 bit (with one bit + * stolen for internal signaling, so 31 and 63 bit counters), and + * single or multiple producers (wakers). 
Waiters are always multiple + * consumers. The 32 bit variants are smaller, and more efficient, + * especially in single producer mode. The 64 bit variants are larger, + * but practically invulnerable to ABA. + * + * The 32 bit variant is always available. The 64 bit variant is only + * available if CK supports 64-bit atomic operations. Currently, + * specialization for single producer is only implemented for x86 and + * x86-64, on compilers that support GCC extended inline assembly; + * other platforms fall back to the multiple producer code path. + * + * A typical usage pattern is: + * + * 1. On the producer side: + * + * - Make changes to some shared data structure, without involving + * the event count at all. + * - After each change, call ck_ec_inc on the event count. The call + * acts as a write-write barrier, and wakes up any consumer blocked + * on the event count (waiting for new changes). + * + * 2. On the consumer side: + * + * - Snapshot ck_ec_value of the event count. The call acts as a + * read barrier. + * - Read and process the shared data structure. + * - Wait for new changes by calling ck_ec_wait with the snapshot value. + * + * Some data structures may opt for tighter integration with their + * event count. For example, an SPMC ring buffer or disruptor might + * use the event count's value as the write pointer. If the buffer is + * regularly full, it might also make sense to store the read pointer + * in an MP event count. + * + * This event count implementation supports tighter integration in two + * ways. + * + * Producers may opt to increment by an arbitrary value (less than + * INT32_MAX / INT64_MAX), in order to encode, e.g., byte + * offsets. Larger increment values make wraparound more likely, so + * the increments should still be relatively small. + * + * Consumers may pass a predicate to ck_ec_wait_pred. This predicate + * can make `ck_ec_wait_pred` return early, before the event count's + * value changes, and can override the deadline passed to futex_wait. + * This lets consumer block on one eventcount, while optimistically + * looking at other waking conditions. + * + * API Reference + * ============= + * + * When compiled as C11 or later, this header defines type-generic + * macros for ck_ec32 and ck_ec64; the reference describes this + * type-generic API. + * + * ck_ec needs additional OS primitives to determine the current time, + * to wait on an address, and to wake all threads waiting on a given + * address. These are defined with fields in a struct ck_ec_ops. Each + * ck_ec_ops may additionally define the number of spin loop + * iterations in the slow path, as well as the initial wait time in + * the internal exponential backoff, the exponential scale factor, and + * the right shift count (< 32). + * + * The ops, in addition to the single/multiple producer flag, are + * encapsulated in a struct ck_ec_mode, passed to most ck_ec + * operations. + * + * ec is a struct ck_ec32 *, or a struct ck_ec64 *. + * + * value is an uint32_t for ck_ec32, and an uint64_t for ck_ec64. It + * never exceeds INT32_MAX and INT64_MAX respectively. + * + * mode is a struct ck_ec_mode *. + * + * deadline is either NULL, or a `const struct timespec *` that will + * be treated as an absolute deadline. + * + * `void ck_ec_init(ec, value)`: initializes the event count to value. + * + * `value ck_ec_value(ec)`: returns the current value of the event + * counter. This read acts as a read (acquire) barrier. 
+ * + * `bool ck_ec_has_waiters(ec)`: returns whether some thread has + * marked the event count as requiring an OS wakeup. + * + * `void ck_ec_inc(ec, mode)`: increments the value of the event + * counter by one. This writes acts as a write barrier. Wakes up + * any waiting thread. + * + * `value ck_ec_add(ec, mode, value)`: increments the event counter by + * `value`, and returns the event counter's previous value. This + * write acts as a write barrier. Wakes up any waiting thread. + * + * `int ck_ec_deadline(struct timespec *new_deadline, + * mode, + * const struct timespec *timeout)`: + * computes a deadline `timeout` away from the current time. If + * timeout is NULL, computes a deadline in the infinite future. The + * resulting deadline is written to `new_deadline`. Returns 0 on + * success, and -1 if ops->gettime failed (without touching errno). + * + * `int ck_ec_wait(ec, mode, value, deadline)`: waits until the event + * counter's value differs from `value`, or, if `deadline` is + * provided and non-NULL, until the current time is after that + * deadline. Use a deadline with tv_sec = 0 for a non-blocking + * execution. Returns 0 if the event counter has changed, and -1 on + * timeout. This function acts as a read (acquire) barrier. + * + * `int ck_ec_wait_pred(ec, mode, value, pred, data, deadline)`: waits + * until the event counter's value differs from `value`, or until + * `pred` returns non-zero, or, if `deadline` is provided and + * non-NULL, until the current time is after that deadline. Use a + * deadline with tv_sec = 0 for a non-blocking execution. Returns 0 if + * the event counter has changed, `pred`'s return value if non-zero, + * and -1 on timeout. This function acts as a read (acquire) barrier. + * + * `pred` is always called as `pred(data, iteration_deadline, now)`, + * where `iteration_deadline` is a timespec of the deadline for this + * exponential backoff iteration, and `now` is the current time. If + * `pred` returns a non-zero value, that value is immediately returned + * to the waiter. Otherwise, `pred` is free to modify + * `iteration_deadline` (moving it further in the future is a bad + * idea). + * + * Implementation notes + * ==================== + * + * The multiple producer implementation is a regular locked event + * count, with a single flag bit to denote the need to wake up waiting + * threads. + * + * The single producer specialization is heavily tied to + * [x86-TSO](https://www.cl.cam.ac.uk/~pes20/weakmemory/cacm.pdf), and + * to non-atomic read-modify-write instructions (e.g., `inc mem`); + * these non-atomic RMW let us write to the same memory locations with + * atomic and non-atomic instructions, without suffering from process + * scheduling stalls. + * + * The reason we can mix atomic and non-atomic writes to the `counter` + * word is that every non-atomic write obviates the need for the + * atomically flipped flag bit: we only use non-atomic writes to + * update the event count, and the atomic flag only informs the + * producer that we would like a futex_wake, because of the update. + * We only require the non-atomic RMW counter update to prevent + * preemption from introducing arbitrarily long worst case delays. + * + * Correctness does not rely on the usual ordering argument: in the + * absence of fences, there is no strict ordering between atomic and + * non-atomic writes. 
The key is instead x86-TSO's guarantee that a + * read is satisfied from the most recent buffered write in the local + * store queue if there is one, or from memory if there is no write to + * that address in the store queue. + * + * x86-TSO's constraint on reads suffices to guarantee that the + * producer will never forget about a counter update. If the last + * update is still queued, the new update will be based on the queued + * value. Otherwise, the new update will be based on the value in + * memory, which may or may not have had its flag flipped. In either + * case, the value of the counter (modulo flag) is correct. + * + * When the producer forwards the counter's value from its store + * queue, the new update might not preserve a flag flip. Any waiter + * thus has to check from time to time to determine if it wasn't + * woken up because the flag bit was silently cleared. + * + * In reality, the store queue in x86-TSO stands for in-flight + * instructions in the chip's out-of-order backend. In the vast + * majority of cases, instructions will only remain in flight for a + * few hundred or thousand of cycles. That's why ck_ec_wait spins on + * the `counter` word for ~100 iterations after flipping its flag bit: + * if the counter hasn't changed after that many iterations, it is + * very likely that the producer's next counter update will observe + * the flag flip. + * + * That's still not a hard guarantee of correctness. Conservatively, + * we can expect that no instruction will remain in flight for more + * than 1 second... if only because some interrupt will have forced + * the chip to store its architectural state in memory, at which point + * an instruction is either fully retired or rolled back. Interrupts, + * particularly the pre-emption timer, are why single-producer updates + * must happen in a single non-atomic read-modify-write instruction. + * Having a single instruction as the critical section means we only + * have to consider the worst-case execution time for that + * instruction. That's easier than doing the same for a pair of + * instructions, which an unlucky pre-emption could delay for + * arbitrarily long. + * + * Thus, after a short spin loop, ck_ec_wait enters an exponential + * backoff loop, where each "sleep" is instead a futex_wait. The + * backoff is only necessary to handle rare cases where the flag flip + * was overwritten after the spin loop. Eventually, more than one + * second will have elapsed since the flag flip, and the sleep timeout + * becomes infinite: since the flag bit has been set for much longer + * than the time for which an instruction may remain in flight, the + * flag will definitely be observed at the next counter update. + * + * The 64 bit ck_ec_wait pulls another trick: futexes only handle 32 + * bit ints, so we must treat the 64 bit counter's low 32 bits as an + * int in futex_wait. That's a bit dodgy, but fine in practice, given + * that the OS's futex code will always read whatever value is + * currently in memory: even if the producer thread were to wait on + * its own event count, the syscall and ring transition would empty + * the store queue (the out-of-order execution backend). + * + * Finally, what happens when the producer is migrated to another core + * or otherwise pre-empted? Migration must already incur a barrier, so + * that thread always sees its own writes, so that's safe. 
As for + * pre-emption, that requires storing the architectural state, which + * means every instruction must either be executed fully or not at + * all when pre-emption happens. + */ + +#ifndef CK_EC_H +#define CK_EC_H +#include <ck_cc.h> +#include <ck_pr.h> +#include <ck_stdbool.h> +#include <ck_stdint.h> +#include <ck_stddef.h> +#include <sys/time.h> + +/* + * If we have ck_pr_faa_64 (and, presumably, ck_pr_load_64), we + * support 63 bit counters. + */ +#ifdef CK_F_PR_FAA_64 +#define CK_F_EC64 +#endif /* CK_F_PR_FAA_64 */ + +/* + * GCC inline assembly lets us exploit non-atomic read-modify-write + * instructions on x86/x86_64 for a fast single-producer mode. + * + * If we CK_F_EC_SP is not defined, CK_EC always uses the slower + * multiple producer code. + */ +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define CK_F_EC_SP +#endif /* GNUC && (__i386__ || __x86_64__) */ + +struct ck_ec_ops; + +struct ck_ec_wait_state { + struct timespec start; /* Time when we entered ck_ec_wait. */ + struct timespec now; /* Time now. */ + const struct ck_ec_ops *ops; + void *data; /* Opaque pointer for the predicate's internal state. */ + +}; + +/* + * ck_ec_ops define system-specific functions to get the current time, + * atomically wait on an address if it still has some expected value, + * and to wake all threads waiting on an address. + * + * Each platform is expected to have few (one) opaque pointer to a + * const ops struct, and reuse it for all ck_ec_mode structs. + */ +struct ck_ec_ops { + /* Populates out with the current time. Returns non-zero on failure. */ + int (*gettime)(const struct ck_ec_ops *, struct timespec *out); + + /* + * Waits on address if its value is still `expected`. If + * deadline is non-NULL, stops waiting once that deadline is + * reached. May return early for any reason. + */ + void (*wait32)(const struct ck_ec_wait_state *, const uint32_t *, + uint32_t expected, const struct timespec *deadline); + + /* + * Same as wait32, but for a 64 bit counter. Only used if + * CK_F_EC64 is defined. + * + * If underlying blocking primitive only supports 32 bit + * control words, it should be safe to block on the least + * significant half of the 64 bit address. + */ + void (*wait64)(const struct ck_ec_wait_state *, const uint64_t *, + uint64_t expected, const struct timespec *deadline); + + /* Wakes up all threads waiting on address. */ + void (*wake32)(const struct ck_ec_ops *, const uint32_t *address); + + /* + * Same as wake32, but for a 64 bit counter. Only used if + * CK_F_EC64 is defined. + * + * When wait64 truncates the control word at address to `only` + * consider its least significant half, wake64 should perform + * any necessary fixup (e.g., on big endian platforms). + */ + void (*wake64)(const struct ck_ec_ops *, const uint64_t *address); + + /* + * Number of iterations for the initial busy wait. 0 defaults + * to 100 (not ABI stable). + */ + uint32_t busy_loop_iter; + + /* + * Delay in nanoseconds for the first iteration of the + * exponential backoff. 0 defaults to 2 ms (not ABI stable). + */ + uint32_t initial_wait_ns; + + /* + * Scale factor for the exponential backoff. 0 defaults to 8x + * (not ABI stable). + */ + uint32_t wait_scale_factor; + + /* + * Right shift count for the exponential backoff. The update + * after each iteration is + * wait_ns = (wait_ns * wait_scale_factor) >> wait_shift_count, + * until one second has elapsed. After that, the deadline goes + * to infinity. 
+ */ + uint32_t wait_shift_count; +}; + +/* + * ck_ec_mode wraps the ops table, and informs the fast path whether + * it should attempt to specialize for single producer mode. + * + * mode structs are expected to be exposed by value, e.g., + * + * extern const struct ck_ec_ops system_ec_ops; + * + * static const struct ck_ec_mode ec_sp = { + * .ops = &system_ec_ops, + * .single_producer = true + * }; + * + * static const struct ck_ec_mode ec_mp = { + * .ops = &system_ec_ops, + * .single_producer = false + * }; + * + * ck_ec_mode structs are only passed to inline functions defined in + * this header, and never escape to their slow paths, so they should + * not result in any object file size increase. + */ +struct ck_ec_mode { + const struct ck_ec_ops *ops; + /* + * If single_producer is true, the event count has a unique + * incrementer. The implementation will specialize ck_ec_inc + * and ck_ec_add if possible (if CK_F_EC_SP is defined). + */ + bool single_producer; +}; + +struct ck_ec32 { + /* Flag is "sign" bit, value in bits 0:30. */ + uint32_t counter; +}; + +typedef struct ck_ec32 ck_ec32_t; + +#ifdef CK_F_EC64 +struct ck_ec64 { + /* + * Flag is bottom bit, value in bits 1:63. Eventcount only + * works on x86-64 (i.e., little endian), so the futex int + * lies in the first 4 (bottom) bytes. + */ + uint64_t counter; +}; + +typedef struct ck_ec64 ck_ec64_t; +#endif /* CK_F_EC64 */ + +#define CK_EC_INITIALIZER { .counter = 0 } + +/* + * Initializes the event count to `value`. The value must not + * exceed INT32_MAX. + */ +static void ck_ec32_init(struct ck_ec32 *ec, uint32_t value); + +#ifndef CK_F_EC64 +#define ck_ec_init ck_ec32_init +#else +/* + * Initializes the event count to `value`. The value must not + * exceed INT64_MAX. + */ +static void ck_ec64_init(struct ck_ec64 *ec, uint64_t value); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_init(EC, VALUE) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_init, \ + struct ck_ec64 : ck_ec64_init)((EC), (VALUE))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Returns the counter value in the event count. The value is at most + * INT32_MAX. + */ +static uint32_t ck_ec32_value(const struct ck_ec32* ec); + +#ifndef CK_F_EC64 +#define ck_ec_value ck_ec32_value +#else +/* + * Returns the counter value in the event count. The value is at most + * INT64_MAX. + */ +static uint64_t ck_ec64_value(const struct ck_ec64* ec); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_value(EC) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_value, \ + struct ck_ec64 : ck_ec64_value)((EC))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Returns whether there may be slow pathed waiters that need an + * explicit OS wakeup for this event count. + */ +static bool ck_ec32_has_waiters(const struct ck_ec32 *ec); + +#ifndef CK_F_EC64 +#define ck_ec_has_waiters ck_ec32_has_waiters +#else +static bool ck_ec64_has_waiters(const struct ck_ec64 *ec); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_has_waiters(EC) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_has_waiters, \ + struct ck_ec64 : ck_ec64_has_waiters)((EC))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Increments the counter value in the event count by one, and wakes + * up any waiter. 
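+ *
+ * A minimal pairing with ck_ec_wait (declared further below), where
+ * ec_sp is an application-defined ck_ec_mode as in the example above
+ * and work_available/publish_work are hypothetical helpers:
+ *
+ *     consumer:
+ *         uint32_t old = ck_ec_value(&ec);
+ *
+ *         if (work_available() == false)
+ *                 (void)ck_ec_wait(&ec, &ec_sp, old, NULL);
+ *
+ *     producer:
+ *         publish_work();
+ *         ck_ec_inc(&ec, &ec_sp);
+ *
+ * The wait returns as soon as the counter differs from old, so an
+ * increment that occurs after the snapshot is never missed.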
+ */ +static void ck_ec32_inc(struct ck_ec32 *ec, const struct ck_ec_mode *mode); + +#ifndef CK_F_EC64 +#define ck_ec_inc ck_ec32_inc +#else +static void ck_ec64_inc(struct ck_ec64 *ec, const struct ck_ec_mode *mode); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_inc(EC, MODE) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_inc, \ + struct ck_ec64 : ck_ec64_inc)((EC), (MODE))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Increments the counter value in the event count by delta, wakes + * up any waiter, and returns the previous counter value. + */ +static uint32_t ck_ec32_add(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t delta); + +#ifndef CK_F_EC64 +#define ck_ec_add ck_ec32_add +#else +static uint64_t ck_ec64_add(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t delta); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_add(EC, MODE, DELTA) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_add, \ + struct ck_ec64 : ck_ec64_add)((EC), (MODE), (DELTA))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Populates `new_deadline` with a deadline `timeout` in the future. + * Returns 0 on success, and -1 if clock_gettime failed, in which + * case errno is left as is. + */ +static int ck_ec_deadline(struct timespec *new_deadline, + const struct ck_ec_mode *mode, + const struct timespec *timeout); + +/* + * Waits until the counter value in the event count differs from + * old_value, or, if deadline is non-NULL, until CLOCK_MONOTONIC is + * past the deadline. + * + * Returns 0 on success, and -1 on timeout. + */ +static int ck_ec32_wait(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t old_value, + const struct timespec *deadline); + +#ifndef CK_F_EC64 +#define ck_ec_wait ck_ec32_wait +#else +static int ck_ec64_wait(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t old_value, + const struct timespec *deadline); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_wait(EC, MODE, OLD_VALUE, DEADLINE) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_wait, \ + struct ck_ec64 : ck_ec64_wait)((EC), (MODE), \ + (OLD_VALUE), (DEADLINE))) + +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Waits until the counter value in the event count differs from + * old_value, pred returns non-zero, or, if deadline is non-NULL, + * until CLOCK_MONOTONIC is past the deadline. + * + * Returns 0 on success, -1 on timeout, and the return value of pred + * if it returns non-zero. + * + * A NULL pred represents a function that always returns 0. + */ +static int ck_ec32_wait_pred(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t old_value, + int (*pred)(const struct ck_ec_wait_state *, + struct timespec *deadline), + void *data, + const struct timespec *deadline); + +#ifndef CK_F_EC64 +#define ck_ec_wait_pred ck_ec32_wait_pred +#else +static int ck_ec64_wait_pred(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t old_value, + int (*pred)(const struct ck_ec_wait_state *, + struct timespec *deadline), + void *data, + const struct timespec *deadline); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_wait_pred(EC, MODE, OLD_VALUE, PRED, DATA, DEADLINE) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_wait_pred, \ + struct ck_ec64 : ck_ec64_wait_pred) \ + ((EC), (MODE), (OLD_VALUE), (PRED), (DATA), (DEADLINE))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Inline implementation details. 32 bit first, then 64 bit + * conditionally. 
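+ *
+ * As a usage note before diving into the implementation: a timed
+ * wait is expected to combine ck_ec_deadline and ck_ec_wait, e.g.
+ * (the one second timeout and local names are illustrative):
+ *
+ *     const struct timespec timeout = { .tv_sec = 1 };
+ *     struct timespec deadline;
+ *     uint32_t old = ck_ec_value(&ec);
+ *
+ *     if (ck_ec_deadline(&deadline, &ec_sp, &timeout) == 0)
+ *             (void)ck_ec_wait(&ec, &ec_sp, old, &deadline);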
+ */ +CK_CC_FORCE_INLINE void ck_ec32_init(struct ck_ec32 *ec, uint32_t value) +{ + ec->counter = value & ~(1UL << 31); + return; +} + +CK_CC_FORCE_INLINE uint32_t ck_ec32_value(const struct ck_ec32 *ec) +{ + uint32_t ret = ck_pr_load_32(&ec->counter) & ~(1UL << 31); + + ck_pr_fence_acquire(); + return ret; +} + +CK_CC_FORCE_INLINE bool ck_ec32_has_waiters(const struct ck_ec32 *ec) +{ + return ck_pr_load_32(&ec->counter) & (1UL << 31); +} + +/* Slow path for ck_ec{32,64}_{inc,add} */ +void ck_ec32_wake(struct ck_ec32 *ec, const struct ck_ec_ops *ops); + +CK_CC_FORCE_INLINE void ck_ec32_inc(struct ck_ec32 *ec, + const struct ck_ec_mode *mode) +{ +#if !defined(CK_F_EC_SP) + /* Nothing to specialize if we don't have EC_SP. */ + ck_ec32_add(ec, mode, 1); + return; +#else + char flagged; + +#if __GNUC__ >= 6 + /* + * We don't want to wake if the sign bit is 0. We do want to + * wake if the sign bit just flipped from 1 to 0. We don't + * care what happens when our increment caused the sign bit to + * flip from 0 to 1 (that's once per 2^31 increment). + * + * This leaves us with four cases: + * + * old sign bit | new sign bit | SF | OF | ZF + * ------------------------------------------- + * 0 | 0 | 0 | 0 | ? + * 0 | 1 | 1 | 0 | ? + * 1 | 1 | 1 | 0 | ? + * 1 | 0 | 0 | 0 | 1 + * + * In the first case, we don't want to hit ck_ec32_wake. In + * the last two cases, we do want to call ck_ec32_wake. In the + * second case, we don't care, so we arbitrarily choose to + * call ck_ec32_wake. + * + * The "le" condition checks if SF != OF, or ZF == 1, which + * meets our requirements. + */ +#define CK_EC32_INC_ASM(PREFIX) \ + __asm__ volatile(PREFIX " incl %0" \ + : "+m"(ec->counter), "=@ccle"(flagged) \ + :: "cc", "memory") +#else +#define CK_EC32_INC_ASM(PREFIX) \ + __asm__ volatile(PREFIX " incl %0; setle %1" \ + : "+m"(ec->counter), "=r"(flagged) \ + :: "cc", "memory") +#endif /* __GNUC__ */ + + if (mode->single_producer == true) { + ck_pr_fence_store(); + CK_EC32_INC_ASM(""); + } else { + ck_pr_fence_store_atomic(); + CK_EC32_INC_ASM("lock"); + } +#undef CK_EC32_INC_ASM + + if (CK_CC_UNLIKELY(flagged)) { + ck_ec32_wake(ec, mode->ops); + } + + return; +#endif /* CK_F_EC_SP */ +} + +CK_CC_FORCE_INLINE uint32_t ck_ec32_add_epilogue(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t old) +{ + const uint32_t flag_mask = 1U << 31; + uint32_t ret; + + ret = old & ~flag_mask; + /* These two only differ if the flag bit is set. */ + if (CK_CC_UNLIKELY(old != ret)) { + ck_ec32_wake(ec, mode->ops); + } + + return ret; +} + +static CK_CC_INLINE uint32_t ck_ec32_add_mp(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t delta) +{ + uint32_t old; + + ck_pr_fence_store_atomic(); + old = ck_pr_faa_32(&ec->counter, delta); + return ck_ec32_add_epilogue(ec, mode, old); +} + +#ifdef CK_F_EC_SP +static CK_CC_INLINE uint32_t ck_ec32_add_sp(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t delta) +{ + uint32_t old; + + /* + * Correctness of this racy write depends on actually + * having an update to write. Exit here if the update + * is a no-op. 
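+	 *
+	 * (With a zero delta the racy write below would be pure
+	 * downside: it could still clobber a flag bit set by a
+	 * concurrent waiter, while publishing no new counter value
+	 * for that waiter to observe.)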
+ */ + if (CK_CC_UNLIKELY(delta == 0)) { + return ck_ec32_value(ec); + } + + ck_pr_fence_store(); + old = delta; + __asm__ volatile("xaddl %1, %0" + : "+m"(ec->counter), "+r"(old) + :: "cc", "memory"); + return ck_ec32_add_epilogue(ec, mode, old); +} +#endif /* CK_F_EC_SP */ + +CK_CC_FORCE_INLINE uint32_t ck_ec32_add(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t delta) +{ +#ifdef CK_F_EC_SP + if (mode->single_producer == true) { + return ck_ec32_add_sp(ec, mode, delta); + } +#endif + + return ck_ec32_add_mp(ec, mode, delta); +} + +int ck_ec_deadline_impl(struct timespec *new_deadline, + const struct ck_ec_ops *ops, + const struct timespec *timeout); + +CK_CC_FORCE_INLINE int ck_ec_deadline(struct timespec *new_deadline, + const struct ck_ec_mode *mode, + const struct timespec *timeout) +{ + return ck_ec_deadline_impl(new_deadline, mode->ops, timeout); +} + + +int ck_ec32_wait_slow(struct ck_ec32 *ec, + const struct ck_ec_ops *ops, + uint32_t old_value, + const struct timespec *deadline); + +CK_CC_FORCE_INLINE int ck_ec32_wait(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t old_value, + const struct timespec *deadline) +{ + if (ck_ec32_value(ec) != old_value) { + return 0; + } + + return ck_ec32_wait_slow(ec, mode->ops, old_value, deadline); +} + +int ck_ec32_wait_pred_slow(struct ck_ec32 *ec, + const struct ck_ec_ops *ops, + uint32_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline); + +CK_CC_FORCE_INLINE int +ck_ec32_wait_pred(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline) +{ + if (ck_ec32_value(ec) != old_value) { + return 0; + } + + return ck_ec32_wait_pred_slow(ec, mode->ops, old_value, + pred, data, deadline); +} + +#ifdef CK_F_EC64 +CK_CC_FORCE_INLINE void ck_ec64_init(struct ck_ec64 *ec, uint64_t value) +{ + ec->counter = value << 1; + return; +} + +CK_CC_FORCE_INLINE uint64_t ck_ec64_value(const struct ck_ec64 *ec) +{ + uint64_t ret = ck_pr_load_64(&ec->counter) >> 1; + + ck_pr_fence_acquire(); + return ret; +} + +CK_CC_FORCE_INLINE bool ck_ec64_has_waiters(const struct ck_ec64 *ec) +{ + return ck_pr_load_64(&ec->counter) & 1; +} + +void ck_ec64_wake(struct ck_ec64 *ec, const struct ck_ec_ops *ops); + +CK_CC_FORCE_INLINE void ck_ec64_inc(struct ck_ec64 *ec, + const struct ck_ec_mode *mode) +{ + /* We always xadd, so there's no special optimization here. */ + (void)ck_ec64_add(ec, mode, 1); + return; +} + +CK_CC_FORCE_INLINE uint64_t ck_ec_add64_epilogue(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t old) +{ + uint64_t ret = old >> 1; + + if (CK_CC_UNLIKELY(old & 1)) { + ck_ec64_wake(ec, mode->ops); + } + + return ret; +} + +static CK_CC_INLINE uint64_t ck_ec64_add_mp(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t delta) +{ + uint64_t inc = 2 * delta; /* The low bit is the flag bit. */ + + ck_pr_fence_store_atomic(); + return ck_ec_add64_epilogue(ec, mode, ck_pr_faa_64(&ec->counter, inc)); +} + +#ifdef CK_F_EC_SP +/* Single-producer specialisation. */ +static CK_CC_INLINE uint64_t ck_ec64_add_sp(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t delta) +{ + uint64_t old; + + /* + * Correctness of this racy write depends on actually + * having an update to write. Exit here if the update + * is a no-op. 
+ */ + if (CK_CC_UNLIKELY(delta == 0)) { + return ck_ec64_value(ec); + } + + ck_pr_fence_store(); + old = 2 * delta; /* The low bit is the flag bit. */ + __asm__ volatile("xaddq %1, %0" + : "+m"(ec->counter), "+r"(old) + :: "cc", "memory"); + return ck_ec_add64_epilogue(ec, mode, old); +} +#endif /* CK_F_EC_SP */ + +/* + * Dispatch on mode->single_producer in this FORCE_INLINE function: + * the end result is always small, but not all compilers have enough + * foresight to inline and get the reduction. + */ +CK_CC_FORCE_INLINE uint64_t ck_ec64_add(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t delta) +{ +#ifdef CK_F_EC_SP + if (mode->single_producer == true) { + return ck_ec64_add_sp(ec, mode, delta); + } +#endif + + return ck_ec64_add_mp(ec, mode, delta); +} + +int ck_ec64_wait_slow(struct ck_ec64 *ec, + const struct ck_ec_ops *ops, + uint64_t old_value, + const struct timespec *deadline); + +CK_CC_FORCE_INLINE int ck_ec64_wait(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t old_value, + const struct timespec *deadline) +{ + if (ck_ec64_value(ec) != old_value) { + return 0; + } + + return ck_ec64_wait_slow(ec, mode->ops, old_value, deadline); +} + +int ck_ec64_wait_pred_slow(struct ck_ec64 *ec, + const struct ck_ec_ops *ops, + uint64_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline); + + +CK_CC_FORCE_INLINE int +ck_ec64_wait_pred(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline) +{ + if (ck_ec64_value(ec) != old_value) { + return 0; + } + + return ck_ec64_wait_pred_slow(ec, mode->ops, old_value, + pred, data, deadline); +} +#endif /* CK_F_EC64 */ +#endif /* !CK_EC_H */ diff --git a/include/ck_epoch.h b/include/ck_epoch.h index e7ce5bc..58f3d28 100644 --- a/include/ck_epoch.h +++ b/include/ck_epoch.h @@ -83,6 +83,7 @@ struct ck_epoch_ref { }; struct ck_epoch_record { + ck_stack_entry_t record_next; struct ck_epoch *global; unsigned int state; unsigned int epoch; @@ -92,17 +93,16 @@ struct ck_epoch_record { } local CK_CC_CACHELINE; unsigned int n_pending; unsigned int n_peak; - unsigned long n_dispatch; + unsigned int n_dispatch; + void *ct; ck_stack_t pending[CK_EPOCH_LENGTH]; - ck_stack_entry_t record_next; } CK_CC_CACHELINE; typedef struct ck_epoch_record ck_epoch_record_t; struct ck_epoch { unsigned int epoch; - char pad[CK_MD_CACHELINE - sizeof(unsigned int)]; - ck_stack_t records; unsigned int n_free; + ck_stack_t records; }; typedef struct ck_epoch ck_epoch_t; @@ -110,7 +110,14 @@ typedef struct ck_epoch ck_epoch_t; * Internal functions. */ void _ck_epoch_addref(ck_epoch_record_t *, ck_epoch_section_t *); -void _ck_epoch_delref(ck_epoch_record_t *, ck_epoch_section_t *); +bool _ck_epoch_delref(ck_epoch_record_t *, ck_epoch_section_t *); + +CK_CC_FORCE_INLINE static void * +ck_epoch_record_ct(const ck_epoch_record_t *record) +{ + + return ck_pr_load_ptr(&record->ct); +} /* * Marks the beginning of an epoch-protected section. @@ -160,9 +167,10 @@ ck_epoch_begin(ck_epoch_record_t *record, ck_epoch_section_t *section) } /* - * Marks the end of an epoch-protected section. + * Marks the end of an epoch-protected section. Returns true if no more + * sections exist for the caller. 
*/ -CK_CC_FORCE_INLINE static void +CK_CC_FORCE_INLINE static bool ck_epoch_end(ck_epoch_record_t *record, ck_epoch_section_t *section) { @@ -170,15 +178,19 @@ ck_epoch_end(ck_epoch_record_t *record, ck_epoch_section_t *section) ck_pr_store_uint(&record->active, record->active - 1); if (section != NULL) - _ck_epoch_delref(record, section); + return _ck_epoch_delref(record, section); - return; + return record->active == 0; } /* * Defers the execution of the function pointed to by the "cb" * argument until an epoch counter loop. This allows for a * non-blocking deferral. + * + * We can get away without a fence here due to the monotonic nature + * of the epoch counter. Worst case, this will result in some delays + * before object destruction. */ CK_CC_FORCE_INLINE static void ck_epoch_call(ck_epoch_record_t *record, @@ -195,13 +207,75 @@ ck_epoch_call(ck_epoch_record_t *record, return; } +/* + * Same as ck_epoch_call, but allows for records to be shared and is reentrant. + */ +CK_CC_FORCE_INLINE static void +ck_epoch_call_strict(ck_epoch_record_t *record, + ck_epoch_entry_t *entry, + ck_epoch_cb_t *function) +{ + struct ck_epoch *epoch = record->global; + unsigned int e = ck_pr_load_uint(&epoch->epoch); + unsigned int offset = e & (CK_EPOCH_LENGTH - 1); + + ck_pr_inc_uint(&record->n_pending); + entry->function = function; + + /* Store fence is implied by push operation. */ + ck_stack_push_upmc(&record->pending[offset], &entry->stack_entry); + return; +} + +/* + * This callback is used for synchronize_wait to allow for custom blocking + * behavior. + */ +typedef void ck_epoch_wait_cb_t(ck_epoch_t *, ck_epoch_record_t *, + void *); + +/* + * Return latest epoch value. This operation provides load ordering. + */ +CK_CC_FORCE_INLINE static unsigned int +ck_epoch_value(const ck_epoch_t *ep) +{ + + ck_pr_fence_load(); + return ck_pr_load_uint(&ep->epoch); +} + void ck_epoch_init(ck_epoch_t *); -ck_epoch_record_t *ck_epoch_recycle(ck_epoch_t *); -void ck_epoch_register(ck_epoch_t *, ck_epoch_record_t *); + +/* + * Attempts to recycle an unused epoch record. If one is successfully + * allocated, the record context pointer is also updated. + */ +ck_epoch_record_t *ck_epoch_recycle(ck_epoch_t *, void *); + +/* + * Registers an epoch record. An optional context pointer may be passed that + * is retrievable with ck_epoch_record_ct. + */ +void ck_epoch_register(ck_epoch_t *, ck_epoch_record_t *, void *); + +/* + * Marks a record as available for re-use by a subsequent recycle operation. + * Note that the record cannot be physically destroyed. + */ void ck_epoch_unregister(ck_epoch_record_t *); + bool ck_epoch_poll(ck_epoch_record_t *); +bool ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred); void ck_epoch_synchronize(ck_epoch_record_t *); +void ck_epoch_synchronize_wait(ck_epoch_t *, ck_epoch_wait_cb_t *, void *); void ck_epoch_barrier(ck_epoch_record_t *); +void ck_epoch_barrier_wait(ck_epoch_record_t *, ck_epoch_wait_cb_t *, void *); + +/* + * Reclaim entries associated with a record. This is safe to call only on + * the caller's record or records that are using call_strict. 
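+ *
+ * For example, a caller that has just observed a grace period on its
+ * own record may force all pending callbacks to execute with a
+ * sequence like:
+ *
+ *	ck_epoch_synchronize(record);
+ *	ck_epoch_reclaim(record);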
+ */ void ck_epoch_reclaim(ck_epoch_record_t *); #endif /* CK_EPOCH_H */ diff --git a/include/ck_fifo.h b/include/ck_fifo.h index 6d50070..c9a6f3d 100644 --- a/include/ck_fifo.h +++ b/include/ck_fifo.h @@ -115,7 +115,7 @@ CK_CC_INLINE static void ck_fifo_spsc_deinit(struct ck_fifo_spsc *fifo, struct ck_fifo_spsc_entry **garbage) { - *garbage = fifo->head; + *garbage = fifo->garbage; fifo->head = fifo->tail = NULL; return; } diff --git a/include/ck_hs.h b/include/ck_hs.h index b3eb046..cd3e5da 100644 --- a/include/ck_hs.h +++ b/include/ck_hs.h @@ -100,18 +100,28 @@ struct ck_hs_stat { struct ck_hs_iterator { void **cursor; unsigned long offset; + struct ck_hs_map *map; }; typedef struct ck_hs_iterator ck_hs_iterator_t; -#define CK_HS_ITERATOR_INITIALIZER { NULL, 0 } +#define CK_HS_ITERATOR_INITIALIZER { NULL, 0, NULL } /* Convenience wrapper to table hash function. */ #define CK_HS_HASH(T, F, K) F((K), (T)->seed) +/* Computes the hash of n bytes of k for the specified hash map. */ +static inline unsigned long +ck_hs_hash(const struct ck_hs *hs, const void *k) +{ + + return hs->hf(k, hs->seed); +} + typedef void *ck_hs_apply_fn_t(void *, void *); bool ck_hs_apply(ck_hs_t *, unsigned long, const void *, ck_hs_apply_fn_t *, void *); void ck_hs_iterator_init(ck_hs_iterator_t *); bool ck_hs_next(ck_hs_t *, ck_hs_iterator_t *, void **); +bool ck_hs_next_spmc(ck_hs_t *, ck_hs_iterator_t *, void **); bool ck_hs_move(ck_hs_t *, ck_hs_t *, ck_hs_hash_cb_t *, ck_hs_compare_cb_t *, struct ck_malloc *); bool ck_hs_init(ck_hs_t *, unsigned int, ck_hs_hash_cb_t *, diff --git a/include/ck_md.h.in b/include/ck_md.h.in index cb5783e..feae35b 100644 --- a/include/ck_md.h.in +++ b/include/ck_md.h.in @@ -47,7 +47,15 @@ #define @POINTER_PACK_ENABLE@ #endif /* @POINTER_PACK_ENABLE@ */ -#ifndef @VMA_BITS@ +#ifndef @SSE_DISABLE@ +#define @SSE_DISABLE@ +#endif /* @SSE_DISABLE@ */ + +#ifndef @PPC32_LWSYNC_ENABLE@ +#define @PPC32_LWSYNC_ENABLE@ +#endif /* @PPC32_LWSYNC_ENABLE@ */ + +#ifndef @VMA_BITS@ #define @VMA_BITS@ @VMA_BITS_VALUE@ #endif /* @VMA_BITS@ */ diff --git a/include/ck_pr.h b/include/ck_pr.h index 9b7fc42..8ebf855 100644 --- a/include/ck_pr.h +++ b/include/ck_pr.h @@ -34,7 +34,20 @@ #include <ck_stdint.h> #include <ck_stdbool.h> -#ifndef CK_USE_CC_BUILTINS +/* + * Default to using builtins for clang analyzer, coverity, and sparse: + * inline assembly is often too opaque for useful analysis. Override + * the defaults by defining CK_USE_CC_BUILTINS=0 or 1. 
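+ *
+ * For example, a build may pass -DCK_USE_CC_BUILTINS=1 to use the
+ * compiler builtins unconditionally, or -DCK_USE_CC_BUILTINS=0 to
+ * keep the inline assembly implementations even when an analyzer is
+ * detected.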
+ */ +#if !defined(CK_USE_CC_BUILTINS) +#if defined(__clang_analyzer__) || defined(__COVERITY__) || defined(__CHECKER__) +#define CK_USE_CC_BUILTINS 1 +#else +#define CK_USE_CC_BUILTINS 0 +#endif +#endif + +#if !CK_USE_CC_BUILTINS #if defined(__x86_64__) #include "gcc/x86_64/ck_pr.h" #elif defined(__x86__) @@ -43,6 +56,8 @@ #include "gcc/sparcv9/ck_pr.h" #elif defined(__ppc64__) #include "gcc/ppc64/ck_pr.h" +#elif defined(__s390x__) +#include "gcc/s390x/ck_pr.h" #elif defined(__ppc__) #include "gcc/ppc/ck_pr.h" #elif defined(__arm__) @@ -613,8 +628,8 @@ CK_PR_BTX_S(bts, 16, uint16_t, |,) } #define CK_PR_UNARY_Z(K, S, M, T, P, C, Z) \ - CK_CC_INLINE static void \ - ck_pr_##K##_##S##_zero(M *target, bool *zero) \ + CK_CC_INLINE static bool \ + ck_pr_##K##_##S##_is_zero(M *target) \ { \ T previous; \ C punt; \ @@ -625,12 +640,21 @@ CK_PR_BTX_S(bts, 16, uint16_t, |,) (C)(previous P 1), \ &previous) == false) \ ck_pr_stall(); \ - *zero = previous == (T)Z; \ + return previous == (T)Z; \ + } + +#define CK_PR_UNARY_Z_STUB(K, S, M) \ + CK_CC_INLINE static void \ + ck_pr_##K##_##S##_zero(M *target, bool *zero) \ + { \ + *zero = ck_pr_##K##_##S##_is_zero(target); \ return; \ } #define CK_PR_UNARY_S(K, X, S, M) CK_PR_UNARY(K, X, S, M, M) -#define CK_PR_UNARY_Z_S(K, S, M, P, Z) CK_PR_UNARY_Z(K, S, M, M, P, M, Z) +#define CK_PR_UNARY_Z_S(K, S, M, P, Z) \ + CK_PR_UNARY_Z(K, S, M, M, P, M, Z) \ + CK_PR_UNARY_Z_STUB(K, S, M) #if defined(CK_F_PR_LOAD_CHAR) && defined(CK_F_PR_CAS_CHAR_VALUE) @@ -642,6 +666,8 @@ CK_PR_UNARY_S(inc, add, char, char) #ifndef CK_F_PR_INC_CHAR_ZERO #define CK_F_PR_INC_CHAR_ZERO CK_PR_UNARY_Z_S(inc, char, char, +, -1) +#else +CK_PR_UNARY_Z_STUB(inc, char, char) #endif /* CK_F_PR_INC_CHAR_ZERO */ #ifndef CK_F_PR_DEC_CHAR @@ -652,6 +678,8 @@ CK_PR_UNARY_S(dec, sub, char, char) #ifndef CK_F_PR_DEC_CHAR_ZERO #define CK_F_PR_DEC_CHAR_ZERO CK_PR_UNARY_Z_S(dec, char, char, -, 1) +#else +CK_PR_UNARY_Z_STUB(dec, char, char) #endif /* CK_F_PR_DEC_CHAR_ZERO */ #endif /* CK_F_PR_LOAD_CHAR && CK_F_PR_CAS_CHAR_VALUE */ @@ -666,6 +694,8 @@ CK_PR_UNARY_S(inc, add, int, int) #ifndef CK_F_PR_INC_INT_ZERO #define CK_F_PR_INC_INT_ZERO CK_PR_UNARY_Z_S(inc, int, int, +, -1) +#else +CK_PR_UNARY_Z_STUB(inc, int, int) #endif /* CK_F_PR_INC_INT_ZERO */ #ifndef CK_F_PR_DEC_INT @@ -676,6 +706,8 @@ CK_PR_UNARY_S(dec, sub, int, int) #ifndef CK_F_PR_DEC_INT_ZERO #define CK_F_PR_DEC_INT_ZERO CK_PR_UNARY_Z_S(dec, int, int, -, 1) +#else +CK_PR_UNARY_Z_STUB(dec, int, int) #endif /* CK_F_PR_DEC_INT_ZERO */ #endif /* CK_F_PR_LOAD_INT && CK_F_PR_CAS_INT_VALUE */ @@ -705,6 +737,8 @@ CK_PR_UNARY_S(inc, add, uint, unsigned int) #ifndef CK_F_PR_INC_UINT_ZERO #define CK_F_PR_INC_UINT_ZERO CK_PR_UNARY_Z_S(inc, uint, unsigned int, +, UINT_MAX) +#else +CK_PR_UNARY_Z_STUB(inc, uint, unsigned int) #endif /* CK_F_PR_INC_UINT_ZERO */ #ifndef CK_F_PR_DEC_UINT @@ -715,6 +749,8 @@ CK_PR_UNARY_S(dec, sub, uint, unsigned int) #ifndef CK_F_PR_DEC_UINT_ZERO #define CK_F_PR_DEC_UINT_ZERO CK_PR_UNARY_Z_S(dec, uint, unsigned int, -, 1) +#else +CK_PR_UNARY_Z_STUB(dec, uint, unsigned int) #endif /* CK_F_PR_DEC_UINT_ZERO */ #endif /* CK_F_PR_LOAD_UINT && CK_F_PR_CAS_UINT_VALUE */ @@ -729,6 +765,8 @@ CK_PR_UNARY(inc, add, ptr, void, uintptr_t) #ifndef CK_F_PR_INC_PTR_ZERO #define CK_F_PR_INC_PTR_ZERO CK_PR_UNARY_Z(inc, ptr, void, uintptr_t, +, void *, UINT_MAX) +#else +CK_PR_UNARY_Z_STUB(inc, ptr, void) #endif /* CK_F_PR_INC_PTR_ZERO */ #ifndef CK_F_PR_DEC_PTR @@ -739,6 +777,8 @@ CK_PR_UNARY(dec, sub, ptr, void, uintptr_t) #ifndef 
CK_F_PR_DEC_PTR_ZERO #define CK_F_PR_DEC_PTR_ZERO CK_PR_UNARY_Z(dec, ptr, void, uintptr_t, -, void *, 1) +#else +CK_PR_UNARY_Z_STUB(dec, ptr, void) #endif /* CK_F_PR_DEC_PTR_ZERO */ #endif /* CK_F_PR_LOAD_PTR && CK_F_PR_CAS_PTR_VALUE */ @@ -753,6 +793,8 @@ CK_PR_UNARY_S(inc, add, 64, uint64_t) #ifndef CK_F_PR_INC_64_ZERO #define CK_F_PR_INC_64_ZERO CK_PR_UNARY_Z_S(inc, 64, uint64_t, +, UINT64_MAX) +#else +CK_PR_UNARY_Z_STUB(inc, 64, uint64_t) #endif /* CK_F_PR_INC_64_ZERO */ #ifndef CK_F_PR_DEC_64 @@ -763,6 +805,8 @@ CK_PR_UNARY_S(dec, sub, 64, uint64_t) #ifndef CK_F_PR_DEC_64_ZERO #define CK_F_PR_DEC_64_ZERO CK_PR_UNARY_Z_S(dec, 64, uint64_t, -, 1) +#else +CK_PR_UNARY_Z_STUB(dec, 64, uint64_t) #endif /* CK_F_PR_DEC_64_ZERO */ #endif /* CK_F_PR_LOAD_64 && CK_F_PR_CAS_64_VALUE */ @@ -777,6 +821,8 @@ CK_PR_UNARY_S(inc, add, 32, uint32_t) #ifndef CK_F_PR_INC_32_ZERO #define CK_F_PR_INC_32_ZERO CK_PR_UNARY_Z_S(inc, 32, uint32_t, +, UINT32_MAX) +#else +CK_PR_UNARY_Z_STUB(inc, 32, uint32_t) #endif /* CK_F_PR_INC_32_ZERO */ #ifndef CK_F_PR_DEC_32 @@ -787,6 +833,8 @@ CK_PR_UNARY_S(dec, sub, 32, uint32_t) #ifndef CK_F_PR_DEC_32_ZERO #define CK_F_PR_DEC_32_ZERO CK_PR_UNARY_Z_S(dec, 32, uint32_t, -, 1) +#else +CK_PR_UNARY_Z_STUB(dec, 32, uint32_t) #endif /* CK_F_PR_DEC_32_ZERO */ #endif /* CK_F_PR_LOAD_32 && CK_F_PR_CAS_32_VALUE */ @@ -801,6 +849,8 @@ CK_PR_UNARY_S(inc, add, 16, uint16_t) #ifndef CK_F_PR_INC_16_ZERO #define CK_F_PR_INC_16_ZERO CK_PR_UNARY_Z_S(inc, 16, uint16_t, +, UINT16_MAX) +#else +CK_PR_UNARY_Z_STUB(inc, 16, uint16_t) #endif /* CK_F_PR_INC_16_ZERO */ #ifndef CK_F_PR_DEC_16 @@ -811,6 +861,8 @@ CK_PR_UNARY_S(dec, sub, 16, uint16_t) #ifndef CK_F_PR_DEC_16_ZERO #define CK_F_PR_DEC_16_ZERO CK_PR_UNARY_Z_S(dec, 16, uint16_t, -, 1) +#else +CK_PR_UNARY_Z_STUB(dec, 16, uint16_t) #endif /* CK_F_PR_DEC_16_ZERO */ #endif /* CK_F_PR_LOAD_16 && CK_F_PR_CAS_16_VALUE */ @@ -825,6 +877,8 @@ CK_PR_UNARY_S(inc, add, 8, uint8_t) #ifndef CK_F_PR_INC_8_ZERO #define CK_F_PR_INC_8_ZERO CK_PR_UNARY_Z_S(inc, 8, uint8_t, +, UINT8_MAX) +#else +CK_PR_UNARY_Z_STUB(inc, 8, uint8_t) #endif /* CK_F_PR_INC_8_ZERO */ #ifndef CK_F_PR_DEC_8 @@ -835,6 +889,8 @@ CK_PR_UNARY_S(dec, sub, 8, uint8_t) #ifndef CK_F_PR_DEC_8_ZERO #define CK_F_PR_DEC_8_ZERO CK_PR_UNARY_Z_S(dec, 8, uint8_t, -, 1) +#else +CK_PR_UNARY_Z_STUB(dec, 8, uint8_t) #endif /* CK_F_PR_DEC_8_ZERO */ #endif /* CK_F_PR_LOAD_8 && CK_F_PR_CAS_8_VALUE */ diff --git a/include/ck_queue.h b/include/ck_queue.h index c1e9872..fd38d8a 100644 --- a/include/ck_queue.h +++ b/include/ck_queue.h @@ -125,7 +125,7 @@ */ #define CK_SLIST_HEAD(name, type) \ struct name { \ - struct type *slh_first; /* first element */ \ + struct type *cslh_first; /* first element */ \ } #define CK_SLIST_HEAD_INITIALIZER(head) \ @@ -133,85 +133,95 @@ struct name { \ #define CK_SLIST_ENTRY(type) \ struct { \ - struct type *sle_next; /* next element */ \ + struct type *csle_next; /* next element */ \ } /* * Singly-linked List functions. 
*/ #define CK_SLIST_EMPTY(head) \ - (ck_pr_load_ptr(&(head)->slh_first) == NULL) + (ck_pr_load_ptr(&(head)->cslh_first) == NULL) #define CK_SLIST_FIRST(head) \ - (ck_pr_load_ptr(&(head)->slh_first)) + (ck_pr_load_ptr(&(head)->cslh_first)) #define CK_SLIST_NEXT(elm, field) \ - ck_pr_load_ptr(&((elm)->field.sle_next)) + ck_pr_load_ptr(&((elm)->field.csle_next)) #define CK_SLIST_FOREACH(var, head, field) \ for ((var) = CK_SLIST_FIRST((head)); \ - (var) && (ck_pr_fence_load(), 1); \ + (var); \ (var) = CK_SLIST_NEXT((var), field)) -#define CK_SLIST_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = CK_SLIST_FIRST(head); \ - (var) && (ck_pr_fence_load(), (tvar) = CK_SLIST_NEXT(var, field), 1);\ +#define CK_SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = CK_SLIST_FIRST(head); \ + (var) && ((tvar) = CK_SLIST_NEXT(var, field), 1); \ (var) = (tvar)) #define CK_SLIST_FOREACH_PREVPTR(var, varp, head, field) \ - for ((varp) = &(head)->slh_first; \ - ((var) = ck_pr_load_ptr(varp)) != NULL && (ck_pr_fence_load(), 1); \ - (varp) = &(var)->field.sle_next) + for ((varp) = &(head)->cslh_first; \ + ((var) = ck_pr_load_ptr(varp)) != NULL; \ + (varp) = &(var)->field.csle_next) #define CK_SLIST_INIT(head) do { \ - ck_pr_store_ptr(&(head)->slh_first, NULL); \ + ck_pr_store_ptr(&(head)->cslh_first, NULL); \ ck_pr_fence_store(); \ } while (0) #define CK_SLIST_INSERT_AFTER(a, b, field) do { \ - (b)->field.sle_next = (a)->field.sle_next; \ + (b)->field.csle_next = (a)->field.csle_next; \ ck_pr_fence_store(); \ - ck_pr_store_ptr(&(a)->field.sle_next, b); \ + ck_pr_store_ptr(&(a)->field.csle_next, b); \ } while (0) #define CK_SLIST_INSERT_HEAD(head, elm, field) do { \ - (elm)->field.sle_next = (head)->slh_first; \ + (elm)->field.csle_next = (head)->cslh_first; \ ck_pr_fence_store(); \ - ck_pr_store_ptr(&(head)->slh_first, elm); \ + ck_pr_store_ptr(&(head)->cslh_first, elm); \ +} while (0) + +#define CK_SLIST_INSERT_PREVPTR(prevp, slistelm, elm, field) do { \ + (elm)->field.csle_next = (slistelm); \ + ck_pr_fence_store(); \ + ck_pr_store_ptr(prevp, elm); \ } while (0) #define CK_SLIST_REMOVE_AFTER(elm, field) do { \ - ck_pr_store_ptr(&(elm)->field.sle_next, \ - (elm)->field.sle_next->field.sle_next); \ + ck_pr_store_ptr(&(elm)->field.csle_next, \ + (elm)->field.csle_next->field.csle_next); \ } while (0) #define CK_SLIST_REMOVE(head, elm, type, field) do { \ - if ((head)->slh_first == (elm)) { \ + if ((head)->cslh_first == (elm)) { \ CK_SLIST_REMOVE_HEAD((head), field); \ } else { \ - struct type *curelm = (head)->slh_first; \ - while (curelm->field.sle_next != (elm)) \ - curelm = curelm->field.sle_next; \ + struct type *curelm = (head)->cslh_first; \ + while (curelm->field.csle_next != (elm)) \ + curelm = curelm->field.csle_next; \ CK_SLIST_REMOVE_AFTER(curelm, field); \ } \ } while (0) #define CK_SLIST_REMOVE_HEAD(head, field) do { \ - ck_pr_store_ptr(&(head)->slh_first, \ - (head)->slh_first->field.sle_next); \ + ck_pr_store_ptr(&(head)->cslh_first, \ + (head)->cslh_first->field.csle_next); \ +} while (0) + +#define CK_SLIST_REMOVE_PREVPTR(prevp, elm, field) do { \ + ck_pr_store_ptr(prevptr, (elm)->field.csle_next); \ } while (0) #define CK_SLIST_MOVE(head1, head2, field) do { \ - ck_pr_store_ptr(&(head1)->slh_first, (head2)->slh_first); \ + ck_pr_store_ptr(&(head1)->cslh_first, (head2)->cslh_first); \ } while (0) /* * This operation is not applied atomically. 
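 * Callers are expected to ensure that no concurrent readers or
 * writers can observe either list while the swap is in progress.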
*/ #define CK_SLIST_SWAP(a, b, type) do { \ - struct type *swap_first = (a)->slh_first; \ - (a)->slh_first = (b)->slh_first; \ - (b)->slh_first = swap_first; \ + struct type *swap_first = (a)->cslh_first; \ + (a)->cslh_first = (b)->cslh_first; \ + (b)->cslh_first = swap_first; \ } while (0) /* @@ -219,107 +229,107 @@ struct { \ */ #define CK_STAILQ_HEAD(name, type) \ struct name { \ - struct type *stqh_first;/* first element */ \ - struct type **stqh_last;/* addr of last next element */ \ + struct type *cstqh_first;/* first element */ \ + struct type **cstqh_last;/* addr of last next element */ \ } #define CK_STAILQ_HEAD_INITIALIZER(head) \ - { NULL, &(head).stqh_first } + { NULL, &(head).cstqh_first } #define CK_STAILQ_ENTRY(type) \ struct { \ - struct type *stqe_next; /* next element */ \ + struct type *cstqe_next; /* next element */ \ } /* * Singly-linked Tail queue functions. */ #define CK_STAILQ_CONCAT(head1, head2) do { \ - if ((head2)->stqh_first == NULL) { \ - ck_pr_store_ptr((head1)->stqh_last, (head2)->stqh_first); \ + if ((head2)->cstqh_first != NULL) { \ + ck_pr_store_ptr((head1)->cstqh_last, (head2)->cstqh_first); \ ck_pr_fence_store(); \ - (head1)->stqh_last = (head2)->stqh_last; \ + (head1)->cstqh_last = (head2)->cstqh_last; \ CK_STAILQ_INIT((head2)); \ } \ } while (0) -#define CK_STAILQ_EMPTY(head) (ck_pr_load_ptr(&(head)->stqh_first) == NULL) +#define CK_STAILQ_EMPTY(head) (ck_pr_load_ptr(&(head)->cstqh_first) == NULL) -#define CK_STAILQ_FIRST(head) (ck_pr_load_ptr(&(head)->stqh_first)) +#define CK_STAILQ_FIRST(head) (ck_pr_load_ptr(&(head)->cstqh_first)) #define CK_STAILQ_FOREACH(var, head, field) \ for((var) = CK_STAILQ_FIRST((head)); \ - (var) && (ck_pr_fence_load(), 1); \ + (var); \ (var) = CK_STAILQ_NEXT((var), field)) #define CK_STAILQ_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = CK_STAILQ_FIRST((head)); \ - (var) && (ck_pr_fence_load(), (tvar) = \ + (var) && ((tvar) = \ CK_STAILQ_NEXT((var), field), 1); \ (var) = (tvar)) #define CK_STAILQ_INIT(head) do { \ - ck_pr_store_ptr(&(head)->stqh_first, NULL); \ + ck_pr_store_ptr(&(head)->cstqh_first, NULL); \ ck_pr_fence_store(); \ - (head)->stqh_last = &(head)->stqh_first; \ + (head)->cstqh_last = &(head)->cstqh_first; \ } while (0) #define CK_STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ - (elm)->field.stqe_next = (tqelm)->field.stqe_next; \ + (elm)->field.cstqe_next = (tqelm)->field.cstqe_next; \ ck_pr_fence_store(); \ - ck_pr_store_ptr(&(tqelm)->field.stqe_next, elm); \ - if ((elm)->field.stqe_next == NULL) \ - (head)->stqh_last = &(elm)->field.stqe_next; \ + ck_pr_store_ptr(&(tqelm)->field.cstqe_next, elm); \ + if ((elm)->field.cstqe_next == NULL) \ + (head)->cstqh_last = &(elm)->field.cstqe_next; \ } while (0) #define CK_STAILQ_INSERT_HEAD(head, elm, field) do { \ - (elm)->field.stqe_next = (head)->stqh_first; \ + (elm)->field.cstqe_next = (head)->cstqh_first; \ ck_pr_fence_store(); \ - ck_pr_store_ptr(&(head)->stqh_first, elm); \ - if ((elm)->field.stqe_next == NULL) \ - (head)->stqh_last = &(elm)->field.stqe_next; \ + ck_pr_store_ptr(&(head)->cstqh_first, elm); \ + if ((elm)->field.cstqe_next == NULL) \ + (head)->cstqh_last = &(elm)->field.cstqe_next; \ } while (0) #define CK_STAILQ_INSERT_TAIL(head, elm, field) do { \ - (elm)->field.stqe_next = NULL; \ + (elm)->field.cstqe_next = NULL; \ ck_pr_fence_store(); \ - ck_pr_store_ptr((head)->stqh_last, (elm)); \ - (head)->stqh_last = &(elm)->field.stqe_next; \ + ck_pr_store_ptr((head)->cstqh_last, (elm)); \ + (head)->cstqh_last = &(elm)->field.cstqe_next; \ 
} while (0) #define CK_STAILQ_NEXT(elm, field) \ - (ck_pr_load_ptr(&(elm)->field.stqe_next)) + (ck_pr_load_ptr(&(elm)->field.cstqe_next)) #define CK_STAILQ_REMOVE(head, elm, type, field) do { \ - if ((head)->stqh_first == (elm)) { \ + if ((head)->cstqh_first == (elm)) { \ CK_STAILQ_REMOVE_HEAD((head), field); \ } else { \ - struct type *curelm = (head)->stqh_first; \ - while (curelm->field.stqe_next != (elm)) \ - curelm = curelm->field.stqe_next; \ + struct type *curelm = (head)->cstqh_first; \ + while (curelm->field.cstqe_next != (elm)) \ + curelm = curelm->field.cstqe_next; \ CK_STAILQ_REMOVE_AFTER(head, curelm, field); \ } \ } while (0) #define CK_STAILQ_REMOVE_AFTER(head, elm, field) do { \ - ck_pr_store_ptr(&(elm)->field.stqe_next, \ - (elm)->field.stqe_next->field.stqe_next); \ - if ((elm)->field.stqe_next == NULL) \ - (head)->stqh_last = &(elm)->field.stqe_next; \ + ck_pr_store_ptr(&(elm)->field.cstqe_next, \ + (elm)->field.cstqe_next->field.cstqe_next); \ + if ((elm)->field.cstqe_next == NULL) \ + (head)->cstqh_last = &(elm)->field.cstqe_next; \ } while (0) #define CK_STAILQ_REMOVE_HEAD(head, field) do { \ - ck_pr_store_ptr(&(head)->stqh_first, \ - (head)->stqh_first->field.stqe_next); \ - if ((head)->stqh_first == NULL) \ - (head)->stqh_last = &(head)->stqh_first; \ + ck_pr_store_ptr(&(head)->cstqh_first, \ + (head)->cstqh_first->field.cstqe_next); \ + if ((head)->cstqh_first == NULL) \ + (head)->cstqh_last = &(head)->cstqh_first; \ } while (0) #define CK_STAILQ_MOVE(head1, head2, field) do { \ - ck_pr_store_ptr(&(head1)->stqh_first, (head2)->stqh_first); \ - (head1)->stqh_last = (head2)->stqh_last; \ - if ((head2)->stqh_last == &(head2)->stqh_first) \ - (head1)->stqh_last = &(head1)->stqh_first; \ + ck_pr_store_ptr(&(head1)->cstqh_first, (head2)->cstqh_first); \ + (head1)->cstqh_last = (head2)->cstqh_last; \ + if ((head2)->cstqh_last == &(head2)->cstqh_first) \ + (head1)->cstqh_last = &(head1)->cstqh_first; \ } while (0) /* @@ -327,15 +337,15 @@ struct { \ */ #define CK_STAILQ_SWAP(head1, head2, type) do { \ struct type *swap_first = CK_STAILQ_FIRST(head1); \ - struct type **swap_last = (head1)->stqh_last; \ + struct type **swap_last = (head1)->cstqh_last; \ CK_STAILQ_FIRST(head1) = CK_STAILQ_FIRST(head2); \ - (head1)->stqh_last = (head2)->stqh_last; \ + (head1)->cstqh_last = (head2)->cstqh_last; \ CK_STAILQ_FIRST(head2) = swap_first; \ - (head2)->stqh_last = swap_last; \ + (head2)->cstqh_last = swap_last; \ if (CK_STAILQ_EMPTY(head1)) \ - (head1)->stqh_last = &(head1)->stqh_first; \ + (head1)->cstqh_last = &(head1)->cstqh_first; \ if (CK_STAILQ_EMPTY(head2)) \ - (head2)->stqh_last = &(head2)->stqh_first; \ + (head2)->cstqh_last = &(head2)->cstqh_first; \ } while (0) /* @@ -343,7 +353,7 @@ struct { \ */ #define CK_LIST_HEAD(name, type) \ struct name { \ - struct type *lh_first; /* first element */ \ + struct type *clh_first; /* first element */ \ } #define CK_LIST_HEAD_INITIALIZER(head) \ @@ -351,78 +361,78 @@ struct name { \ #define CK_LIST_ENTRY(type) \ struct { \ - struct type *le_next; /* next element */ \ - struct type **le_prev; /* address of previous next element */ \ + struct type *cle_next; /* next element */ \ + struct type **cle_prev; /* address of previous next element */ \ } -#define CK_LIST_FIRST(head) ck_pr_load_ptr(&(head)->lh_first) +#define CK_LIST_FIRST(head) ck_pr_load_ptr(&(head)->clh_first) #define CK_LIST_EMPTY(head) (CK_LIST_FIRST(head) == NULL) -#define CK_LIST_NEXT(elm, field) ck_pr_load_ptr(&(elm)->field.le_next) +#define CK_LIST_NEXT(elm, field) 
ck_pr_load_ptr(&(elm)->field.cle_next) #define CK_LIST_FOREACH(var, head, field) \ for ((var) = CK_LIST_FIRST((head)); \ - (var) && (ck_pr_fence_load(), 1); \ + (var); \ (var) = CK_LIST_NEXT((var), field)) #define CK_LIST_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = CK_LIST_FIRST((head)); \ - (var) && (ck_pr_fence_load(), (tvar) = CK_LIST_NEXT((var), field), 1);\ + (var) && ((tvar) = CK_LIST_NEXT((var), field), 1); \ (var) = (tvar)) #define CK_LIST_INIT(head) do { \ - ck_pr_store_ptr(&(head)->lh_first, NULL); \ + ck_pr_store_ptr(&(head)->clh_first, NULL); \ ck_pr_fence_store(); \ } while (0) #define CK_LIST_INSERT_AFTER(listelm, elm, field) do { \ - (elm)->field.le_next = (listelm)->field.le_next; \ - (elm)->field.le_prev = &(listelm)->field.le_next; \ + (elm)->field.cle_next = (listelm)->field.cle_next; \ + (elm)->field.cle_prev = &(listelm)->field.cle_next; \ ck_pr_fence_store(); \ - if ((listelm)->field.le_next != NULL) \ - (listelm)->field.le_next->field.le_prev = &(elm)->field.le_next;\ - ck_pr_store_ptr(&(listelm)->field.le_next, elm); \ + if ((listelm)->field.cle_next != NULL) \ + (listelm)->field.cle_next->field.cle_prev = &(elm)->field.cle_next;\ + ck_pr_store_ptr(&(listelm)->field.cle_next, elm); \ } while (0) #define CK_LIST_INSERT_BEFORE(listelm, elm, field) do { \ - (elm)->field.le_prev = (listelm)->field.le_prev; \ - (elm)->field.le_next = (listelm); \ + (elm)->field.cle_prev = (listelm)->field.cle_prev; \ + (elm)->field.cle_next = (listelm); \ ck_pr_fence_store(); \ - ck_pr_store_ptr((listelm)->field.le_prev, (elm)); \ - (listelm)->field.le_prev = &(elm)->field.le_next; \ + ck_pr_store_ptr((listelm)->field.cle_prev, (elm)); \ + (listelm)->field.cle_prev = &(elm)->field.cle_next; \ } while (0) #define CK_LIST_INSERT_HEAD(head, elm, field) do { \ - (elm)->field.le_next = (head)->lh_first; \ + (elm)->field.cle_next = (head)->clh_first; \ ck_pr_fence_store(); \ - if ((elm)->field.le_next != NULL) \ - (head)->lh_first->field.le_prev = &(elm)->field.le_next; \ - ck_pr_store_ptr(&(head)->lh_first, elm); \ - (elm)->field.le_prev = &(head)->lh_first; \ + if ((elm)->field.cle_next != NULL) \ + (head)->clh_first->field.cle_prev = &(elm)->field.cle_next; \ + ck_pr_store_ptr(&(head)->clh_first, elm); \ + (elm)->field.cle_prev = &(head)->clh_first; \ } while (0) #define CK_LIST_REMOVE(elm, field) do { \ - ck_pr_store_ptr((elm)->field.le_prev, (elm)->field.le_next); \ - if ((elm)->field.le_next != NULL) \ - (elm)->field.le_next->field.le_prev = (elm)->field.le_prev; \ + ck_pr_store_ptr((elm)->field.cle_prev, (elm)->field.cle_next); \ + if ((elm)->field.cle_next != NULL) \ + (elm)->field.cle_next->field.cle_prev = (elm)->field.cle_prev; \ } while (0) #define CK_LIST_MOVE(head1, head2, field) do { \ - ck_pr_store_ptr(&(head1)->lh_first, (head2)->lh_first); \ - if ((head1)->lh_first != NULL) \ - (head1)->lh_first->field.le_prev = &(head1)->lh_first; \ + ck_pr_store_ptr(&(head1)->clh_first, (head2)->clh_first); \ + if ((head1)->clh_first != NULL) \ + (head1)->clh_first->field.cle_prev = &(head1)->clh_first; \ } while (0) /* * This operation is not applied atomically. 
*/ #define CK_LIST_SWAP(head1, head2, type, field) do { \ - struct type *swap_tmp = (head1)->lh_first; \ - (head1)->lh_first = (head2)->lh_first; \ - (head2)->lh_first = swap_tmp; \ - if ((swap_tmp = (head1)->lh_first) != NULL) \ - swap_tmp->field.le_prev = &(head1)->lh_first; \ - if ((swap_tmp = (head2)->lh_first) != NULL) \ - swap_tmp->field.le_prev = &(head2)->lh_first; \ + struct type *swap_tmp = (head1)->clh_first; \ + (head1)->clh_first = (head2)->clh_first; \ + (head2)->clh_first = swap_tmp; \ + if ((swap_tmp = (head1)->clh_first) != NULL) \ + swap_tmp->field.cle_prev = &(head1)->clh_first; \ + if ((swap_tmp = (head2)->clh_first) != NULL) \ + swap_tmp->field.cle_prev = &(head2)->clh_first; \ } while (0) #endif /* CK_QUEUE_H */ diff --git a/include/ck_ring.h b/include/ck_ring.h index 8a2a791..9f6754e 100644 --- a/include/ck_ring.h +++ b/include/ck_ring.h @@ -66,9 +66,56 @@ ck_ring_size(const struct ck_ring *ring) CK_CC_INLINE static unsigned int ck_ring_capacity(const struct ck_ring *ring) { + return ring->size; } +/* + * This function is only safe to call when there are no concurrent operations + * on the ring. This is primarily meant for persistent ck_ring use-cases. The + * function returns true if any mutations were performed on the ring. + */ +CK_CC_INLINE static bool +ck_ring_repair(struct ck_ring *ring) +{ + bool r = false; + + if (ring->p_tail != ring->p_head) { + ring->p_tail = ring->p_head; + r = true; + } + + return r; +} + +/* + * This can be called when no concurrent updates are occurring on the ring + * structure to check for consistency. This is primarily meant to be used for + * persistent storage of the ring. If this functions returns false, the ring + * is in an inconsistent state. + */ +CK_CC_INLINE static bool +ck_ring_valid(const struct ck_ring *ring) +{ + unsigned int size = ring->size; + unsigned int c_head = ring->c_head; + unsigned int p_head = ring->p_head; + + /* The ring must be a power of 2. */ + if (size & (size - 1)) + return false; + + /* The consumer counter must always be smaller than the producer. */ + if (c_head > p_head) + return false; + + /* The producer may only be up to size slots ahead of consumer. */ + if (p_head - c_head >= size) + return false; + + return true; +} + CK_CC_INLINE static void ck_ring_init(struct ck_ring *ring, unsigned int size) { @@ -84,6 +131,45 @@ ck_ring_init(struct ck_ring *ring, unsigned int size) /* * The _ck_ring_* namespace is internal only and must not used externally. */ + +/* + * This function will return a region of memory to write for the next value + * for a single producer. + */ +CK_CC_FORCE_INLINE static void * +_ck_ring_enqueue_reserve_sp(struct ck_ring *ring, + void *CK_CC_RESTRICT buffer, + unsigned int ts, + unsigned int *size) +{ + const unsigned int mask = ring->mask; + unsigned int consumer, producer, delta; + + consumer = ck_pr_load_uint(&ring->c_head); + producer = ring->p_tail; + delta = producer + 1; + if (size != NULL) + *size = (producer - consumer) & mask; + + if (CK_CC_UNLIKELY((delta & mask) == (consumer & mask))) + return NULL; + + return (char *)buffer + ts * (producer & mask); +} + +/* + * This is to be called to commit and make visible a region of previously + * reserved with reverse_sp. 
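+ *
+ * The public reserve/commit wrappers below follow the same protocol.
+ * A sketch for a single-producer ring of pointers (error handling
+ * and the enqueued value are illustrative):
+ *
+ *	void **slot;
+ *
+ *	slot = ck_ring_enqueue_reserve_spsc(ring, buffer);
+ *	if (slot != NULL) {
+ *		*slot = my_entry;
+ *		ck_ring_enqueue_commit_spsc(ring);
+ *	}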
+ */ +CK_CC_FORCE_INLINE static void +_ck_ring_enqueue_commit_sp(struct ck_ring *ring) +{ + + ck_pr_fence_store(); + ck_pr_store_uint(&ring->p_tail, ring->p_tail + 1); + return; +} + CK_CC_FORCE_INLINE static bool _ck_ring_enqueue_sp(struct ck_ring *ring, void *CK_CC_RESTRICT buffer, @@ -163,6 +249,65 @@ _ck_ring_dequeue_sc(struct ck_ring *ring, return true; } +CK_CC_FORCE_INLINE static void * +_ck_ring_enqueue_reserve_mp(struct ck_ring *ring, + void *buffer, + unsigned int ts, + unsigned int *ticket, + unsigned int *size) +{ + const unsigned int mask = ring->mask; + unsigned int producer, consumer, delta; + + producer = ck_pr_load_uint(&ring->p_head); + + for (;;) { + ck_pr_fence_load(); + consumer = ck_pr_load_uint(&ring->c_head); + + delta = producer + 1; + + if (CK_CC_LIKELY((producer - consumer) < mask)) { + if (ck_pr_cas_uint_value(&ring->p_head, + producer, delta, &producer) == true) { + break; + } + } else { + unsigned int new_producer; + + ck_pr_fence_load(); + new_producer = ck_pr_load_uint(&ring->p_head); + + if (producer == new_producer) { + if (size != NULL) + *size = (producer - consumer) & mask; + + return false; + } + + producer = new_producer; + } + } + + *ticket = producer; + if (size != NULL) + *size = (producer - consumer) & mask; + + return (char *)buffer + ts * (producer & mask); +} + +CK_CC_FORCE_INLINE static void +_ck_ring_enqueue_commit_mp(struct ck_ring *ring, unsigned int producer) +{ + + while (ck_pr_load_uint(&ring->p_tail) != producer) + ck_pr_stall(); + + ck_pr_fence_store(); + ck_pr_store_uint(&ring->p_tail, producer + 1); + return; +} + CK_CC_FORCE_INLINE static bool _ck_ring_enqueue_mp(struct ck_ring *ring, void *buffer, @@ -176,23 +321,54 @@ _ck_ring_enqueue_mp(struct ck_ring *ring, producer = ck_pr_load_uint(&ring->p_head); - do { + for (;;) { /* - * The snapshot of producer must be up to date with - * respect to consumer. + * The snapshot of producer must be up to date with respect to + * consumer. */ ck_pr_fence_load(); consumer = ck_pr_load_uint(&ring->c_head); delta = producer + 1; - if (CK_CC_UNLIKELY((delta & mask) == (consumer & mask))) { - r = false; - goto leave; + + /* + * Only try to CAS if the producer is not clearly stale (not + * less than consumer) and the buffer is definitely not full. + */ + if (CK_CC_LIKELY((producer - consumer) < mask)) { + if (ck_pr_cas_uint_value(&ring->p_head, + producer, delta, &producer) == true) { + break; + } + } else { + unsigned int new_producer; + + /* + * Slow path. Either the buffer is full or we have a + * stale snapshot of p_head. Execute a second read of + * p_read that must be ordered wrt the snapshot of + * c_head. + */ + ck_pr_fence_load(); + new_producer = ck_pr_load_uint(&ring->p_head); + + /* + * Only fail if we haven't made forward progress in + * production: the buffer must have been full when we + * read new_producer (or we wrapped around UINT_MAX + * during this iteration). + */ + if (producer == new_producer) { + r = false; + goto leave; + } + + /* + * p_head advanced during this iteration. Try again. 
+ */ + producer = new_producer; } - } while (ck_pr_cas_uint_value(&ring->p_head, - producer, - delta, - &producer) == false); + } buffer = (char *)buffer + ts * (producer & mask); memcpy(buffer, entry, ts); @@ -323,6 +499,33 @@ ck_ring_enqueue_spsc(struct ck_ring *ring, &entry, sizeof(entry), NULL); } +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_spsc_size(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *size) +{ + + return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *), + size); +} + +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_spsc(struct ck_ring *ring, + struct ck_ring_buffer *buffer) +{ + + return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *), + NULL); +} + +CK_CC_INLINE static void +ck_ring_enqueue_commit_spsc(struct ck_ring *ring) +{ + + _ck_ring_enqueue_commit_sp(ring); + return; +} + CK_CC_INLINE static bool ck_ring_dequeue_spsc(struct ck_ring *ring, const struct ck_ring_buffer *buffer, @@ -344,8 +547,7 @@ ck_ring_enqueue_mpmc(struct ck_ring *ring, const void *entry) { - return _ck_ring_enqueue_mp(ring, buffer, &entry, - sizeof(entry), NULL); + return _ck_ring_enqueue_mp(ring, buffer, &entry, sizeof(entry), NULL); } CK_CC_INLINE static bool @@ -355,8 +557,37 @@ ck_ring_enqueue_mpmc_size(struct ck_ring *ring, unsigned int *size) { - return _ck_ring_enqueue_mp_size(ring, buffer, &entry, - sizeof(entry), size); + return _ck_ring_enqueue_mp_size(ring, buffer, &entry, sizeof(entry), + size); +} + +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_mpmc(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *ticket) +{ + + return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *), + ticket, NULL); +} + +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_mpmc_size(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *ticket, + unsigned int *size) +{ + + return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *), + ticket, size); +} + +CK_CC_INLINE static void +ck_ring_enqueue_commit_mpmc(struct ck_ring *ring, unsigned int ticket) +{ + + _ck_ring_enqueue_commit_mp(ring, ticket); + return; } CK_CC_INLINE static bool @@ -384,6 +615,31 @@ ck_ring_dequeue_mpmc(struct ck_ring *ring, * ring buffer containing pointers. Correctness is provided for any number of * consumers with up to one concurrent producer. */ +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_spmc_size(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *size) +{ + + return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *), size); +} + +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_spmc(struct ck_ring *ring, + struct ck_ring_buffer *buffer) +{ + + return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *), NULL); +} + +CK_CC_INLINE static void +ck_ring_enqueue_commit_spmc(struct ck_ring *ring) +{ + + _ck_ring_enqueue_commit_sp(ring); + return; +} + CK_CC_INLINE static bool ck_ring_enqueue_spmc_size(struct ck_ring *ring, struct ck_ring_buffer *buffer, @@ -428,6 +684,35 @@ ck_ring_dequeue_spmc(struct ck_ring *ring, * ring buffer containing pointers. Correctness is provided for any number of * producers with up to one concurrent consumers. 
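 *
 * The multi-producer reserve variants also return a ticket that must
 * be passed to the matching commit; commits complete in ticket order.
 * A sketch (the enqueued value is illustrative):
 *
 *	unsigned int ticket;
 *	void **slot;
 *
 *	slot = ck_ring_enqueue_reserve_mpsc(ring, buffer, &ticket);
 *	if (slot != NULL) {
 *		*slot = my_entry;
 *		ck_ring_enqueue_commit_mpsc(ring, ticket);
 *	}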
*/ +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_mpsc(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *ticket) +{ + + return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *), + ticket, NULL); +} + +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_mpsc_size(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *ticket, + unsigned int *size) +{ + + return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *), + ticket, size); +} + +CK_CC_INLINE static void +ck_ring_enqueue_commit_mpsc(struct ck_ring *ring, unsigned int ticket) +{ + + _ck_ring_enqueue_commit_mp(ring, ticket); + return; +} + CK_CC_INLINE static bool ck_ring_enqueue_mpsc(struct ck_ring *ring, struct ck_ring_buffer *buffer, @@ -463,194 +748,290 @@ ck_ring_dequeue_mpsc(struct ck_ring *ring, * CK_RING_PROTOTYPE is used to define a type-safe interface for inlining * values of a particular type in the ring the buffer. */ -#define CK_RING_PROTOTYPE(name, type) \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_spsc_size_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c, \ - unsigned int *d) \ -{ \ - \ - return _ck_ring_enqueue_sp_size(a, b, c, \ - sizeof(struct type), d); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_spsc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_enqueue_sp(a, b, c, \ - sizeof(struct type), NULL); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_dequeue_spsc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_dequeue_sc(a, b, c, \ - sizeof(struct type)); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_spmc_size_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c, \ - unsigned int *d) \ -{ \ - \ - return _ck_ring_enqueue_sp_size(a, b, c, \ - sizeof(struct type), d); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_spmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_enqueue_sp(a, b, c, \ - sizeof(struct type), NULL); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_trydequeue_spmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_trydequeue_mc(a, \ - b, c, sizeof(struct type)); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_dequeue_spmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_dequeue_mc(a, b, c, \ - sizeof(struct type)); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_mpsc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_enqueue_mp(a, b, c, \ - sizeof(struct type), NULL); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_mpsc_size_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c, \ - unsigned int *d) \ -{ \ - \ - return _ck_ring_enqueue_mp_size(a, b, c, \ - sizeof(struct type), d); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_dequeue_mpsc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_dequeue_sc(a, b, c, \ - sizeof(struct type)); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_mpmc_size_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c, \ - unsigned int *d) \ -{ \ - \ - return _ck_ring_enqueue_mp_size(a, b, c, \ - sizeof(struct type), d); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_mpmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_enqueue_mp(a, b, c, \ - 
sizeof(struct type), NULL); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_trydequeue_mpmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_trydequeue_mc(a, \ - b, c, sizeof(struct type)); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_dequeue_mpmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_dequeue_mc(a, b, c, \ - sizeof(struct type)); \ +#define CK_RING_PROTOTYPE(name, type) \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_spsc_##name(struct ck_ring *a, \ + struct type *b) \ +{ \ + \ + return _ck_ring_enqueue_reserve_sp(a, b, \ + sizeof(struct type), NULL); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_spsc_size_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c) \ +{ \ + \ + return _ck_ring_enqueue_reserve_sp(a, b, \ + sizeof(struct type), c); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_spsc_size_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_sp_size(a, b, c, \ + sizeof(struct type), d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_spsc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_enqueue_sp(a, b, c, \ + sizeof(struct type), NULL); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_dequeue_spsc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_dequeue_sc(a, b, c, \ + sizeof(struct type)); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_spmc_##name(struct ck_ring *a, \ + struct type *b) \ +{ \ + \ + return _ck_ring_enqueue_reserve_sp(a, b, \ + sizeof(struct type), NULL); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_spmc_size_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c) \ +{ \ + \ + return _ck_ring_enqueue_reserve_sp(a, b, \ + sizeof(struct type), c); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_spmc_size_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_sp_size(a, b, c, \ + sizeof(struct type), d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_spmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_enqueue_sp(a, b, c, \ + sizeof(struct type), NULL); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_trydequeue_spmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_trydequeue_mc(a, \ + b, c, sizeof(struct type)); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_dequeue_spmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_dequeue_mc(a, b, c, \ + sizeof(struct type)); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_mpsc_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c) \ +{ \ + \ + return _ck_ring_enqueue_reserve_mp(a, b, \ + sizeof(struct type), c, NULL); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_mpsc_size_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_reserve_mp(a, b, \ + sizeof(struct type), c, d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_mpsc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_enqueue_mp(a, b, c, \ + sizeof(struct type), NULL); 
\ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_mpsc_size_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_mp_size(a, b, c, \ + sizeof(struct type), d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_dequeue_mpsc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_dequeue_sc(a, b, c, \ + sizeof(struct type)); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_mpmc_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c) \ +{ \ + \ + return _ck_ring_enqueue_reserve_mp(a, b, \ + sizeof(struct type), c, NULL); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_mpmc_size_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_reserve_mp(a, b, \ + sizeof(struct type), c, d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_mpmc_size_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_mp_size(a, b, c, \ + sizeof(struct type), d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_mpmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_enqueue_mp(a, b, c, \ + sizeof(struct type), NULL); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_trydequeue_mpmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_trydequeue_mc(a, \ + b, c, sizeof(struct type)); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_dequeue_mpmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_dequeue_mc(a, b, c, \ + sizeof(struct type)); \ } /* * A single producer with one concurrent consumer. */ -#define CK_RING_ENQUEUE_SPSC(name, a, b, c) \ +#define CK_RING_ENQUEUE_SPSC(name, a, b, c) \ ck_ring_enqueue_spsc_##name(a, b, c) -#define CK_RING_ENQUEUE_SPSC_SIZE(name, a, b, c, d) \ +#define CK_RING_ENQUEUE_SPSC_SIZE(name, a, b, c, d) \ ck_ring_enqueue_spsc_size_##name(a, b, c, d) -#define CK_RING_DEQUEUE_SPSC(name, a, b, c) \ +#define CK_RING_ENQUEUE_RESERVE_SPSC(name, a, b, c) \ + ck_ring_enqueue_reserve_spsc_##name(a, b, c) +#define CK_RING_ENQUEUE_RESERVE_SPSC_SIZE(name, a, b, c, d) \ + ck_ring_enqueue_reserve_spsc_size_##name(a, b, c, d) +#define CK_RING_DEQUEUE_SPSC(name, a, b, c) \ ck_ring_dequeue_spsc_##name(a, b, c) /* * A single producer with any number of concurrent consumers. */ -#define CK_RING_ENQUEUE_SPMC(name, a, b, c) \ +#define CK_RING_ENQUEUE_SPMC(name, a, b, c) \ ck_ring_enqueue_spmc_##name(a, b, c) -#define CK_RING_ENQUEUE_SPMC_SIZE(name, a, b, c, d) \ +#define CK_RING_ENQUEUE_SPMC_SIZE(name, a, b, c, d) \ ck_ring_enqueue_spmc_size_##name(a, b, c, d) -#define CK_RING_TRYDEQUEUE_SPMC(name, a, b, c) \ +#define CK_RING_ENQUEUE_RESERVE_SPMC(name, a, b, c) \ + ck_ring_enqueue_reserve_spmc_##name(a, b, c) +#define CK_RING_ENQUEUE_RESERVE_SPMC_SIZE(name, a, b, c, d) \ + ck_ring_enqueue_reserve_spmc_size_##name(a, b, c, d) +#define CK_RING_TRYDEQUEUE_SPMC(name, a, b, c) \ ck_ring_trydequeue_spmc_##name(a, b, c) -#define CK_RING_DEQUEUE_SPMC(name, a, b, c) \ +#define CK_RING_DEQUEUE_SPMC(name, a, b, c) \ ck_ring_dequeue_spmc_##name(a, b, c) /* * Any number of concurrent producers with up to one * concurrent consumer. 
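CK_RING_PROTOTYPE(name, type) stamps out one typed wrapper per operation above, and the CK_RING_ENQUEUE_*/CK_RING_DEQUEUE_* macros merely forward to them. A minimal sketch of the MPMC pair in use, with illustrative names (struct entry, ring_put, ring_get) and assuming the usual caller-owned, power-of-two buffer:

#include <ck_ring.h>
#include <stdbool.h>
#include <stdint.h>

struct entry {
    uint64_t id;
};

CK_RING_PROTOTYPE(entry, entry)

static struct ck_ring ring;
static struct entry buffer[1024];   /* capacity must be a power of two */

static void
ring_setup(void)
{

    ck_ring_init(&ring, 1024);
}

static bool
ring_put(struct entry *e)
{

    /* Copies *e into the ring; returns false when the ring is full. */
    return ck_ring_enqueue_mpmc_entry(&ring, buffer, e);
}

static bool
ring_get(struct entry *e)
{

    /* Copies the oldest element into *e; returns false when the ring is empty. */
    return ck_ring_dequeue_mpmc_entry(&ring, buffer, e);
}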
*/ -#define CK_RING_ENQUEUE_MPSC(name, a, b, c) \ +#define CK_RING_ENQUEUE_MPSC(name, a, b, c) \ ck_ring_enqueue_mpsc_##name(a, b, c) -#define CK_RING_ENQUEUE_MPSC_SIZE(name, a, b, c, d) \ +#define CK_RING_ENQUEUE_MPSC_SIZE(name, a, b, c, d) \ ck_ring_enqueue_mpsc_size_##name(a, b, c, d) -#define CK_RING_DEQUEUE_MPSC(name, a, b, c) \ +#define CK_RING_ENQUEUE_RESERVE_MPSC(name, a, b, c) \ + ck_ring_enqueue_reserve_mpsc_##name(a, b, c) +#define CK_RING_ENQUEUE_RESERVE_MPSC_SIZE(name, a, b, c, d) \ + ck_ring_enqueue_reserve_mpsc_size_##name(a, b, c, d) +#define CK_RING_DEQUEUE_MPSC(name, a, b, c) \ ck_ring_dequeue_mpsc_##name(a, b, c) /* * Any number of concurrent producers and consumers. */ -#define CK_RING_ENQUEUE_MPMC(name, a, b, c) \ +#define CK_RING_ENQUEUE_MPMC(name, a, b, c) \ ck_ring_enqueue_mpmc_##name(a, b, c) -#define CK_RING_ENQUEUE_MPMC_SIZE(name, a, b, c, d) \ +#define CK_RING_ENQUEUE_MPMC_SIZE(name, a, b, c, d) \ ck_ring_enqueue_mpmc_size_##name(a, b, c, d) -#define CK_RING_TRYDEQUEUE_MPMC(name, a, b, c) \ +#define CK_RING_ENQUEUE_RESERVE_MPMC(name, a, b, c) \ + ck_ring_enqueue_reserve_mpmc_##name(a, b, c) +#define CK_RING_ENQUEUE_RESERVE_MPMC_SIZE(name, a, b, c, d) \ + ck_ring_enqueue_reserve_mpmc_size_##name(a, b, c, d) +#define CK_RING_TRYDEQUEUE_MPMC(name, a, b, c) \ ck_ring_trydequeue_mpmc_##name(a, b, c) -#define CK_RING_DEQUEUE_MPMC(name, a, b, c) \ +#define CK_RING_DEQUEUE_MPMC(name, a, b, c) \ ck_ring_dequeue_mpmc_##name(a, b, c) #endif /* CK_RING_H */ diff --git a/include/freebsd/ck_md.h.in b/include/freebsd/ck_md.h.in new file mode 100644 index 0000000..7fb6c64 --- /dev/null +++ b/include/freebsd/ck_md.h.in @@ -0,0 +1,133 @@ +/* + * Copyright 2018 Samy Al Bahra. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * This header file is meant for use of Concurrency Kit in the FreeBSD kernel. + */ + +#ifndef CK_MD_H +#define CK_MD_H + +#include <sys/param.h> + +#ifndef _KERNEL +#error This header file is meant for the FreeBSD kernel. +#endif /* _KERNEL */ + +#ifndef CK_MD_CACHELINE +/* + * FreeBSD's CACHE_LINE macro is a compile-time maximum cache-line size for an + * architecture, defined to be 128 bytes by default on x86*. 
Even in presence + * of adjacent sector prefetch, this doesn't make sense from a modeling + * perspective. + */ +#if defined(__amd64__) || defined(__i386__) +#define CK_MD_CACHELINE (64) +#else +#define CK_MD_CACHELINE (CACHE_LINE_SIZE) +#endif /* !__amd64__ && !__i386__ */ +#endif /* CK_MD_CACHELINE */ + +#ifndef CK_MD_PAGESIZE +#define CK_MD_PAGESIZE (PAGE_SIZE) +#endif + +/* + * Once FreeBSD has a mechanism to detect RTM, this can be enabled and RTM + * facilities can be called. These facilities refer to TSX. + */ +#ifndef CK_MD_RTM_DISABLE +#define CK_MD_RTM_DISABLE +#endif /* CK_MD_RTM_DISABLE */ + +/* + * Do not enable pointer-packing-related (VMA) optimizations in kernel-space. + */ +#ifndef CK_MD_POINTER_PACK_DISABLE +#define CK_MD_POINTER_PACK_DISABLE +#endif /* CK_MD_POINTER_PACK_DISABLE */ + +/* + * The following would be used for pointer-packing tricks, disabled for the + * kernel. + */ +#ifndef CK_MD_VMA_BITS_UNKNOWN +#define CK_MD_VMA_BITS_UNKNOWN +#endif /* CK_MD_VMA_BITS_UNKNOWN */ + +/* + * Do not enable double operations in kernel-space. + */ +#ifndef CK_PR_DISABLE_DOUBLE +#define CK_PR_DISABLE_DOUBLE +#endif /* CK_PR_DISABLE_DOUBLE */ + +/* + * If building for a uni-processor target, then enable the uniprocessor + * feature flag. This, among other things, will remove the lock prefix. + */ +#ifndef SMP +#define CK_MD_UMP +#endif /* SMP */ + +/* + * Disable the use of compiler builtin functions. + */ +#define CK_MD_CC_BUILTIN_DISABLE 1 + +/* + * CK expects those, which are normally defined by the build system. + */ +#if defined(__i386__) && !defined(__x86__) +#define __x86__ +/* + * If x86 becomes more relevant, we may want to consider importing in + * __mbk() to avoid potential issues around false sharing. + */ +#define CK_MD_TSO +#define CK_MD_SSE_DISABLE 1 +#elif defined(__amd64__) +#define CK_MD_TSO +#elif defined(__sparc64__) && !defined(__sparcv9__) +#define __sparcv9__ +#define CK_MD_TSO +#elif defined(__powerpc64__) && !defined(__ppc64__) +#define __ppc64__ +#elif defined(__powerpc__) && !defined(__ppc__) +#define __ppc__ +#endif + +/* If no memory model has been defined, assume RMO. 
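CK_MD_CACHELINE is what CK's padding and alignment helpers are built on: on amd64/i386 it is pinned to 64 bytes as above, elsewhere it falls back to the kernel's CACHE_LINE_SIZE. A small sketch of the usual padding pattern (struct percpu_counter is illustrative):

#include <ck_md.h>

/* One slot per CPU, padded so adjacent slots never share a cache line. */
struct percpu_counter {
    unsigned long value;
    char pad[CK_MD_CACHELINE - sizeof(unsigned long)];
};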
*/ +#if !defined(CK_MD_RMO) && !defined(CK_MD_TSO) && !defined(CK_MD_PSO) +#define CK_MD_RMO +#endif + +#define CK_VERSION "@VERSION@" +#define CK_GIT_SHA "@GIT_SHA@" + +#endif /* CK_MD_H */ diff --git a/include/gcc/aarch64/ck_pr.h b/include/gcc/aarch64/ck_pr.h index e739c4d..0a47307 100644 --- a/include/gcc/aarch64/ck_pr.h +++ b/include/gcc/aarch64/ck_pr.h @@ -92,7 +92,7 @@ CK_PR_FENCE(unlock, CK_DMB_SY) ck_pr_md_load_##S(const M *target) \ { \ long r = 0; \ - __asm__ __volatile__(I " %w0, [%1];" \ + __asm__ __volatile__(I " %w0, [%1]\n" \ : "=r" (r) \ : "r" (target) \ : "memory"); \ @@ -103,7 +103,7 @@ CK_PR_FENCE(unlock, CK_DMB_SY) ck_pr_md_load_##S(const M *target) \ { \ long r = 0; \ - __asm__ __volatile__(I " %0, [%1];" \ + __asm__ __volatile__(I " %0, [%1]\n" \ : "=r" (r) \ : "r" (target) \ : "memory"); \ @@ -195,10 +195,10 @@ CK_PR_STORE_S_64(double, double, "str") T previous = 0; \ T tmp = 0; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - "neg %" R "0, %" R "0;" \ - "stxr" W " %w1, %" R "0, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + "neg %" R "0, %" R "0\n" \ + "stxr" W " %w1, %" R "0, [%2]\n" \ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target) \ diff --git a/include/gcc/aarch64/ck_pr_llsc.h b/include/gcc/aarch64/ck_pr_llsc.h index aa4e309..6500d96 100644 --- a/include/gcc/aarch64/ck_pr_llsc.h +++ b/include/gcc/aarch64/ck_pr_llsc.h @@ -38,17 +38,17 @@ ck_pr_cas_64_2_value(uint64_t target[2], uint64_t compare[2], uint64_t set[2], u uint64_t tmp1, tmp2; __asm__ __volatile__("1:" - "ldxp %0, %1, [%4];" - "mov %2, %0;" - "mov %3, %1;" - "eor %0, %0, %5;" - "eor %1, %1, %6;" - "orr %1, %0, %1;" - "mov %w0, #0;" - "cbnz %1, 2f;" - "stxp %w0, %7, %8, [%4];" - "cbnz %w0, 1b;" - "mov %w0, #1;" + "ldxp %0, %1, [%4]\n" + "mov %2, %0\n" + "mov %3, %1\n" + "eor %0, %0, %5\n" + "eor %1, %1, %6\n" + "orr %1, %0, %1\n" + "mov %w0, #0\n" + "cbnz %1, 2f\n" + "stxp %w0, %7, %8, [%4]\n" + "cbnz %w0, 1b\n" + "mov %w0, #1\n" "2:" : "=&r" (tmp1), "=&r" (tmp2), "=&r" (value[0]), "=&r" (value[1]) : "r" (target), "r" (compare[0]), "r" (compare[1]), "r" (set[0]), "r" (set[1]) @@ -72,15 +72,15 @@ ck_pr_cas_64_2(uint64_t target[2], uint64_t compare[2], uint64_t set[2]) uint64_t tmp1, tmp2; __asm__ __volatile__("1:" - "ldxp %0, %1, [%2];" - "eor %0, %0, %3;" - "eor %1, %1, %4;" - "orr %1, %0, %1;" - "mov %w0, #0;" - "cbnz %1, 2f;" - "stxp %w0, %5, %6, [%2];" - "cbnz %w0, 1b;" - "mov %w0, #1;" + "ldxp %0, %1, [%2]\n" + "eor %0, %0, %3\n" + "eor %1, %1, %4\n" + "orr %1, %0, %1\n" + "mov %w0, #0\n" + "cbnz %1, 2f\n" + "stxp %w0, %5, %6, [%2]\n" + "cbnz %w0, 1b\n" + "mov %w0, #1\n" "2:" : "=&r" (tmp1), "=&r" (tmp2) : "r" (target), "r" (compare[0]), "r" (compare[1]), "r" (set[0]), "r" (set[1]) @@ -103,12 +103,12 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set) { \ T previous; \ T tmp; \ - __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - "cmp %" R "0, %" R "4;" \ - "b.ne 2f;" \ - "stxr" W " %w1, %" R "3, [%2];" \ - "cbnz %w1, 1b;" \ + __asm__ __volatile__("1:\n" \ + "ldxr" W " %" R "0, [%2]\n" \ + "cmp %" R "0, %" R "4\n" \ + "b.ne 2f\n" \ + "stxr" W " %w1, %" R "3, [%2]\n" \ + "cbnz %w1, 1b\n" \ "2:" \ : "=&r" (previous), \ "=&r" (tmp) \ @@ -126,11 +126,11 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set) T tmp; \ __asm__ __volatile__( \ "1:" \ - "ldxr" W " %" R "0, [%2];" \ - "cmp %" R "0, %" R "4;" \ - "b.ne 2f;" \ - "stxr" W " %w1, %" R "3, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n" \ + "cmp %" R "0, %" R 
"4\n" \ + "b.ne 2f\n" \ + "stxr" W " %w1, %" R "3, [%2]\n" \ + "cbnz %w1, 1b\n" \ "2:" \ : "=&r" (previous), \ "=&r" (tmp) \ @@ -167,9 +167,9 @@ CK_PR_CAS_S(char, char, "b", "w") T previous; \ T tmp; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - "stxr" W " %w1, %" R "3, [%2];"\ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + "stxr" W " %w1, %" R "3, [%2]\n"\ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target), \ @@ -198,10 +198,10 @@ CK_PR_FAS(char, char, char, "b", "w") T previous = 0; \ T tmp = 0; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - I ";" \ - "stxr" W " %w1, %" R "0, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + I "\n" \ + "stxr" W " %w1, %" R "0, [%2]\n" \ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target) \ @@ -239,10 +239,10 @@ CK_PR_UNARY_S(char, char, "b") T previous; \ T tmp; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];"\ - I " %" R "0, %" R "0, %" R "3;" \ - "stxr" W " %w1, %" R "0, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + I " %" R "0, %" R "0, %" R "3\n" \ + "stxr" W " %w1, %" R "0, [%2]\n" \ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target), \ @@ -286,10 +286,10 @@ ck_pr_faa_ptr(void *target, uintptr_t delta) uintptr_t previous, r, tmp; __asm__ __volatile__("1:" - "ldxr %0, [%3];" - "add %1, %4, %0;" - "stxr %w2, %1, [%3];" - "cbnz %w2, 1b;" + "ldxr %0, [%3]\n" + "add %1, %4, %0\n" + "stxr %w2, %1, [%3]\n" + "cbnz %w2, 1b\n" : "=&r" (previous), "=&r" (r), "=&r" (tmp) @@ -306,9 +306,9 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta) uint64_t previous, r, tmp; __asm__ __volatile__("1:" - "ldxr %0, [%3];" - "add %1, %4, %0;" - "stxr %w2, %1, [%3];" + "ldxr %0, [%3]\n" + "add %1, %4, %0\n" + "stxr %w2, %1, [%3]\n" "cbnz %w2, 1b;" : "=&r" (previous), "=&r" (r), @@ -326,10 +326,10 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta) { \ T previous, r, tmp; \ __asm__ __volatile__("1:" \ - "ldxr" W " %w0, [%3];" \ - "add %w1, %w4, %w0;" \ - "stxr" W " %w2, %w1, [%3];" \ - "cbnz %w2, 1b;" \ + "ldxr" W " %w0, [%3]\n" \ + "add %w1, %w4, %w0\n" \ + "stxr" W " %w2, %w1, [%3]\n" \ + "cbnz %w2, 1b\n" \ : "=&r" (previous), \ "=&r" (r), \ "=&r" (tmp) \ diff --git a/include/gcc/aarch64/ck_pr_lse.h b/include/gcc/aarch64/ck_pr_lse.h index e2c9554..e450e72 100644 --- a/include/gcc/aarch64/ck_pr_lse.h +++ b/include/gcc/aarch64/ck_pr_lse.h @@ -29,6 +29,7 @@ #ifndef CK_PR_AARCH64_LSE_H #define CK_PR_AARCH64_LSE_H +#error bite #ifndef CK_PR_H #error Do not include this file directly, use ck_pr.h #endif @@ -43,10 +44,10 @@ ck_pr_cas_64_2_value(uint64_t target[2], uint64_t compare[2], uint64_t set[2], u register uint64_t x2 __asm__ ("x2") = set[0]; register uint64_t x3 __asm__ ("x3") = set[1]; - __asm__ __volatile__("casp %0, %1, %4, %5, [%6];" - "eor %2, %0, %7;" - "eor %3, %1, %8;" - "orr %2, %2, %3;" + __asm__ __volatile__("casp %0, %1, %4, %5, [%6]\n" + "eor %2, %0, %7\n" + "eor %3, %1, %8\n" + "orr %2, %2, %3\n" : "+&r" (x0), "+&r" (x1), "=&r" (tmp1), "=&r" (tmp2) : "r" (x2), "r" (x3), "r" (target), "r" (compare[0]), "r" (compare[1]) : "memory"); @@ -74,10 +75,10 @@ ck_pr_cas_64_2(uint64_t target[2], uint64_t compare[2], uint64_t set[2]) register uint64_t x2 __asm__ ("x2") = set[0]; register uint64_t x3 __asm__ ("x3") = set[1]; - __asm__ __volatile__("casp %0, %1, %2, %3, [%4];" - "eor %0, %0, %5;" - "eor %1, %1, %6;" - "orr %0, %0, %1;" + __asm__ __volatile__("casp %0, %1, %2, %3, [%4]\n" + "eor %0, %0, %5\n" + "eor %1, %1, %6\n" + "orr %0, 
%0, %1\n" : "+&r" (x0), "+&r" (x1) : "r" (x2), "r" (x3), "r" (target), "r" (compare[0]), "r" (compare[1]) : "memory"); @@ -99,7 +100,7 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set) { \ *(T *)value = compare; \ __asm__ __volatile__( \ - "cas" W " %" R "0, %" R "2, [%1];" \ + "cas" W " %" R "0, %" R "2, [%1]\n"\ : "+&r" (*(T *)value) \ : "r" (target), \ "r" (set) \ @@ -111,7 +112,7 @@ ck_pr_cas_ptr_2(void *target, void *compare, void *set) { \ T previous = compare; \ __asm__ __volatile__( \ - "cas" W " %" R "0, %" R "2, [%1];" \ + "cas" W " %" R "0, %" R "2, [%1]\n"\ : "+&r" (previous) \ : "r" (target), \ "r" (set) \ @@ -144,7 +145,7 @@ CK_PR_CAS_S(char, char, "b", "w") { \ T previous; \ __asm__ __volatile__( \ - "swp" W " %" R "2, %" R "0, [%1];" \ + "swp" W " %" R "2, %" R "0, [%1]\n"\ : "=&r" (previous) \ : "r" (target), \ "r" (v) \ @@ -169,8 +170,8 @@ CK_PR_FAS(char, char, char, "b", "w") CK_CC_INLINE static void \ ck_pr_##O##_##N(M *target) \ { \ - __asm__ __volatile__(I ";" \ - "st" S W " " R "0, [%0];" \ + __asm__ __volatile__(I "\n" \ + "st" S W " " R "0, [%0]\n" \ : \ : "r" (target) \ : "x0", "memory"); \ @@ -204,8 +205,8 @@ CK_PR_UNARY_S(char, char, "b") CK_CC_INLINE static void \ ck_pr_##O##_##N(M *target, T delta) \ { \ - __asm__ __volatile__(I ";" \ - "st" S W " %" R "0, [%1];" \ + __asm__ __volatile__(I "\n" \ + "st" S W " %" R "0, [%1]\n"\ : "+&r" (delta) \ : "r" (target) \ : "memory"); \ @@ -247,7 +248,7 @@ ck_pr_faa_ptr(void *target, uintptr_t delta) uintptr_t previous; __asm__ __volatile__( - "ldadd %2, %0, [%1];" + "ldadd %2, %0, [%1]\n" : "=r" (previous) : "r" (target), "r" (delta) @@ -262,7 +263,7 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta) uint64_t previous; __asm__ __volatile__( - "ldadd %2, %0, [%1];" + "ldadd %2, %0, [%1]\n" : "=r" (previous) : "r" (target), "r" (delta) @@ -277,7 +278,7 @@ ck_pr_faa_64(uint64_t *target, uint64_t delta) { \ T previous; \ __asm__ __volatile__( \ - "ldadd" W " %w2, %w0, [%1];" \ + "ldadd" W " %w2, %w0, [%1]\n" \ : "=r" (previous) \ : "r" (target), \ "r" (delta) \ diff --git a/include/gcc/ck_cc.h b/include/gcc/ck_cc.h index a14a4b5..0a6d17b 100644 --- a/include/gcc/ck_cc.h +++ b/include/gcc/ck_cc.h @@ -39,6 +39,15 @@ #define CK_CC_UNUSED __attribute__((unused)) #define CK_CC_USED __attribute__((used)) #define CK_CC_IMM "i" + +#define CK_CC_CONTAINER(F, T, M, N) \ + CK_CC_INLINE static T * \ + N(F *p) \ + { \ + \ + return (T *)(void *)((char *)p - __builtin_offsetof(T, M)); \ + } + #if defined(__x86_64__) || defined(__x86__) #define CK_CC_IMM_U32 "Z" #define CK_CC_IMM_S32 "e" @@ -103,28 +112,26 @@ #define CK_CC_TYPEOF(X, DEFAULT) __typeof__(X) /* - * Portability wrappers for bitwise ops. + * Portability wrappers for bitwise operations. 
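CK_CC_CONTAINER(F, T, M, N) defines an inline helper N() that maps a pointer to member M (of type F) back to its enclosing object of type T, i.e. a generated container_of. A short sketch with illustrative types and names:

#include <ck_cc.h>

struct node {
    struct node *next;
};

struct connection {
    int fd;
    struct node link;
};

/* Defines: static struct connection *connection_container(struct node *). */
CK_CC_CONTAINER(struct node, struct connection, link, connection_container)

/*
 * Given a struct node *n known to be embedded in a connection,
 * connection_container(n) recovers the enclosing struct connection.
 */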
*/ - +#ifndef CK_MD_CC_BUILTIN_DISABLE #define CK_F_CC_FFS -#define CK_F_CC_CLZ -#define CK_F_CC_CTZ -#define CK_F_CC_POPCOUNT - CK_CC_INLINE static int ck_cc_ffs(unsigned int x) { - return __builtin_ffs(x); + return __builtin_ffsl(x); } +#define CK_F_CC_FFSL CK_CC_INLINE static int -ck_cc_clz(unsigned int x) +ck_cc_ffsl(unsigned long x) { - return __builtin_clz(x); + return __builtin_ffsll(x); } +#define CK_F_CC_CTZ CK_CC_INLINE static int ck_cc_ctz(unsigned int x) { @@ -132,11 +139,12 @@ ck_cc_ctz(unsigned int x) return __builtin_ctz(x); } +#define CK_F_CC_POPCOUNT CK_CC_INLINE static int ck_cc_popcount(unsigned int x) { return __builtin_popcount(x); } - +#endif /* CK_MD_CC_BUILTIN_DISABLE */ #endif /* CK_GCC_CC_H */ diff --git a/include/gcc/ck_pr.h b/include/gcc/ck_pr.h index 084d423..108e983 100644 --- a/include/gcc/ck_pr.h +++ b/include/gcc/ck_pr.h @@ -80,7 +80,7 @@ ck_pr_md_load_ptr(const void *target) void *r; ck_pr_barrier(); - r = CK_CC_DECONST_PTR(CK_PR_ACCESS(target)); + r = CK_CC_DECONST_PTR(*(volatile void *const*)(target)); ck_pr_barrier(); return r; @@ -91,7 +91,7 @@ ck_pr_md_store_ptr(void *target, const void *v) { ck_pr_barrier(); - CK_PR_ACCESS(target) = CK_CC_DECONST_PTR(v); + *(volatile void **)target = CK_CC_DECONST_PTR(v); ck_pr_barrier(); return; } diff --git a/include/gcc/ppc/ck_pr.h b/include/gcc/ppc/ck_pr.h index cd7935d..73f0cb7 100644 --- a/include/gcc/ppc/ck_pr.h +++ b/include/gcc/ppc/ck_pr.h @@ -67,21 +67,29 @@ ck_pr_stall(void) __asm__ __volatile__(I ::: "memory"); \ } -CK_PR_FENCE(atomic, "lwsync") -CK_PR_FENCE(atomic_store, "lwsync") +#ifdef CK_MD_PPC32_LWSYNC +#define CK_PR_LWSYNCOP "lwsync" +#else /* CK_MD_PPC32_LWSYNC_DISABLE */ +#define CK_PR_LWSYNCOP "sync" +#endif + +CK_PR_FENCE(atomic, CK_PR_LWSYNCOP) +CK_PR_FENCE(atomic_store, CK_PR_LWSYNCOP) CK_PR_FENCE(atomic_load, "sync") -CK_PR_FENCE(store_atomic, "lwsync") -CK_PR_FENCE(load_atomic, "lwsync") -CK_PR_FENCE(store, "lwsync") +CK_PR_FENCE(store_atomic, CK_PR_LWSYNCOP) +CK_PR_FENCE(load_atomic, CK_PR_LWSYNCOP) +CK_PR_FENCE(store, CK_PR_LWSYNCOP) CK_PR_FENCE(store_load, "sync") -CK_PR_FENCE(load, "lwsync") -CK_PR_FENCE(load_store, "lwsync") +CK_PR_FENCE(load, CK_PR_LWSYNCOP) +CK_PR_FENCE(load_store, CK_PR_LWSYNCOP) CK_PR_FENCE(memory, "sync") -CK_PR_FENCE(acquire, "lwsync") -CK_PR_FENCE(release, "lwsync") -CK_PR_FENCE(acqrel, "lwsync") -CK_PR_FENCE(lock, "lwsync") -CK_PR_FENCE(unlock, "lwsync") +CK_PR_FENCE(acquire, CK_PR_LWSYNCOP) +CK_PR_FENCE(release, CK_PR_LWSYNCOP) +CK_PR_FENCE(acqrel, CK_PR_LWSYNCOP) +CK_PR_FENCE(lock, CK_PR_LWSYNCOP) +CK_PR_FENCE(unlock, CK_PR_LWSYNCOP) + +#undef CK_PR_LWSYNCOP #undef CK_PR_FENCE diff --git a/include/gcc/s390x/ck_f_pr.h b/include/gcc/s390x/ck_f_pr.h new file mode 100644 index 0000000..cd54a28 --- /dev/null +++ b/include/gcc/s390x/ck_f_pr.h @@ -0,0 +1,97 @@ +/* DO NOT EDIT. 
This is auto-generated from feature.sh */ +#define CK_F_PR_ADD_32 +#define CK_F_PR_ADD_64 +#define CK_F_PR_ADD_INT +#define CK_F_PR_ADD_PTR +#define CK_F_PR_ADD_UINT +#define CK_F_PR_AND_32 +#define CK_F_PR_AND_64 +#define CK_F_PR_AND_INT +#define CK_F_PR_AND_PTR +#define CK_F_PR_AND_UINT +#define CK_F_PR_CAS_32 +#define CK_F_PR_CAS_32_VALUE +#define CK_F_PR_CAS_64 +#define CK_F_PR_CAS_64_VALUE +#define CK_F_PR_CAS_INT +#define CK_F_PR_CAS_INT_VALUE +#define CK_F_PR_CAS_PTR +#define CK_F_PR_CAS_PTR_VALUE +#define CK_F_PR_CAS_UINT +#define CK_F_PR_CAS_UINT_VALUE +#define CK_F_PR_DEC_32 +#define CK_F_PR_DEC_64 +#define CK_F_PR_DEC_INT +#define CK_F_PR_DEC_PTR +#define CK_F_PR_DEC_UINT +#define CK_F_PR_FAA_32 +#define CK_F_PR_FAA_64 +#define CK_F_PR_FAA_INT +#define CK_F_PR_FAA_PTR +#define CK_F_PR_FAA_UINT +#define CK_F_PR_FAS_32 +#define CK_F_PR_FAS_64 +#define CK_F_PR_FAS_INT +#define CK_F_PR_FAS_PTR +#define CK_F_PR_FAS_UINT +#define CK_F_PR_FAS_DOUBLE +#define CK_F_PR_FENCE_LOAD +#define CK_F_PR_FENCE_LOAD_DEPENDS +#define CK_F_PR_FENCE_MEMORY +#define CK_F_PR_FENCE_STORE +#define CK_F_PR_FENCE_STRICT_LOAD +#define CK_F_PR_FENCE_STRICT_LOAD_DEPENDS +#define CK_F_PR_FENCE_STRICT_MEMORY +#define CK_F_PR_FENCE_STRICT_STORE +#define CK_F_PR_INC_32 +#define CK_F_PR_INC_64 +#define CK_F_PR_INC_INT +#define CK_F_PR_INC_PTR +#define CK_F_PR_INC_UINT +#define CK_F_PR_LOAD_16 +#define CK_F_PR_LOAD_32 +#define CK_F_PR_LOAD_64 +#define CK_F_PR_LOAD_8 +#define CK_F_PR_LOAD_CHAR +#define CK_F_PR_LOAD_DOUBLE +#define CK_F_PR_LOAD_INT +#define CK_F_PR_LOAD_PTR +#define CK_F_PR_LOAD_SHORT +#define CK_F_PR_LOAD_UINT +#define CK_F_PR_NEG_32 +#define CK_F_PR_NEG_64 +#define CK_F_PR_NEG_INT +#define CK_F_PR_NEG_PTR +#define CK_F_PR_NEG_UINT +#define CK_F_PR_NOT_32 +#define CK_F_PR_NOT_64 +#define CK_F_PR_NOT_INT +#define CK_F_PR_NOT_PTR +#define CK_F_PR_NOT_UINT +#define CK_F_PR_OR_32 +#define CK_F_PR_OR_64 +#define CK_F_PR_OR_INT +#define CK_F_PR_OR_PTR +#define CK_F_PR_OR_UINT +#define CK_F_PR_STALL +#define CK_F_PR_STORE_16 +#define CK_F_PR_STORE_32 +#define CK_F_PR_STORE_64 +#define CK_F_PR_STORE_8 +#define CK_F_PR_STORE_CHAR +#define CK_F_PR_STORE_DOUBLE +#define CK_F_PR_STORE_INT +#define CK_F_PR_STORE_PTR +#define CK_F_PR_STORE_SHORT +#define CK_F_PR_STORE_UINT +#define CK_F_PR_SUB_32 +#define CK_F_PR_SUB_64 +#define CK_F_PR_SUB_INT +#define CK_F_PR_SUB_PTR +#define CK_F_PR_SUB_UINT +#define CK_F_PR_XOR_32 +#define CK_F_PR_XOR_64 +#define CK_F_PR_XOR_INT +#define CK_F_PR_XOR_PTR +#define CK_F_PR_XOR_UINT + diff --git a/include/gcc/s390x/ck_pr.h b/include/gcc/s390x/ck_pr.h new file mode 100644 index 0000000..8ad22b2 --- /dev/null +++ b/include/gcc/s390x/ck_pr.h @@ -0,0 +1,373 @@ +/* + * Copyright 2009-2015 Samy Al Bahra. + * Copyright 2017 Neale Ferguson + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef CK_PR_S390X_H +#define CK_PR_S390X_H + +#ifndef CK_PR_H +#error Do not include this file directly, use ck_pr.h +#endif + +#include <ck_cc.h> +#include <ck_md.h> + +/* + * The following represent supported atomic operations. + * These operations may be emulated. + */ +#include "ck_f_pr.h" + +/* + * Minimum interface requirement met. + */ +#define CK_F_PR + +/* + * This bounces the hardware thread from low to medium + * priority. I am unsure of the benefits of this approach + * but it is used by the Linux kernel. + */ +CK_CC_INLINE static void +ck_pr_stall(void) +{ + __sync_synchronize(); + return; +} + +#define CK_PR_FENCE(T) \ + CK_CC_INLINE static void \ + ck_pr_fence_strict_##T(void) \ + { \ + __sync_synchronize(); \ + } + +/* + * These are derived from: + * http://www.ibm.com/developerworks/systems/articles/powerpc.html + */ +CK_PR_FENCE(atomic) +CK_PR_FENCE(atomic_store) +CK_PR_FENCE(atomic_load) +CK_PR_FENCE(store_atomic) +CK_PR_FENCE(load_atomic) +CK_PR_FENCE(store) +CK_PR_FENCE(store_load) +CK_PR_FENCE(load) +CK_PR_FENCE(load_store) +CK_PR_FENCE(memory) +CK_PR_FENCE(acquire) +CK_PR_FENCE(release) +CK_PR_FENCE(acqrel) +CK_PR_FENCE(lock) +CK_PR_FENCE(unlock) + +#undef CK_PR_FENCE + +#define CK_PR_LOAD(S, M, T, C, I) \ + CK_CC_INLINE static T \ + ck_pr_md_load_##S(const M *target) \ + { \ + T r; \ + __asm__ __volatile__(I "\t%0, %1\n" \ + : "=r" (r) \ + : "Q" (*(const C *)target) \ + : "memory"); \ + return (r); \ + } + +CK_PR_LOAD(ptr, void, void *, uint64_t, "lg") + +#define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I) + +CK_PR_LOAD_S(64, uint64_t, "lg") +CK_PR_LOAD_S(32, uint32_t, "llgf") +CK_PR_LOAD_S(16, uint16_t, "llgh") +CK_PR_LOAD_S(8, uint8_t, "llgc") +CK_PR_LOAD_S(uint, unsigned int, "llgf") +CK_PR_LOAD_S(int, int, "llgf") +CK_PR_LOAD_S(short, short, "lgh") +CK_PR_LOAD_S(char, char, "lgb") +#ifndef CK_PR_DISABLE_DOUBLE +CK_CC_INLINE static double +ck_pr_md_load_double(const double *target) +{ + double r; + __asm__ __volatile__("ld %0, %1\n" + : "=f" (r) + : "Q" (*(const double *)target) + : "memory"); + return (r); +} +#endif + +#undef CK_PR_LOAD_S +#undef CK_PR_LOAD + +#define CK_PR_STORE(S, M, T, C, I) \ + CK_CC_INLINE static void \ + ck_pr_md_store_##S(M *target, T v) \ + { \ + __asm__ __volatile__(I "\t%1, %0\n" \ + : "=Q" (*(C *)target) \ + : "r" (v) \ + : "memory"); \ + return; \ + } + +CK_PR_STORE(ptr, void, const void *, uint64_t, "stg") + +#define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, T, I) + +CK_PR_STORE_S(64, uint64_t, "stg") +CK_PR_STORE_S(32, uint32_t, "st") +CK_PR_STORE_S(16, uint16_t, "sth") +CK_PR_STORE_S(8, uint8_t, "stc") +CK_PR_STORE_S(uint, unsigned int, "st") +CK_PR_STORE_S(int, int, "st") +CK_PR_STORE_S(short, short, "sth") +CK_PR_STORE_S(char, char, "stc") 
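Each CK_PR_LOAD_S/CK_PR_STORE_S invocation above instantiates one accessor around the named instruction; the generic ck_pr_load_*/ck_pr_store_* wrappers in ck_pr.h resolve to these md_ variants. A small sketch of the 64-bit pair in use (the sequence variable is illustrative):

#include <ck_pr.h>
#include <stdint.h>

static uint64_t sequence;

static uint64_t
sequence_snapshot(void)
{

    /* Resolves to the "lg" form generated by CK_PR_LOAD_S(64, uint64_t, "lg"). */
    return ck_pr_load_64(&sequence);
}

static void
sequence_publish(uint64_t v)
{

    /* Resolves to the "stg" form generated by CK_PR_STORE_S(64, uint64_t, "stg"). */
    ck_pr_store_64(&sequence, v);
}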
+#ifndef CK_PR_DISABLE_DOUBLE +CK_CC_INLINE static void +ck_pr_md_store_double(double *target, double v) +{ + __asm__ __volatile__(" std %1, %0\n" + : "=Q" (*(double *)target) + : "f" (v) + : "0", "memory"); +} +#endif + +#undef CK_PR_STORE_S +#undef CK_PR_STORE + +CK_CC_INLINE static bool +ck_pr_cas_64_value(uint64_t *target, uint64_t compare, uint64_t set, uint64_t *value) +{ + *value = __sync_val_compare_and_swap(target,compare,set); + return (*value == compare); +} + +CK_CC_INLINE static bool +ck_pr_cas_ptr_value(void *target, void *compare, void *set, void *value) +{ + uintptr_t previous; + + previous = __sync_val_compare_and_swap((uintptr_t *) target, + (uintptr_t) compare, + (uintptr_t) set); + *((uintptr_t *) value) = previous; + return (previous == (uintptr_t) compare); +} + +CK_CC_INLINE static bool +ck_pr_cas_64(uint64_t *target, uint64_t compare, uint64_t set) +{ + return(__sync_bool_compare_and_swap(target,compare,set)); +} + +CK_CC_INLINE static bool +ck_pr_cas_ptr(void *target, void *compare, void *set) +{ + return(__sync_bool_compare_and_swap((uintptr_t *) target, + (uintptr_t) compare, + (uintptr_t) set)); +} + +#define CK_PR_CAS(N, T) \ + CK_CC_INLINE static bool \ + ck_pr_cas_##N##_value(T *target, T compare, T set, T *value) \ + { \ + *value = __sync_val_compare_and_swap(target, \ + compare, \ + set); \ + return(*value == compare); \ + } \ + CK_CC_INLINE static bool \ + ck_pr_cas_##N(T *target, T compare, T set) \ + { \ + return(__sync_bool_compare_and_swap(target, \ + compare, \ + set)); \ + } + +CK_PR_CAS(32, uint32_t) +CK_PR_CAS(uint, unsigned int) +CK_PR_CAS(int, int) + +#undef CK_PR_CAS + +CK_CC_INLINE static void * +ck_pr_fas_ptr(void *target, void *v) +{ + return((void *)__atomic_exchange_n((uintptr_t *) target, (uintptr_t) v, __ATOMIC_ACQUIRE)); +} + +#define CK_PR_FAS(N, M, T) \ + CK_CC_INLINE static T \ + ck_pr_fas_##N(M *target, T v) \ + { \ + return(__atomic_exchange_n(target, v, __ATOMIC_ACQUIRE)); \ + } + +CK_PR_FAS(64, uint64_t, uint64_t) +CK_PR_FAS(32, uint32_t, uint32_t) +CK_PR_FAS(int, int, int) +CK_PR_FAS(uint, unsigned int, unsigned int) + +#ifndef CK_PR_DISABLE_DOUBLE +CK_CC_INLINE static double +ck_pr_fas_double(double *target, double *v) +{ + double previous; + + __asm__ __volatile__ (" lg 1,%2\n" + "0: lg 0,%1\n" + " csg 0,1,%1\n" + " jnz 0b\n" + " ldgr %0,0\n" + : "=f" (previous) + : "Q" (target), "Q" (v) + : "0", "1", "cc", "memory"); + return (previous); +} +#endif + +#undef CK_PR_FAS + +/* + * Atomic store-only binary operations. 
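The *_value CAS variants report success and also hand back the value that was observed, which is exactly what a retry loop wants: on failure the snapshot is already refreshed and no extra load is needed. A minimal sketch of that pattern (counter_add_bounded is illustrative):

#include <ck_pr.h>
#include <stdbool.h>
#include <stdint.h>

static uint64_t counter;

static void
counter_add_bounded(uint64_t delta, uint64_t limit)
{
    uint64_t snap, update;

    snap = ck_pr_load_64(&counter);
    do {
        update = snap + delta;
        if (update > limit)
            update = limit;

        /* On failure, snap now holds the value another CPU installed. */
    } while (ck_pr_cas_64_value(&counter, snap, update, &snap) == false);

    return;
}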
+ */ +#define CK_PR_BINARY(K, S, M, T) \ + CK_CC_INLINE static void \ + ck_pr_##K##_##S(M *target, T d) \ + { \ + d = __sync_fetch_and_##K((T *)target, d); \ + return; \ + } + +#define CK_PR_BINARY_S(K, S, T) CK_PR_BINARY(K, S, T, T) + +#define CK_PR_GENERATE(K) \ + CK_PR_BINARY(K, ptr, void, void *) \ + CK_PR_BINARY_S(K, char, char) \ + CK_PR_BINARY_S(K, int, int) \ + CK_PR_BINARY_S(K, uint, unsigned int) \ + CK_PR_BINARY_S(K, 64, uint64_t) \ + CK_PR_BINARY_S(K, 32, uint32_t) \ + CK_PR_BINARY_S(K, 16, uint16_t) \ + CK_PR_BINARY_S(K, 8, uint8_t) + +CK_PR_GENERATE(add) +CK_PR_GENERATE(sub) +CK_PR_GENERATE(and) +CK_PR_GENERATE(or) +CK_PR_GENERATE(xor) + +#undef CK_PR_GENERATE +#undef CK_PR_BINARY_S +#undef CK_PR_BINARY + +#define CK_PR_UNARY(S, M, T) \ + CK_CC_INLINE static void \ + ck_pr_inc_##S(M *target) \ + { \ + ck_pr_add_##S(target, (T)1); \ + return; \ + } \ + CK_CC_INLINE static void \ + ck_pr_dec_##S(M *target) \ + { \ + ck_pr_sub_##S(target, (T)1); \ + return; \ + } + +#define CK_PR_UNARY_X(S, M) \ + CK_CC_INLINE static void \ + ck_pr_not_##S(M *target) \ + { \ + M newval; \ + do { \ + newval = ~(*target); \ + } while (!__sync_bool_compare_and_swap(target, \ + *target, \ + newval)); \ + } \ + CK_CC_INLINE static void \ + ck_pr_neg_##S(M *target) \ + { \ + M newval; \ + do { \ + newval = -(*target); \ + } while (!__sync_bool_compare_and_swap(target, \ + *target, \ + newval)); \ + } + +#define CK_PR_UNARY_S(S, M) CK_PR_UNARY(S, M, M) \ + CK_PR_UNARY_X(S, M) + +CK_PR_UNARY(ptr, void, void *) +CK_PR_UNARY_S(char, char) +CK_PR_UNARY_S(int, int) +CK_PR_UNARY_S(uint, unsigned int) +CK_PR_UNARY_S(64, uint64_t) +CK_PR_UNARY_S(32, uint32_t) +CK_PR_UNARY_S(16, uint16_t) +CK_PR_UNARY_S(8, uint8_t) + +#undef CK_PR_UNARY_S +#undef CK_PR_UNARY + +CK_CC_INLINE static void * +ck_pr_faa_ptr(void *target, uintptr_t delta) +{ + uintptr_t previous; + + previous = __sync_fetch_and_add((uintptr_t *) target, delta); + + return (void *)(previous); +} + +#define CK_PR_FAA(S, T) \ + CK_CC_INLINE static T \ + ck_pr_faa_##S(T *target, T delta) \ + { \ + T previous; \ + \ + previous = __sync_fetch_and_add(target, delta); \ + \ + return (previous); \ + } + +CK_PR_FAA(64, uint64_t) +CK_PR_FAA(32, uint32_t) +CK_PR_FAA(uint, unsigned int) +CK_PR_FAA(int, int) + +#undef CK_PR_FAA + +#endif /* CK_PR_S390X_H */ diff --git a/include/gcc/sparcv9/ck_pr.h b/include/gcc/sparcv9/ck_pr.h index 767af6a..b60e199 100644 --- a/include/gcc/sparcv9/ck_pr.h +++ b/include/gcc/sparcv9/ck_pr.h @@ -76,7 +76,7 @@ CK_PR_FENCE(store, "membar #StoreStore") CK_PR_FENCE(store_load, "membar #StoreLoad") CK_PR_FENCE(load, "membar #LoadLoad") CK_PR_FENCE(load_store, "membar #LoadStore") -CK_PR_FENCE(memory, "membar #LoadLoad | #LoadStore | #StoreStore | #StoreLoad") +CK_PR_FENCE(memory, "membar #MemIssue") CK_PR_FENCE(acquire, "membar #LoadLoad | #LoadStore") CK_PR_FENCE(release, "membar #LoadStore | #StoreStore") CK_PR_FENCE(acqrel, "membar #LoadLoad | #LoadStore | #StoreStore") @@ -136,11 +136,26 @@ CK_PR_STORE_S(int, int, "stsw") #undef CK_PR_STORE_S #undef CK_PR_STORE +/* Use the appropriate address space for atomics within the FreeBSD kernel. 
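ck_pr_faa_* returns the value observed before the addition, so it can hand out distinct, monotonically increasing tokens without a retry loop. A small sketch (next_id and id_allocate are illustrative):

#include <ck_pr.h>
#include <stdint.h>

static uint64_t next_id;

static uint64_t
id_allocate(void)
{

    /* Returns the pre-increment value; concurrent callers get distinct ids. */
    return ck_pr_faa_64(&next_id, 1);
}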
*/ +#if defined(__FreeBSD__) && defined(_KERNEL) +#include <sys/cdefs.h> +#include <machine/atomic.h> +#define CK_PR_INS_CAS "casa" +#define CK_PR_INS_CASX "casxa" +#define CK_PR_INS_SWAP "swapa" +#define CK_PR_ASI_ATOMIC __XSTRING(__ASI_ATOMIC) +#else +#define CK_PR_INS_CAS "cas" +#define CK_PR_INS_CASX "casx" +#define CK_PR_INS_SWAP "swap" +#define CK_PR_ASI_ATOMIC "" +#endif + CK_CC_INLINE static bool ck_pr_cas_64_value(uint64_t *target, uint64_t compare, uint64_t set, uint64_t *value) { - __asm__ __volatile__("casx [%1], %2, %0" + __asm__ __volatile__(CK_PR_INS_CASX " [%1] " CK_PR_ASI_ATOMIC ", %2, %0" : "+&r" (set) : "r" (target), "r" (compare) @@ -154,7 +169,7 @@ CK_CC_INLINE static bool ck_pr_cas_64(uint64_t *target, uint64_t compare, uint64_t set) { - __asm__ __volatile__("casx [%1], %2, %0" + __asm__ __volatile__(CK_PR_INS_CASX " [%1] " CK_PR_ASI_ATOMIC ", %2, %0" : "+&r" (set) : "r" (target), "r" (compare) @@ -181,7 +196,7 @@ ck_pr_cas_ptr_value(void *target, void *compare, void *set, void *previous) CK_CC_INLINE static bool \ ck_pr_cas_##N##_value(T *target, T compare, T set, T *value) \ { \ - __asm__ __volatile__("cas [%1], %2, %0" \ + __asm__ __volatile__(CK_PR_INS_CAS " [%1] " CK_PR_ASI_ATOMIC ", %2, %0" \ : "+&r" (set) \ : "r" (target), \ "r" (compare) \ @@ -192,7 +207,7 @@ ck_pr_cas_ptr_value(void *target, void *compare, void *set, void *previous) CK_CC_INLINE static bool \ ck_pr_cas_##N(T *target, T compare, T set) \ { \ - __asm__ __volatile__("cas [%1], %2, %0" \ + __asm__ __volatile__(CK_PR_INS_CAS " [%1] " CK_PR_ASI_ATOMIC ", %2, %0" \ : "+&r" (set) \ : "r" (target), \ "r" (compare) \ @@ -211,7 +226,7 @@ CK_PR_CAS(int, int) ck_pr_fas_##N(T *target, T update) \ { \ \ - __asm__ __volatile__("swap [%1], %0" \ + __asm__ __volatile__(CK_PR_INS_SWAP " [%1] " CK_PR_ASI_ATOMIC ", %0" \ : "+&r" (update) \ : "r" (target) \ : "memory"); \ @@ -224,5 +239,10 @@ CK_PR_FAS(32, uint32_t) #undef CK_PR_FAS +#undef CK_PR_INS_CAS +#undef CK_PR_INS_CASX +#undef CK_PR_INS_SWAP +#undef CK_PR_ASI_ATOMIC + #endif /* CK_PR_SPARCV9_H */ diff --git a/include/gcc/x86/ck_pr.h b/include/gcc/x86/ck_pr.h index a04cebf..5194dee 100644 --- a/include/gcc/x86/ck_pr.h +++ b/include/gcc/x86/ck_pr.h @@ -45,15 +45,9 @@ /* Minimum requirements for the CK_PR interface are met. */ #define CK_F_PR -#ifdef CK_MD_UMP -#define CK_PR_LOCK_PREFIX -#else -#define CK_PR_LOCK_PREFIX "lock " -#endif - /* - * Prevent speculative execution in busy-wait loops (P4 <=) - * or "predefined delay". + * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined + * delay". 
*/ CK_CC_INLINE static void ck_pr_stall(void) @@ -62,28 +56,52 @@ ck_pr_stall(void) return; } +#ifdef CK_MD_UMP +#define CK_PR_LOCK_PREFIX +#define CK_PR_FENCE(T, I) \ + CK_CC_INLINE static void \ + ck_pr_fence_strict_##T(void) \ + { \ + __asm__ __volatile__("" ::: "memory"); \ + return; \ + } +#else +#define CK_PR_LOCK_PREFIX "lock " #define CK_PR_FENCE(T, I) \ CK_CC_INLINE static void \ ck_pr_fence_strict_##T(void) \ { \ __asm__ __volatile__(I ::: "memory"); \ + return; \ } +#endif /* CK_MD_UMP */ -CK_PR_FENCE(atomic, "sfence") -CK_PR_FENCE(atomic_store, "sfence") -CK_PR_FENCE(atomic_load, "mfence") -CK_PR_FENCE(store_atomic, "sfence") -CK_PR_FENCE(load_atomic, "mfence") -CK_PR_FENCE(load, "lfence") -CK_PR_FENCE(load_store, "mfence") -CK_PR_FENCE(store, "sfence") -CK_PR_FENCE(store_load, "mfence") -CK_PR_FENCE(memory, "mfence") -CK_PR_FENCE(release, "mfence") -CK_PR_FENCE(acquire, "mfence") -CK_PR_FENCE(acqrel, "mfence") -CK_PR_FENCE(lock, "mfence") -CK_PR_FENCE(unlock, "mfence") +#if defined(CK_MD_SSE_DISABLE) +/* If SSE is disabled, then use atomic operations for serialization. */ +#define CK_MD_X86_MFENCE "lock addl $0, (%%esp)" +#define CK_MD_X86_SFENCE CK_MD_X86_MFENCE +#define CK_MD_X86_LFENCE CK_MD_X86_MFENCE +#else +#define CK_MD_X86_SFENCE "sfence" +#define CK_MD_X86_LFENCE "lfence" +#define CK_MD_X86_MFENCE "mfence" +#endif /* !CK_MD_SSE_DISABLE */ + +CK_PR_FENCE(atomic, "") +CK_PR_FENCE(atomic_store, "") +CK_PR_FENCE(atomic_load, "") +CK_PR_FENCE(store_atomic, "") +CK_PR_FENCE(load_atomic, "") +CK_PR_FENCE(load, CK_MD_X86_LFENCE) +CK_PR_FENCE(load_store, CK_MD_X86_MFENCE) +CK_PR_FENCE(store, CK_MD_X86_SFENCE) +CK_PR_FENCE(store_load, CK_MD_X86_MFENCE) +CK_PR_FENCE(memory, CK_MD_X86_MFENCE) +CK_PR_FENCE(release, CK_MD_X86_MFENCE) +CK_PR_FENCE(acquire, CK_MD_X86_MFENCE) +CK_PR_FENCE(acqrel, CK_MD_X86_MFENCE) +CK_PR_FENCE(lock, CK_MD_X86_MFENCE) +CK_PR_FENCE(unlock, CK_MD_X86_MFENCE) #undef CK_PR_FENCE @@ -215,18 +233,18 @@ CK_PR_FAA_S(8, uint8_t, "xaddb") } #define CK_PR_UNARY_V(K, S, T, C, I) \ - CK_CC_INLINE static void \ - ck_pr_##K##_##S##_zero(T *target, bool *r) \ + CK_CC_INLINE static bool \ + ck_pr_##K##_##S##_is_zero(T *target) \ { \ + bool ret; \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \ : "+m" (*(C *)target), \ - "=m" (*r) \ + "=qm" (ret) \ : \ : "memory", "cc"); \ - return; \ + return ret; \ } - #define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I) #define CK_PR_GENERATE(K) \ @@ -289,8 +307,38 @@ CK_PR_GENERATE(xor) #undef CK_PR_BINARY /* - * Atomic compare and swap. + * Atomic compare and swap, with a variant that sets *v to the old value of target. */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +#define CK_PR_CAS(S, M, T, C, I) \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S(M *target, T compare, T set) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + /* RAX is clobbered by cmpxchg. 
*/ \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ + } +#else #define CK_PR_CAS(S, M, T, C, I) \ CK_CC_INLINE static bool \ ck_pr_cas_##S(M *target, T compare, T set) \ @@ -303,7 +351,23 @@ CK_PR_GENERATE(xor) "a" (compare) \ : "memory", "cc"); \ return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + "setz %1;" \ + : "+m" (*(C *)target), \ + "=q" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ } +#endif CK_PR_CAS(ptr, void, void *, char, "cmpxchgl") @@ -320,41 +384,6 @@ CK_PR_CAS_S(8, uint8_t, "cmpxchgb") #undef CK_PR_CAS /* - * Compare and swap, set *v to old value of target. - */ -#define CK_PR_CAS_O(S, M, T, C, I, R) \ - CK_CC_INLINE static bool \ - ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ - { \ - bool z; \ - __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;" \ - "mov %% " R ", %2;" \ - "setz %1;" \ - : "+m" (*(C *)target), \ - "=a" (z), \ - "=m" (*(C *)v) \ - : "q" (set), \ - "a" (compare) \ - : "memory", "cc"); \ - return (bool)z; \ - } - -CK_PR_CAS_O(ptr, void, void *, char, "l", "eax") - -#define CK_PR_CAS_O_S(S, T, I, R) \ - CK_PR_CAS_O(S, T, T, T, I, R) - -CK_PR_CAS_O_S(char, char, "b", "al") -CK_PR_CAS_O_S(int, int, "l", "eax") -CK_PR_CAS_O_S(uint, unsigned int, "l", "eax") -CK_PR_CAS_O_S(32, uint32_t, "l", "eax") -CK_PR_CAS_O_S(16, uint16_t, "w", "ax") -CK_PR_CAS_O_S(8, uint8_t, "b", "al") - -#undef CK_PR_CAS_O_S -#undef CK_PR_CAS_O - -/* * Atomic bit test operations. */ #define CK_PR_BT(K, S, T, P, C, I) \ diff --git a/include/gcc/x86_64/ck_pr.h b/include/gcc/x86_64/ck_pr.h index 532d593..4222729 100644 --- a/include/gcc/x86_64/ck_pr.h +++ b/include/gcc/x86_64/ck_pr.h @@ -58,8 +58,8 @@ #endif /* - * Prevent speculative execution in busy-wait loops (P4 <=) - * or "predefined delay". + * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined + * delay". */ CK_CC_INLINE static void ck_pr_stall(void) @@ -75,18 +75,39 @@ ck_pr_stall(void) __asm__ __volatile__(I ::: "memory"); \ } -CK_PR_FENCE(atomic, "sfence") -CK_PR_FENCE(atomic_store, "sfence") -CK_PR_FENCE(atomic_load, "mfence") -CK_PR_FENCE(store_atomic, "sfence") -CK_PR_FENCE(load_atomic, "mfence") +/* Atomic operations are always serializing. */ +CK_PR_FENCE(atomic, "") +CK_PR_FENCE(atomic_store, "") +CK_PR_FENCE(atomic_load, "") +CK_PR_FENCE(store_atomic, "") +CK_PR_FENCE(load_atomic, "") + +/* Traditional fence interface. */ CK_PR_FENCE(load, "lfence") CK_PR_FENCE(load_store, "mfence") CK_PR_FENCE(store, "sfence") CK_PR_FENCE(store_load, "mfence") CK_PR_FENCE(memory, "mfence") + +/* Below are stdatomic-style fences. */ + +/* + * Provides load-store and store-store ordering. However, Intel specifies that + * the WC memory model is relaxed. It is likely an sfence *is* sufficient (in + * particular, stores are not re-ordered with respect to prior loads and it is + * really just the stores that are subject to re-ordering). However, we take + * the conservative route as the manuals are too ambiguous for my taste. 
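The comments above spell out what the release and acquire fences must order; the canonical use is flag-based message passing, sketched below with illustrative variables (payload, ready):

#include <ck_pr.h>

static int payload;
static unsigned int ready;

static void
producer(int v)
{

    payload = v;
    ck_pr_fence_release();        /* data store ordered before the flag store */
    ck_pr_store_uint(&ready, 1);
}

static int
consumer(void)
{

    while (ck_pr_load_uint(&ready) == 0)
        ck_pr_stall();
    ck_pr_fence_acquire();        /* flag load ordered before the data load */
    return payload;
}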
+ */ CK_PR_FENCE(release, "mfence") + +/* + * Provides load-load and load-store ordering. The lfence instruction ensures + * all prior load operations are complete before any subsequent instructions + * actually begin execution. However, the manual also ends up going to describe + * WC memory as a relaxed model. + */ CK_PR_FENCE(acquire, "mfence") + CK_PR_FENCE(acqrel, "mfence") CK_PR_FENCE(lock, "mfence") CK_PR_FENCE(unlock, "mfence") @@ -311,18 +332,18 @@ CK_PR_FAA_S(8, uint8_t, "xaddb") } #define CK_PR_UNARY_V(K, S, T, C, I) \ - CK_CC_INLINE static void \ - ck_pr_##K##_##S##_zero(T *target, bool *r) \ + CK_CC_INLINE static bool \ + ck_pr_##K##_##S##_is_zero(T *target) \ { \ + bool ret; \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \ : "+m" (*(C *)target), \ - "=m" (*r) \ + "=rm" (ret) \ : \ : "memory", "cc"); \ - return; \ + return ret; \ } - #define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I) #define CK_PR_GENERATE(K) \ @@ -387,8 +408,38 @@ CK_PR_GENERATE(xor) #undef CK_PR_BINARY /* - * Atomic compare and swap. + * Atomic compare and swap, with a variant that sets *v to the old value of target. */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +#define CK_PR_CAS(S, M, T, C, I) \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S(M *target, T compare, T set) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + /* RAX is clobbered by cmpxchg. */ \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ + } +#else #define CK_PR_CAS(S, M, T, C, I) \ CK_CC_INLINE static bool \ ck_pr_cas_##S(M *target, T compare, T set) \ @@ -401,7 +452,23 @@ CK_PR_GENERATE(xor) "a" (compare) \ : "memory", "cc"); \ return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + "setz %1;" \ + : "+m" (*(C *)target), \ + "=q" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ } +#endif CK_PR_CAS(ptr, void, void *, char, "cmpxchgq") @@ -422,45 +489,6 @@ CK_PR_CAS_S(8, uint8_t, "cmpxchgb") #undef CK_PR_CAS /* - * Compare and swap, set *v to old value of target. - */ -#define CK_PR_CAS_O(S, M, T, C, I, R) \ - CK_CC_INLINE static bool \ - ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ - { \ - bool z; \ - __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;" \ - "mov %% " R ", %2;" \ - "setz %1;" \ - : "+m" (*(C *)target), \ - "=a" (z), \ - "=m" (*(C *)v) \ - : "q" (set), \ - "a" (compare) \ - : "memory", "cc"); \ - return z; \ - } - -CK_PR_CAS_O(ptr, void, void *, char, "q", "rax") - -#define CK_PR_CAS_O_S(S, T, I, R) \ - CK_PR_CAS_O(S, T, T, T, I, R) - -CK_PR_CAS_O_S(char, char, "b", "al") -CK_PR_CAS_O_S(int, int, "l", "eax") -CK_PR_CAS_O_S(uint, unsigned int, "l", "eax") -#ifndef CK_PR_DISABLE_DOUBLE -CK_PR_CAS_O_S(double, double, "q", "rax") -#endif -CK_PR_CAS_O_S(64, uint64_t, "q", "rax") -CK_PR_CAS_O_S(32, uint32_t, "l", "eax") -CK_PR_CAS_O_S(16, uint16_t, "w", "ax") -CK_PR_CAS_O_S(8, uint8_t, "b", "al") - -#undef CK_PR_CAS_O_S -#undef CK_PR_CAS_O - -/* * Contrary to C-interface, alignment requirements are that of uint64_t[2]. 
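The double-width operations work on a uint64_t[2] (or pointer pair) with the array alignment noted above; a common use is swinging a value together with a generation counter in one step. A sketch, on targets that provide the double-width CAS, assuming an illustrative slot whose CK_CC_CACHELINE alignment also satisfies the 16-byte requirement:

#include <ck_cc.h>
#include <ck_pr.h>
#include <stdbool.h>
#include <stdint.h>

/* slot[0] is the value, slot[1] a generation counter bumped on every swing. */
static uint64_t slot[2] CK_CC_CACHELINE;

static bool
slot_swing(uint64_t value)
{
    uint64_t expected[2], desired[2];

    expected[0] = ck_pr_load_64(&slot[0]);
    expected[1] = ck_pr_load_64(&slot[1]);
    desired[0] = value;
    desired[1] = expected[1] + 1;

    /* Succeeds only if both halves still match the snapshot. */
    return ck_pr_cas_64_2(slot, expected, desired);
}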
*/ CK_CC_INLINE static bool diff --git a/include/spinlock/dec.h b/include/spinlock/dec.h index 11d36dd..3e36bf7 100644 --- a/include/spinlock/dec.h +++ b/include/spinlock/dec.h @@ -111,7 +111,8 @@ ck_spinlock_dec_lock_eb(struct ck_spinlock_dec *lock) if (r == true) break; - ck_backoff_eb(&backoff); + while (ck_pr_load_uint(&lock->value) != 1) + ck_backoff_eb(&backoff); } ck_pr_fence_lock(); diff --git a/include/spinlock/fas.h b/include/spinlock/fas.h index 4e6c123..bfe91fe 100644 --- a/include/spinlock/fas.h +++ b/include/spinlock/fas.h @@ -77,10 +77,11 @@ CK_CC_INLINE static void ck_spinlock_fas_lock(struct ck_spinlock_fas *lock) { - while (ck_pr_fas_uint(&lock->value, true) == true) { - while (ck_pr_load_uint(&lock->value) == true) - ck_pr_stall(); - } + while (CK_CC_UNLIKELY(ck_pr_fas_uint(&lock->value, true) == true)) { + do { + ck_pr_stall(); + } while (ck_pr_load_uint(&lock->value) == true); + } ck_pr_fence_lock(); return; diff --git a/include/spinlock/hclh.h b/include/spinlock/hclh.h index 296448b..ece56c6 100644 --- a/include/spinlock/hclh.h +++ b/include/spinlock/hclh.h @@ -81,6 +81,8 @@ ck_spinlock_hclh_lock(struct ck_spinlock_hclh **glob_queue, thread->wait = true; thread->splice = false; thread->cluster_id = (*local_queue)->cluster_id; + /* Make sure previous->previous doesn't appear to be NULL */ + thread->previous = *local_queue; /* Serialize with respect to update of local queue. */ ck_pr_fence_store_atomic(); @@ -91,13 +93,15 @@ ck_spinlock_hclh_lock(struct ck_spinlock_hclh **glob_queue, /* Wait until previous thread from the local queue is done with lock. */ ck_pr_fence_load(); - if (previous->previous != NULL && - previous->cluster_id == thread->cluster_id) { - while (ck_pr_load_uint(&previous->wait) == true) + if (previous->previous != NULL) { + while (ck_pr_load_uint(&previous->wait) == true && + ck_pr_load_int(&previous->cluster_id) == thread->cluster_id && + ck_pr_load_uint(&previous->splice) == false) ck_pr_stall(); /* We're head of the global queue, we're done */ - if (ck_pr_load_uint(&previous->splice) == false) + if (ck_pr_load_int(&previous->cluster_id) == thread->cluster_id && + ck_pr_load_uint(&previous->splice) == false) return; } diff --git a/regressions/Makefile b/regressions/Makefile index 3195e52..c74b4fa 100644 --- a/regressions/Makefile +++ b/regressions/Makefile @@ -4,7 +4,9 @@ DIR=array \ bitmap \ brlock \ bytelock \ + cc \ cohort \ + ec \ epoch \ fifo \ hp \ @@ -27,6 +29,7 @@ DIR=array \ all: $(MAKE) -C ./ck_array/validate all + $(MAKE) -C ./ck_cc/validate all $(MAKE) -C ./ck_cohort/validate all $(MAKE) -C ./ck_cohort/benchmark all $(MAKE) -C ./ck_bitmap/validate all @@ -69,9 +72,12 @@ all: $(MAKE) -C ./ck_pflock/benchmark all $(MAKE) -C ./ck_hp/validate all $(MAKE) -C ./ck_hp/benchmark all + $(MAKE) -C ./ck_ec/validate all + $(MAKE) -C ./ck_ec/benchmark all clean: $(MAKE) -C ./ck_array/validate clean + $(MAKE) -C ./ck_cc/validate clean $(MAKE) -C ./ck_pflock/validate clean $(MAKE) -C ./ck_pflock/benchmark clean $(MAKE) -C ./ck_tflock/validate clean @@ -116,6 +122,8 @@ clean: $(MAKE) -C ./ck_pflock/benchmark clean $(MAKE) -C ./ck_hp/validate clean $(MAKE) -C ./ck_hp/benchmark clean + $(MAKE) -C ./ck_ec/validate clean + $(MAKE) -C ./ck_ec/benchmark clean check: all rc=0; \ diff --git a/regressions/ck_bitmap/validate/serial.c b/regressions/ck_bitmap/validate/serial.c index ba52588..1cf6c53 100644 --- a/regressions/ck_bitmap/validate/serial.c +++ b/regressions/ck_bitmap/validate/serial.c @@ -159,7 +159,7 @@ test_init(bool init) bytes = 
ck_bitmap_size(length); bitmap = malloc(bytes); - memset(bitmap, random(), bytes); + memset(bitmap, common_rand(), bytes); ck_bitmap_init(bitmap, length, init); @@ -188,7 +188,7 @@ random_init(void) ck_bitmap_init(bitmap, length, false); for (i = 0; i < length; i++) { - if (random() & 1) { + if (common_rand() & 1) { ck_bitmap_set(bitmap, i); } } @@ -259,7 +259,7 @@ random_test(unsigned int seed) ck_bitmap_t *x, *x_copy, *y; unsigned int i; - srandom(seed); + common_srand(seed); test_init(false); test_init(true); diff --git a/regressions/ck_cc/validate/Makefile b/regressions/ck_cc/validate/Makefile new file mode 100644 index 0000000..2da34d1 --- /dev/null +++ b/regressions/ck_cc/validate/Makefile @@ -0,0 +1,17 @@ +.PHONY: check clean distribution + +OBJECTS=ck_cc + +all: $(OBJECTS) + +ck_cc: ck_cc.c ../../../include/ck_cc.h + $(CC) $(CFLAGS) -g2 -o ck_cc ck_cc.c + +check: all + ./ck_cc + +clean: + rm -rf *~ *.o $(OBJECTS) *.dSYM *.exe + +include ../../../build/regressions.build +CFLAGS+=-D_GNU_SOURCE diff --git a/regressions/ck_cc/validate/ck_cc.c b/regressions/ck_cc/validate/ck_cc.c new file mode 100644 index 0000000..a22030f --- /dev/null +++ b/regressions/ck_cc/validate/ck_cc.c @@ -0,0 +1,37 @@ +#include <ck_pr.h> +#include <limits.h> +#include <stdio.h> + +#include "../../common.h" + +int +main(void) +{ + unsigned int x; + + ck_pr_store_uint(&x, 0x10110); + + if (ck_cc_ffs(0) != 0) + ck_error("ffs(0) = %d\n", ck_cc_ffs(0)); + if (ck_cc_ffs(4) != 3) + ck_error("ffs(4) = %d\n", ck_cc_ffs(4)); + if (ck_cc_ffs(UINT_MAX) != 1) + ck_error("ffs(UINT_MAX) = %d\n", ck_cc_ffs(UINT_MAX)); + if (ck_cc_ffs(x) != 5) + ck_error("ffs(%u) = %d\n", x, ck_cc_ffs(x)); + + if (ck_cc_ffs(x) != ck_cc_ffsl(x) || + ck_cc_ffsl(x) != ck_cc_ffsll(x) || + ck_cc_ffs(x) != ck_cc_ffsll(x)) { + ck_error(" ffs = %d, ffsl = %d, ffsll = %d\n", + ck_cc_ffs(x), ck_cc_ffsl(x), ck_cc_ffsll(x)); + } + + if (ck_cc_ctz(x) != 4) + ck_error("ctz = %d\n", ck_cc_ctz(x)); + + if (ck_cc_popcount(x) != 3) + ck_error("popcount = %d\n", ck_cc_popcount(x)); + + return 0; +} diff --git a/regressions/ck_ec/benchmark/Makefile b/regressions/ck_ec/benchmark/Makefile new file mode 100644 index 0000000..c266023 --- /dev/null +++ b/regressions/ck_ec/benchmark/Makefile @@ -0,0 +1,18 @@ +.PHONY: check clean distribution + +OBJECTS=ck_ec + +all: $(OBJECTS) + +ck_ec: ck_ec.c ../../../include/ck_ec.h + $(CC) $(CFLAGS) ../../../src/ck_ec.c -o ck_ec ck_ec.c + +check: all + ./ck_ec $(CORES) 1 + +clean: + rm -rf *~ *.o $(OBJECTS) *.dSYM *.exe + +include ../../../build/regressions.build +CFLAGS+=-D_GNU_SOURCE + diff --git a/regressions/ck_ec/benchmark/ck_ec.c b/regressions/ck_ec/benchmark/ck_ec.c new file mode 100644 index 0000000..655f9d8 --- /dev/null +++ b/regressions/ck_ec/benchmark/ck_ec.c @@ -0,0 +1,484 @@ +/* + * Copyright 2018 Paul Khuong. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <assert.h> +#include <ck_cc.h> +#include <ck_ec.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> + +#include "../../common.h" + +#ifndef STEPS +#define STEPS (65536 * 64) +#endif + +static int gettime(const struct ck_ec_ops *, struct timespec *out); +static void wake32(const struct ck_ec_ops *, const uint32_t *); +static void wait32(const struct ck_ec_wait_state *, + const uint32_t *, uint32_t, const struct timespec *); +static void wake64(const struct ck_ec_ops *, const uint64_t *); +static void wait64(const struct ck_ec_wait_state *, + const uint64_t *, uint64_t, const struct timespec *); + +static const struct ck_ec_ops test_ops = { + .gettime = gettime, + .wait32 = wait32, + .wait64 = wait64, + .wake32 = wake32, + .wake64 = wake64 +}; + +#ifndef __linux__ +static int gettime(const struct ck_ec_ops *ops, struct timespec *out) +{ + (void)out; + + assert(ops == &test_ops); + return -1; +} + +static void wait32(const struct ck_ec_wait_state *state, + const uint32_t *address, uint32_t expected, + const struct timespec *deadline) +{ + (void)address; + (void)expected; + (void)deadline; + + assert(state->ops == &test_ops); + return; +} + +static void wait64(const struct ck_ec_wait_state *state, + const uint64_t *address, uint64_t expected, + const struct timespec *deadline) +{ + (void)address; + (void)expected; + (void)deadline; + + assert(state->ops == &test_ops); + return; +} + +static void wake32(const struct ck_ec_ops *ops, const uint32_t *address) +{ + (void)address; + + assert(ops == &test_ops); + return; +} + +static void wake64(const struct ck_ec_ops *ops, const uint64_t *address) +{ + (void)address; + + assert(ops == &test_ops); + return; +} +#else +#include <linux/futex.h> +#include <sys/syscall.h> +#include <time.h> +#include <unistd.h> + +static int gettime(const struct ck_ec_ops *ops, struct timespec *out) +{ + assert(ops == &test_ops); + return clock_gettime(CLOCK_MONOTONIC, out); +} + +static void wait32(const struct ck_ec_wait_state *state, + const uint32_t *address, uint32_t expected, + const struct timespec *deadline) +{ + assert(state->ops == &test_ops); + syscall(SYS_futex, address, + FUTEX_WAIT_BITSET, expected, deadline, + NULL, FUTEX_BITSET_MATCH_ANY, 0); + return; +} + +static void wait64(const struct ck_ec_wait_state *state, + const uint64_t *address, uint64_t expected, + const struct timespec *deadline) +{ + const void *low_half; + + assert(state->ops == &test_ops); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + low_half = address; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + low_half = (uintptr_t)address + sizeof(uint32_t); +#else +# error "__BYTE_ORDER__ must be defined." 
+#endif + + syscall(SYS_futex, low_half, + FUTEX_WAIT_BITSET, (uint32_t)expected, deadline, + NULL, FUTEX_BITSET_MATCH_ANY, 0); + return; +} + +static void wake32(const struct ck_ec_ops *ops, const uint32_t *address) +{ + assert(ops == &test_ops); + syscall(SYS_futex, address, + FUTEX_WAKE, INT_MAX, + /* ignored arguments */NULL, NULL, 0); + return; +} + +static void wake64(const struct ck_ec_ops *ops, const uint64_t *address) +{ + const void *low_half; + + assert(ops == &test_ops); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + low_half = address; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + low_half = (uintptr_t)address + sizeof(uint32_t); +#else +# error "__BYTE_ORDER__ must be defined." +#endif + + syscall(SYS_futex, low_half, + FUTEX_WAKE, INT_MAX, + /* ignored arguments */NULL, NULL, 0); + return; +} +#endif /* __linux__ */ + +static const struct ck_ec_mode sp = { + .ops = &test_ops, + .single_producer = true +}; + +static const struct ck_ec_mode mp = { + .ops = &test_ops, + .single_producer = false +}; + +static CK_CC_FORCE_INLINE void bench32(const struct ck_ec_mode mode) +{ + ck_ec32_t ec CK_CC_CACHELINE = CK_EC_INITIALIZER; + uint64_t a; + uint64_t baseline = 1000 * 1000; + uint32_t value; + + for (size_t i = 0; i < STEPS; i++) { + uint64_t s = rdtsc(); + uint64_t elapsed = rdtsc() - s; + + if (elapsed < baseline) { + baseline = elapsed; + } + } + + /* Read value. */ + a = 0; + value = 0; + for (size_t i = 0; i < STEPS / 4; i++) { + uint64_t s = rdtsc(); + + value ^= ck_ec32_value(&ec); + value ^= ck_ec32_value(&ec); + value ^= ck_ec32_value(&ec); + value ^= ck_ec32_value(&ec); + + __asm__ volatile("" :: "r"(value)); + a += rdtsc() - s - baseline; + } + + printf("%s ec32_value: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + /* Wait (fast path). */ + a = 0; + for (size_t i = 0; i < STEPS / 4; i++) { + uint64_t s = rdtsc(); + + ck_ec32_wait(&ec, &mode, 1, NULL); + ck_ec32_wait(&ec, &mode, 1, NULL); + ck_ec32_wait(&ec, &mode, 1, NULL); + ck_ec32_wait(&ec, &mode, 1, NULL); + + a += rdtsc() - s - baseline; + } + + printf("%s ec32_wait fast: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + /* trywait. */ + a = 0; + for (size_t i = 0; i < STEPS / 4; i++) { + struct timespec past = { .tv_sec = 0 }; + uint64_t s = rdtsc(); + + ck_ec32_wait(&ec, &mode, 0, &past); + ck_ec32_wait(&ec, &mode, 0, &past); + ck_ec32_wait(&ec, &mode, 0, &past); + ck_ec32_wait(&ec, &mode, 0, &past); + + a += rdtsc() - s - baseline; + } + + printf("%s ec32_wait timeout: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + /* Inc (no waiter). */ + assert(!ck_ec32_has_waiters(&ec)); + a = 0; + for (size_t i = 0; i < STEPS / 4; i++) { + uint64_t s = rdtsc(); + + ck_ec32_inc(&ec, &mode); + ck_ec32_inc(&ec, &mode); + ck_ec32_inc(&ec, &mode); + ck_ec32_inc(&ec, &mode); + + a += rdtsc() - s - baseline; + } + + printf("%s ec32_inc: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + /* Inc (with waiter). */ + assert(!ck_ec32_has_waiters(&ec)); + a = 0; + for (size_t i = 0; i < STEPS; i++) { + struct timespec past = { .tv_sec = 1 }; + uint64_t s; + + ck_ec32_wait(&ec, &mode, ck_ec32_value(&ec), &past); + assert(ck_ec32_has_waiters(&ec)); + + s = rdtsc(); + ck_ec32_inc(&ec, &mode); + a += rdtsc() - s - baseline; + } + + printf("%s ec32_inc slow: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + /* Add (no waiter). 
*/ + assert(!ck_ec32_has_waiters(&ec)); + a = 0; + for (size_t i = 0; i < STEPS / 4; i++) { + uint64_t s = rdtsc(); + + ck_ec32_add(&ec, &mode, i + 1); + ck_ec32_add(&ec, &mode, i + 2); + ck_ec32_add(&ec, &mode, i + 3); + ck_ec32_add(&ec, &mode, i + 4); + + a += rdtsc() - s - baseline; + } + + printf("%s ec32_add: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + assert(!ck_ec32_has_waiters(&ec)); + a = 0; + for (size_t i = 0; i < STEPS; i++) { + struct timespec past = { .tv_sec = 1 }; + uint64_t s; + + ck_ec32_wait(&ec, &mode, ck_ec32_value(&ec), &past); + assert(ck_ec32_has_waiters(&ec)); + + s = rdtsc(); + ck_ec32_add(&ec, &mode, i + 1); + a += rdtsc() - s - baseline; + } + + printf("%s ec32_add slow: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + return; +} + +#ifdef CK_F_EC64 +static CK_CC_FORCE_INLINE void bench64(const struct ck_ec_mode mode) +{ + ck_ec64_t ec CK_CC_CACHELINE = CK_EC_INITIALIZER; + uint64_t a; + uint64_t baseline = 1000 * 1000; + uint64_t value; + + for (size_t i = 0; i < STEPS; i++) { + uint64_t s = rdtsc(); + uint64_t elapsed = rdtsc() - s; + + if (elapsed < baseline) { + baseline = elapsed; + } + } + + /* Read value. */ + a = 0; + value = 0; + for (size_t i = 0; i < STEPS / 4; i++) { + uint64_t s = rdtsc(); + + value ^= ck_ec64_value(&ec); + value ^= ck_ec64_value(&ec); + value ^= ck_ec64_value(&ec); + value ^= ck_ec64_value(&ec); + + __asm__ volatile("" :: "r"(value)); + a += rdtsc() - s - baseline; + } + + printf("%s ec64_value: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + /* Wait (fast path). */ + a = 0; + for (size_t i = 0; i < STEPS / 4; i++) { + uint64_t s = rdtsc(); + + ck_ec64_wait(&ec, &mode, 1, NULL); + ck_ec64_wait(&ec, &mode, 1, NULL); + ck_ec64_wait(&ec, &mode, 1, NULL); + ck_ec64_wait(&ec, &mode, 1, NULL); + + a += rdtsc() - s - baseline; + } + + printf("%s ec64_wait fast: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + /* trywait. */ + a = 0; + for (size_t i = 0; i < STEPS / 4; i++) { + struct timespec past = { .tv_sec = 0 }; + uint64_t s = rdtsc(); + + ck_ec64_wait(&ec, &mode, 0, &past); + ck_ec64_wait(&ec, &mode, 0, &past); + ck_ec64_wait(&ec, &mode, 0, &past); + ck_ec64_wait(&ec, &mode, 0, &past); + + a += rdtsc() - s - baseline; + } + + printf("%s ec64_wait timeout: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + /* Inc (no waiter). */ + assert(!ck_ec64_has_waiters(&ec)); + a = 0; + for (size_t i = 0; i < STEPS / 4; i++) { + uint64_t s = rdtsc(); + + ck_ec64_inc(&ec, &mode); + ck_ec64_inc(&ec, &mode); + ck_ec64_inc(&ec, &mode); + ck_ec64_inc(&ec, &mode); + + a += rdtsc() - s - baseline; + } + + printf("%s ec64_inc: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + /* Inc (with waiter). */ + assert(!ck_ec64_has_waiters(&ec)); + a = 0; + for (size_t i = 0; i < STEPS; i++) { + struct timespec past = { .tv_sec = 1 }; + uint64_t s; + + ck_ec64_wait(&ec, &mode, ck_ec64_value(&ec), &past); + assert(ck_ec64_has_waiters(&ec)); + + s = rdtsc(); + ck_ec64_inc(&ec, &mode); + a += rdtsc() - s - baseline; + } + + printf("%s ec64_inc slow: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + /* Add (no waiter). 
*/ + assert(!ck_ec64_has_waiters(&ec)); + a = 0; + for (size_t i = 0; i < STEPS / 4; i++) { + uint64_t s = rdtsc(); + + ck_ec64_add(&ec, &mode, i + 1); + ck_ec64_add(&ec, &mode, i + 2); + ck_ec64_add(&ec, &mode, i + 3); + ck_ec64_add(&ec, &mode, i + 4); + + a += rdtsc() - s - baseline; + } + + printf("%s ec64_add: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + + assert(!ck_ec64_has_waiters(&ec)); + a = 0; + for (size_t i = 0; i < STEPS; i++) { + struct timespec past = { .tv_sec = 1 }; + uint64_t s; + + ck_ec64_wait(&ec, &mode, ck_ec64_value(&ec), &past); + assert(ck_ec64_has_waiters(&ec)); + + s = rdtsc(); + ck_ec64_add(&ec, &mode, i + 1); + a += rdtsc() - s - baseline; + } + + printf("%s ec64_add slow: %" PRIu64 "\n", + (mode.single_producer ? "SP" : "MP"), a / STEPS); + return; +} +#endif /* CK_F_EC64 */ + +int +main(void) +{ + printf("SP ec32\n"); + bench32(sp); + printf("\nMP ec32\n"); + bench32(mp); + +#ifdef CK_F_EC64 + printf("\nSP ec64\n"); + bench64(sp); + printf("\nMP ec64\n"); + bench64(mp); +#endif /* CK_F_EC64 */ + + return 0; +} diff --git a/regressions/ck_ec/validate/Makefile b/regressions/ck_ec/validate/Makefile new file mode 100644 index 0000000..f03f493 --- /dev/null +++ b/regressions/ck_ec/validate/Makefile @@ -0,0 +1,73 @@ +.PHONY: check clean distribution + +FUZZER ?= none + +FUZZ_CFLAGS ?= + +# See http://gallium.inria.fr/blog/portable-conditionals-in-makefiles/ for +# the portable conditional technique below. +none_fuzz_cflags = +libfuzzer_fuzz_cflags = -DUSE_LIBFUZZER -fsanitize=fuzzer,memory,undefined + +FUZZ_CFLAGS += ${${FUZZER}_fuzz_cflags} + +OBJECTS = ck_ec_smoke_test \ + prop_test_timeutil_add \ + prop_test_timeutil_add_ns \ + prop_test_timeutil_cmp \ + prop_test_timeutil_scale \ + prop_test_value \ + prop_test_wakeup \ + prop_test_slow_wakeup + +all: $(OBJECTS) + +check: all + ./ck_ec_smoke_test + # the command line arguments are only consumed by libfuzzer. 
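+ # (with the default FUZZER=none they are ignored and each binary just runs its built-in examples; building with FUZZER=libfuzzer, presumably under a clang toolchain, adds -DUSE_LIBFUZZER and the sanitizer flags defined above)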
+ ./prop_test_slow_wakeup -max_total_time=60 + ./prop_test_timeutil_add -max_total_time=60 + ./prop_test_timeutil_add_ns -max_total_time=60 + ./prop_test_timeutil_cmp -max_total_time=60 + ./prop_test_timeutil_scale -max_total_time=60 + ./prop_test_value -max_total_time=60 + ./prop_test_wakeup -max_total_time=60 + +quickfuzz: all + ./prop_test_slow_wakeup -max_total_time=5 + ./prop_test_timeutil_add -max_total_time=5 + ./prop_test_timeutil_add_ns -max_total_time=5 + ./prop_test_timeutil_cmp -max_total_time=5 + ./prop_test_timeutil_scale -max_total_time=5 + ./prop_test_value -max_total_time=5 + ./prop_test_wakeup -max_total_time=5 + +ck_ec_smoke_test: ../../../src/ck_ec.c ck_ec_smoke_test.c ../../../src/ck_ec_timeutil.h ../../../include/ck_ec.h + $(CC) $(CFLAGS) -std=gnu11 ../../../src/ck_ec.c -o ck_ec_smoke_test ck_ec_smoke_test.c + +prop_test_slow_wakeup: ../../../src/ck_ec.c prop_test_slow_wakeup.c ../../../src/ck_ec_timeutil.h ../../../include/ck_ec.h fuzz_harness.h + $(CC) $(CFLAGS) $(FUZZ_CFLAGS) ../../../src/ck_ec.c -o prop_test_slow_wakeup prop_test_slow_wakeup.c + +prop_test_timeutil_add: ../../../src/ck_ec.c prop_test_timeutil_add.c ../../../src/ck_ec_timeutil.h ../../../include/ck_ec.h fuzz_harness.h + $(CC) $(CFLAGS) $(FUZZ_CFLAGS) ../../../src/ck_ec.c -o prop_test_timeutil_add prop_test_timeutil_add.c + +prop_test_timeutil_add_ns: ../../../src/ck_ec.c prop_test_timeutil_add_ns.c ../../../src/ck_ec_timeutil.h ../../../include/ck_ec.h fuzz_harness.h + $(CC) $(CFLAGS) $(FUZZ_CFLAGS) ../../../src/ck_ec.c -o prop_test_timeutil_add_ns prop_test_timeutil_add_ns.c + +prop_test_timeutil_cmp: ../../../src/ck_ec.c prop_test_timeutil_cmp.c ../../../src/ck_ec_timeutil.h ../../../include/ck_ec.h fuzz_harness.h + $(CC) $(CFLAGS) $(FUZZ_CFLAGS) ../../../src/ck_ec.c -o prop_test_timeutil_cmp prop_test_timeutil_cmp.c + +prop_test_timeutil_scale: ../../../src/ck_ec.c prop_test_timeutil_scale.c ../../../src/ck_ec_timeutil.h ../../../include/ck_ec.h fuzz_harness.h + $(CC) $(CFLAGS) $(FUZZ_CFLAGS) ../../../src/ck_ec.c -o prop_test_timeutil_scale prop_test_timeutil_scale.c + +prop_test_value: ../../../src/ck_ec.c prop_test_value.c ../../../src/ck_ec_timeutil.h ../../../include/ck_ec.h fuzz_harness.h + $(CC) $(CFLAGS) $(FUZZ_CFLAGS) ../../../src/ck_ec.c -o prop_test_value prop_test_value.c + +prop_test_wakeup: ../../../src/ck_ec.c prop_test_wakeup.c ../../../src/ck_ec_timeutil.h ../../../include/ck_ec.h fuzz_harness.h + $(CC) $(CFLAGS) $(FUZZ_CFLAGS) ../../../src/ck_ec.c -o prop_test_wakeup prop_test_wakeup.c + +clean: + rm -rf *~ *.o *.dSYM *.exe $(OBJECTS) + +include ../../../build/regressions.build +CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE diff --git a/regressions/ck_ec/validate/ck_ec_smoke_test.c b/regressions/ck_ec/validate/ck_ec_smoke_test.c new file mode 100644 index 0000000..3aca162 --- /dev/null +++ b/regressions/ck_ec/validate/ck_ec_smoke_test.c @@ -0,0 +1,450 @@ +#include <assert.h> +#include <ck_ec.h> +#include <ck_limits.h> +#include <ck_stdbool.h> +#include <pthread.h> +#include <stdio.h> +#include <sys/time.h> +#include <unistd.h> + +#define TIME_MAX ((time_t)((1ULL << ((sizeof(time_t) * CHAR_BIT) - 1)) - 1)) + +#ifndef __linux__ +/* Zero-initialize to mark the ops as unavailable. 
*/ +static const struct ck_ec_ops test_ops; +#else +#include <linux/futex.h> +#include <sys/syscall.h> +#include <time.h> + +static int gettime(const struct ck_ec_ops *, struct timespec *out); +static void wake32(const struct ck_ec_ops *, const uint32_t *); +static void wait32(const struct ck_ec_wait_state *, const uint32_t *, + uint32_t, const struct timespec *); +static void wake64(const struct ck_ec_ops *, const uint64_t *); +static void wait64(const struct ck_ec_wait_state *, const uint64_t *, + uint64_t, const struct timespec *); + +static const struct ck_ec_ops test_ops = { + .gettime = gettime, + .wait32 = wait32, + .wait64 = wait64, + .wake32 = wake32, + .wake64 = wake64 +}; + +static int gettime(const struct ck_ec_ops *ops, struct timespec *out) +{ + assert(ops == &test_ops); + return clock_gettime(CLOCK_MONOTONIC, out); +} + +static void wait32(const struct ck_ec_wait_state *state, + const uint32_t *address, uint32_t expected, + const struct timespec *deadline) +{ + assert(state->ops == &test_ops); + syscall(SYS_futex, address, + FUTEX_WAIT_BITSET, expected, deadline, + NULL, FUTEX_BITSET_MATCH_ANY, 0); + return; +} + +static void wait64(const struct ck_ec_wait_state *state, + const uint64_t *address, uint64_t expected, + const struct timespec *deadline) +{ + const void *low_half; + + assert(state->ops == &test_ops); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + low_half = address; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + low_half = (uintptr_t)address + sizeof(uint32_t); +#else +# error "__BYTE_ORDER__ must be defined." +#endif + + syscall(SYS_futex, low_half, + FUTEX_WAIT_BITSET, (uint32_t)expected, deadline, + NULL, FUTEX_BITSET_MATCH_ANY, 0); + return; +} + +static void wake32(const struct ck_ec_ops *ops, const uint32_t *address) +{ + assert(ops == &test_ops); + syscall(SYS_futex, address, + FUTEX_WAKE, INT_MAX, + /* ignored arguments */NULL, NULL, 0); + return; +} + +static void wake64(const struct ck_ec_ops *ops, const uint64_t *address) +{ + const void *low_half; + + assert(ops == &test_ops); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + low_half = address; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + low_half = (uintptr_t)address + sizeof(uint32_t); +#else +# error "__BYTE_ORDER__ must be defined." 
+#endif + + syscall(SYS_futex, low_half, + FUTEX_WAKE, INT_MAX, + /* ignored arguments */NULL, NULL, 0); + return; +} +#endif /* __linux__ */ + +static const struct ck_ec_mode sp = { + .ops = &test_ops, + .single_producer = true +}; + +static const struct ck_ec_mode mp = { + .ops = &test_ops, + .single_producer = false +}; + +static void test_update_counter_32(const struct ck_ec_mode *mode) +{ + struct ck_ec32 ec = CK_EC_INITIALIZER; + + assert(ck_ec_value(&ec) == 0); + + ck_ec_inc(&ec, mode); + assert(ck_ec_value(&ec) == 1); + + uint32_t old = ck_ec_add(&ec, mode, 42); + assert(old == 1); + assert(ck_ec_value(&ec) == 43); + return; +} + +#ifdef CK_F_EC64 +static void test_update_counter_64(const struct ck_ec_mode *mode) +{ + struct ck_ec64 ec = CK_EC_INITIALIZER; + + assert(ck_ec_value(&ec) == 0); + + ck_ec_inc(&ec, mode); + assert(ck_ec_value(&ec) == 1); + + uint64_t old = ck_ec_add(&ec, mode, 42); + assert(old == 1); + assert(ck_ec_value(&ec) == 43); + return; +} +#endif + +static void test_deadline(void) +{ + struct timespec deadline; + + assert(ck_ec_deadline(&deadline, &sp, NULL) == 0); + assert(deadline.tv_sec == TIME_MAX); + + { + const struct timespec timeout = { + .tv_sec = 1, + .tv_nsec = 1000 + }; + const struct timespec no_timeout = { + .tv_sec = 0 + }; + struct timespec now; + + assert(ck_ec_deadline(&deadline, &sp, &timeout) == 0); + assert(ck_ec_deadline(&now, &sp, &no_timeout) == 0); + + double now_sec = now.tv_sec + 1e-9 * now.tv_nsec; + double deadline_sec = deadline.tv_sec + 1e-9 * deadline.tv_nsec; + assert(now_sec < deadline_sec); + assert(deadline_sec <= now_sec + 1 + 1000e-9); + } + + { + const struct timespec timeout = { + .tv_sec = TIME_MAX - 1, + .tv_nsec = 1000 + }; + + assert(ck_ec_deadline(&deadline, &sp, &timeout) == 0); + assert(deadline.tv_sec == TIME_MAX); + } + + return; +} + +static void test_wait_32(void) +{ + struct timespec deadline = { .tv_sec = 0 }; + struct ck_ec32 ec; + + ck_ec_init(&ec, 1); + assert(ck_ec_value(&ec) == 1); + assert(ck_ec_wait(&ec, &sp, 2, NULL) == 0); + assert(ck_ec_wait(&ec, &sp, 1, &deadline) == -1); + + { + const struct timespec timeout = { .tv_nsec = 1 }; + + assert(ck_ec_deadline(&deadline, &sp, &timeout) == 0); + assert(ck_ec_wait(&ec, &sp, 1, &deadline) == -1); + assert(ck_ec_has_waiters(&ec)); + } + + return; +} + +#ifdef CK_F_EC64 +static void test_wait_64(void) +{ + struct timespec deadline = { .tv_sec = 0 }; + struct ck_ec64 ec; + + ck_ec_init(&ec, 0); + assert(ck_ec_value(&ec) == 0); + assert(ck_ec_wait(&ec, &sp, 1, NULL) == 0); + assert(ck_ec_wait(&ec, &sp, 0, &deadline) == -1); + + { + const struct timespec timeout = { .tv_nsec = 1 }; + + assert(ck_ec_deadline(&deadline, &sp, &timeout) == 0); + assert(ck_ec_wait(&ec, &sp, 0, &deadline) == -1); + assert(ck_ec_has_waiters(&ec)); + } + + return; +} +#endif + +static int pred(const struct ck_ec_wait_state *state, + struct timespec *deadline) +{ + double initial_ts = state->start.tv_sec + + 1e-9 * state->start.tv_nsec; + int *count = state->data; + + printf("pred wait: %f\n", + deadline->tv_sec + 1e-9 * deadline->tv_nsec - initial_ts); + + if ((*count)++ < 3) { + return 0; + } + + return (*count)++; +} + +/* + * Check that pred's return value is correctly bubbled up, + * and that the event count is marked as having waiters. 
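+ * (pred() above returns zero, i.e. keep waiting, for its first three calls; on the fourth call its two post-increments leave count at 5 and it returns 4, which ck_ec_wait_pred is expected to hand back to its caller.)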
+ */ +static void test_wait_pred_32(void) +{ + struct ck_ec32 ec = CK_EC_INITIALIZER; + int count = 0; + + assert(!ck_ec_has_waiters(&ec)); + assert(ck_ec_wait_pred(&ec, &sp, 0, pred, &count, NULL) == 4); + assert(ck_ec_has_waiters(&ec)); + assert(count == 5); + return; +} + +#ifdef CK_F_EC64 +static int pred2(const struct ck_ec_wait_state *state, + struct timespec *deadline) +{ + double initial_ts = state->start.tv_sec + + 1e-9 * state->start.tv_nsec; + int *count = state->data; + + printf("pred2 wait: %f\n", + deadline->tv_sec + 1e-9 * deadline->tv_nsec - initial_ts); + + *deadline = state->now; + deadline->tv_sec++; + + (*count)++; + return 0; +} + +/* + * wait_pred_64 is nearly identical to _32. Now check that deadline + * overriding works. + */ +static void test_wait_pred_64(void) +{ + const struct timespec timeout = { .tv_sec = 5 }; + struct timespec deadline; + struct ck_ec64 ec = CK_EC_INITIALIZER; + int count = 0; + + assert(!ck_ec_has_waiters(&ec)); + assert(ck_ec_deadline(&deadline, &sp, &timeout) == 0); + assert(ck_ec_wait_pred(&ec, &sp, 0, pred2, &count, &deadline) == -1); + assert(ck_ec_has_waiters(&ec)); + assert(count == 5); + return; +} +#endif + +static int woken = 0; + +static void *test_threaded_32_waiter(void *data) +{ + struct ck_ec32 *ec = data; + + ck_ec_wait(ec, &sp, 0, NULL); + ck_pr_store_int(&woken, 1); + return NULL; +} + +static void test_threaded_inc_32(const struct ck_ec_mode *mode) +{ + struct ck_ec32 ec = CK_EC_INITIALIZER; + pthread_t waiter; + + ck_pr_store_int(&woken, 0); + + pthread_create(&waiter, NULL, test_threaded_32_waiter, &ec); + usleep(10000); + + assert(ck_pr_load_int(&woken) == 0); + ck_ec_inc(&ec, mode); + + pthread_join(waiter, NULL); + assert(ck_pr_load_int(&woken) == 1); + return; +} + +static void test_threaded_add_32(const struct ck_ec_mode *mode) +{ + struct ck_ec32 ec = CK_EC_INITIALIZER; + pthread_t waiter; + + ck_pr_store_int(&woken, 0); + + pthread_create(&waiter, NULL, test_threaded_32_waiter, &ec); + usleep(10000); + + assert(ck_pr_load_int(&woken) == 0); + ck_ec_add(&ec, mode, 4); + + pthread_join(waiter, NULL); + assert(ck_pr_load_int(&woken) == 1); + return; +} + +#ifdef CK_F_EC64 +static void *test_threaded_64_waiter(void *data) +{ + struct ck_ec64 *ec = data; + + ck_ec_wait(ec, &sp, 0, NULL); + ck_pr_store_int(&woken, 1); + return NULL; +} + +static void test_threaded_inc_64(const struct ck_ec_mode *mode) +{ + struct ck_ec64 ec = CK_EC_INITIALIZER; + pthread_t waiter; + + ck_pr_store_int(&woken, 0); + + pthread_create(&waiter, NULL, test_threaded_64_waiter, &ec); + usleep(10000); + + assert(ck_pr_load_int(&woken) == 0); + ck_ec_inc(&ec, mode); + + pthread_join(waiter, NULL); + assert(ck_pr_load_int(&woken) == 1); + return; +} + +static void test_threaded_add_64(const struct ck_ec_mode *mode) +{ + struct ck_ec64 ec = CK_EC_INITIALIZER; + pthread_t waiter; + + ck_pr_store_int(&woken, 0); + + pthread_create(&waiter, NULL, test_threaded_64_waiter, &ec); + usleep(10000); + + assert(ck_pr_load_int(&woken) == 0); + ck_ec_add(&ec, mode, 4); + + pthread_join(waiter, NULL); + assert(ck_pr_load_int(&woken) == 1); + return; +} +#endif + +int main(int argc, char **argv) +{ + (void)argc; + (void)argv; + + if (test_ops.gettime == NULL || + test_ops.wake32 == NULL || + test_ops.wait32 == NULL) { + printf("No ck_ec ops for this platform. 
Trivial success.\n"); + return 0; + } + + test_update_counter_32(&sp); +#ifdef CK_F_EC64 + test_update_counter_64(&sp); +#endif + printf("test_update_counter SP passed.\n"); + + test_update_counter_32(&mp); +#ifdef CK_F_EC64 + test_update_counter_64(&mp); +#endif + printf("test_update_counter MP passed.\n"); + + test_deadline(); + printf("test_deadline passed.\n"); + + test_wait_32(); +#ifdef CK_F_EC64 + test_wait_64(); +#endif + printf("test_wait passed.\n"); + + test_wait_pred_32(); +#ifdef CK_F_EC64 + test_wait_pred_64(); +#endif + printf("test_wait_pred passed.\n"); + + test_threaded_inc_32(&sp); + test_threaded_add_32(&sp); +#ifdef CK_F_EC64 + test_threaded_inc_64(&sp); + test_threaded_add_64(&sp); +#endif + printf("test_threaded SP passed.\n"); + + test_threaded_inc_32(&mp); + test_threaded_add_32(&mp); +#ifdef CK_F_EC64 + test_threaded_inc_64(&mp); + test_threaded_add_64(&mp); +#endif + printf("test_threaded MP passed.\n"); + return 0; +} diff --git a/regressions/ck_ec/validate/fuzz_harness.h b/regressions/ck_ec/validate/fuzz_harness.h new file mode 100644 index 0000000..8ba6ebe --- /dev/null +++ b/regressions/ck_ec/validate/fuzz_harness.h @@ -0,0 +1,95 @@ +#ifndef FUZZ_HARNESS_H +#define FUZZ_HARNESS_H +#include <assert.h> +#include <ck_stddef.h> +#include <ck_string.h> +#include <stdio.h> +#include <unistd.h> + +#if defined(USE_LIBFUZZER) +#define TEST(function, examples) \ + void LLVMFuzzerInitialize(int *argcp, char ***argvp); \ + int LLVMFuzzerTestOneInput(const void *data, size_t n); \ + \ + void LLVMFuzzerInitialize(int *argcp, char ***argvp) \ + { \ + static char size[128]; \ + static char *argv[1024]; \ + int argc = *argcp; \ + \ + assert(argc < 1023); \ + \ + int r = snprintf(size, sizeof(size), \ + "-max_len=%zu", sizeof(examples[0])); \ + assert((size_t)r < sizeof(size)); \ + \ + memcpy(argv, *argvp, argc * sizeof(argv[0])); \ + argv[argc++] = size; \ + \ + *argcp = argc; \ + *argvp = argv; \ + \ + for (size_t i = 0; \ + i < sizeof(examples) / sizeof(examples[0]); \ + i++) { \ + assert(function(&examples[i]) == 0); \ + } \ + \ + return; \ + } \ + \ + int LLVMFuzzerTestOneInput(const void *data, size_t n) \ + { \ + char buf[sizeof(examples[0])]; \ + \ + memset(buf, 0, sizeof(buf)); \ + if (n < sizeof(buf)) { \ + memcpy(buf, data, n); \ + } else { \ + memcpy(buf, data, sizeof(buf)); \ + } \ + \ + assert(function((const void *)buf) == 0); \ + return 0; \ + } +#elif defined(USE_AFL) +#define TEST(function, examples) \ + int main(int argc, char **argv) \ + { \ + char buf[sizeof(examples[0])]; \ + \ + (void)argc; \ + (void)argv; \ + for (size_t i = 0; \ + i < sizeof(examples) / sizeof(examples[0]); \ + i++) { \ + assert(function(&examples[i]) == 0); \ + } \ + \ + \ + while (__AFL_LOOP(10000)) { \ + memset(buf, 0, sizeof(buf)); \ + read(0, buf, sizeof(buf)); \ + \ + assert(function((const void *)buf) == 0); \ + } \ + \ + return 0; \ + } +#else +#define TEST(function, examples) \ + int main(int argc, char **argv) \ + { \ + (void)argc; \ + (void)argv; \ + \ + for (size_t i = 0; \ + i < sizeof(examples) / sizeof(examples[0]); \ + i++) { \ + assert(function(&examples[i]) == 0); \ + } \ + \ + return 0; \ + } +#endif +#endif /* !FUZZ_HARNESS_H */ diff --git a/regressions/ck_ec/validate/prop_test_slow_wakeup.c b/regressions/ck_ec/validate/prop_test_slow_wakeup.c new file mode 100644 index 0000000..d172676 --- /dev/null +++ b/regressions/ck_ec/validate/prop_test_slow_wakeup.c @@ -0,0 +1,110 @@ +#include <assert.h> +#include <ck_ec.h> + +#include "fuzz_harness.h" + +static int 
gettime(const struct ck_ec_ops *, struct timespec *out); +static void wake32(const struct ck_ec_ops *, const uint32_t *); +static void wait32(const struct ck_ec_wait_state *, const uint32_t *, + uint32_t, const struct timespec *); +static void wake64(const struct ck_ec_ops *, const uint64_t *); +static void wait64(const struct ck_ec_wait_state *, const uint64_t *, + uint64_t, const struct timespec *); + +static const struct ck_ec_ops test_ops = { + .gettime = gettime, + .wait32 = wait32, + .wait64 = wait64, + .wake32 = wake32, + .wake64 = wake64 +}; + +static int gettime(const struct ck_ec_ops *ops, struct timespec *out) +{ + (void)out; + + assert(ops == &test_ops); + return -1; +} + +static void wait32(const struct ck_ec_wait_state *wait_state, + const uint32_t *addr, uint32_t expected, + const struct timespec *deadline) +{ + (void)addr; + (void)expected; + (void)deadline; + + assert(wait_state->ops == &test_ops); + return; +} + +static void wait64(const struct ck_ec_wait_state *wait_state, + const uint64_t *addr, uint64_t expected, + const struct timespec *deadline) +{ + (void)addr; + (void)expected; + (void)deadline; + + assert(wait_state->ops == &test_ops); + return; +} + +static void wake32(const struct ck_ec_ops *ops, const uint32_t *addr) +{ + (void)addr; + + assert(ops == &test_ops); + return; +} + +static void wake64(const struct ck_ec_ops *ops, const uint64_t *addr) +{ + (void)addr; + + assert(ops == &test_ops); + return; +} + +/* + * Check that calling ck_ec{32,64}_wake always clears the waiting bit. + */ + +struct example { + uint64_t value; +}; + +const struct example examples[] = { + { 0 }, + { 1 }, + { 1UL << 30 }, + { 1UL << 31 }, + { INT32_MAX }, + { INT64_MAX }, + { 1ULL << 62 }, + { 1ULL << 63 }, +}; + +static inline int test_slow_wakeup(const struct example *example) +{ + { + struct ck_ec32 ec = { .counter = example->value }; + + ck_ec32_wake(&ec, &test_ops); + assert(!ck_ec32_has_waiters(&ec)); + } + +#ifdef CK_F_EC64 + { + struct ck_ec64 ec = { .counter = example->value }; + + ck_ec64_wake(&ec, &test_ops); + assert(!ck_ec64_has_waiters(&ec)); + } +#endif /* CK_F_EC64 */ + + return 0; +} + +TEST(test_slow_wakeup, examples) diff --git a/regressions/ck_ec/validate/prop_test_timeutil_add.c b/regressions/ck_ec/validate/prop_test_timeutil_add.c new file mode 100644 index 0000000..bd44607 --- /dev/null +++ b/regressions/ck_ec/validate/prop_test_timeutil_add.c @@ -0,0 +1,101 @@ +#include <assert.h> +#include <ck_limits.h> +#include <ck_stdint.h> + +#include "../../../src/ck_ec_timeutil.h" +#include "fuzz_harness.h" + +#if ULONG_MAX > 4294967295 +typedef unsigned __int128 dword_t; +#else +typedef uint64_t dword_t; +#endif + +struct example { + struct timespec ts; + struct timespec inc; +}; + +static const struct example examples[] = { + { + { + 42, + 100 + }, + { + 1, + 2 + } + }, + { + { + 42, + 100 + }, + { + 1, + NSEC_MAX + } + }, + { + { + 42, + NSEC_MAX + }, + { + 0, + NSEC_MAX + } + }, + { + { + TIME_MAX - 1, + 1000 + }, + { + 2, + NSEC_MAX + } + } +}; + +static struct timespec normalize_ts(const struct timespec ts) +{ + struct timespec ret = ts; + + if (ret.tv_sec < 0) { + ret.tv_sec = ~ret.tv_sec; + } + + if (ret.tv_nsec < 0) { + ret.tv_nsec = ~ret.tv_nsec; + } + + ret.tv_nsec %= NSEC_MAX + 1; + return ret; +} + +static dword_t ts_to_nanos(const struct timespec ts) +{ + return (dword_t)ts.tv_sec * (NSEC_MAX + 1) + ts.tv_nsec; +} + +static inline int test_timespec_add(const struct example *example) +{ + const struct timespec ts = normalize_ts(example->ts); + const 
struct timespec inc = normalize_ts(example->inc); + const struct timespec actual = timespec_add(ts, inc); + const dword_t nanos = ts_to_nanos(ts) + ts_to_nanos(inc); + + if (nanos / (NSEC_MAX + 1) > TIME_MAX) { + assert(actual.tv_sec == TIME_MAX); + assert(actual.tv_nsec == NSEC_MAX); + } else { + assert(actual.tv_sec == (time_t)(nanos / (NSEC_MAX + 1))); + assert(actual.tv_nsec == (long)(nanos % (NSEC_MAX + 1))); + } + + return 0; +} + +TEST(test_timespec_add, examples) diff --git a/regressions/ck_ec/validate/prop_test_timeutil_add_ns.c b/regressions/ck_ec/validate/prop_test_timeutil_add_ns.c new file mode 100644 index 0000000..b62e1c7 --- /dev/null +++ b/regressions/ck_ec/validate/prop_test_timeutil_add_ns.c @@ -0,0 +1,88 @@ +#include <assert.h> + +#include "../../../src/ck_ec_timeutil.h" +#include "fuzz_harness.h" + +#if ULONG_MAX > 4294967295 +typedef unsigned __int128 dword_t; +#else +typedef uint64_t dword_t; +#endif + +struct example { + struct timespec ts; + uint32_t ns; +}; + +static const struct example examples[] = { + { + { + 42, + 100 + }, + 1 + }, + { + { + 42, + 100 + }, + 2 * NSEC_MAX + }, + { + { + 42, + NSEC_MAX + }, + NSEC_MAX + }, + { + { + TIME_MAX - 1, + 1000 + }, + 2 * NSEC_MAX + } +}; + +static inline int test_timespec_add_ns(const struct example *example) +{ + struct timespec ts = { + .tv_sec = example->ts.tv_sec, + .tv_nsec = example->ts.tv_nsec + }; + const uint32_t ns = example->ns; + + if (ts.tv_sec < 0) { + ts.tv_sec = ~ts.tv_sec; + } + + if (ts.tv_nsec < 0) { + ts.tv_nsec = ~ts.tv_nsec; + } + + ts.tv_nsec %= NSEC_MAX + 1; + + const struct timespec actual = timespec_add_ns(ts, ns); + + dword_t nanos = + (dword_t)ts.tv_sec * (NSEC_MAX + 1) + ts.tv_nsec; + + if (ns > NSEC_MAX) { + nanos += NSEC_MAX + 1; + } else { + nanos += ns; + } + + if (nanos / (NSEC_MAX + 1) > TIME_MAX) { + assert(actual.tv_sec == TIME_MAX); + assert(actual.tv_nsec == NSEC_MAX); + } else { + assert(actual.tv_sec == (time_t)(nanos / (NSEC_MAX + 1))); + assert(actual.tv_nsec == (long)(nanos % (NSEC_MAX + 1))); + } + + return 0; +} + +TEST(test_timespec_add_ns, examples) diff --git a/regressions/ck_ec/validate/prop_test_timeutil_cmp.c b/regressions/ck_ec/validate/prop_test_timeutil_cmp.c new file mode 100644 index 0000000..00e7b2e --- /dev/null +++ b/regressions/ck_ec/validate/prop_test_timeutil_cmp.c @@ -0,0 +1,99 @@ +#include <assert.h> + +#include "../../../src/ck_ec_timeutil.h" +#include "fuzz_harness.h" + +#if ULONG_MAX > 4294967295 +typedef __int128 dsword_t; +#else +typedef int64_t dsword_t; +#endif + +struct example { + struct timespec x; + struct timespec y; +}; + +static const struct example examples[] = { + { + { + 42, + 100 + }, + { + 1, + 2 + } + }, + { + { + 42, + 100 + }, + { + 1, + NSEC_MAX + } + }, + { + { + 42, + NSEC_MAX + }, + { + 0, + NSEC_MAX + } + }, + { + { + TIME_MAX - 1, + 1000 + }, + { + 2, + NSEC_MAX + } + } +}; + +static struct timespec normalize_ts(const struct timespec ts) +{ + struct timespec ret = ts; + + if (ret.tv_nsec < 0) { + ret.tv_nsec = ~ret.tv_nsec; + } + + ret.tv_nsec %= NSEC_MAX + 1; + return ret; +} + +static dsword_t ts_to_nanos(const struct timespec ts) +{ + return (dsword_t)ts.tv_sec * (NSEC_MAX + 1) + ts.tv_nsec; +} + +static inline int test_timespec_cmp(const struct example *example) +{ + const struct timespec x = normalize_ts(example->y); + const struct timespec y = normalize_ts(example->x); + const dsword_t x_nanos = ts_to_nanos(x); + const dsword_t y_nanos = ts_to_nanos(y); + + assert(timespec_cmp(x, x) == 0); + assert(timespec_cmp(y, y) == 
0); + assert(timespec_cmp(x, y) == -timespec_cmp(y, x)); + + if (x_nanos == y_nanos) { + assert(timespec_cmp(x, y) == 0); + } else if (x_nanos < y_nanos) { + assert(timespec_cmp(x, y) == -1); + } else { + assert(timespec_cmp(x, y) == 1); + } + + return 0; +} + +TEST(test_timespec_cmp, examples) diff --git a/regressions/ck_ec/validate/prop_test_timeutil_scale.c b/regressions/ck_ec/validate/prop_test_timeutil_scale.c new file mode 100644 index 0000000..eb3040f --- /dev/null +++ b/regressions/ck_ec/validate/prop_test_timeutil_scale.c @@ -0,0 +1,41 @@ +#include <assert.h> + +#include "../../../src/ck_ec_timeutil.h" +#include "fuzz_harness.h" + +struct example { + uint32_t nsec; + uint32_t multiplier; + unsigned int shift; +}; + +static const struct example examples[] = { + { + UINT32_MAX, + UINT32_MAX, + 1 + }, + { + 10, + 20, + 0 + } +}; + +static inline int test_wait_time_scale(const struct example *example) +{ + const uint32_t nsec = example->nsec; + const uint32_t multiplier = example->multiplier; + const unsigned int shift = example->shift % 32; + uint32_t actual = wait_time_scale(nsec, multiplier, shift); + uint64_t expected = ((uint64_t)nsec * multiplier) >> shift; + + if (expected > UINT32_MAX) { + expected = UINT32_MAX; + } + + assert(actual == expected); + return 0; +} + +TEST(test_wait_time_scale, examples) diff --git a/regressions/ck_ec/validate/prop_test_value.c b/regressions/ck_ec/validate/prop_test_value.c new file mode 100644 index 0000000..8f9eab8 --- /dev/null +++ b/regressions/ck_ec/validate/prop_test_value.c @@ -0,0 +1,150 @@ +#include <assert.h> +#include <ck_ec.h> + +#include "fuzz_harness.h" + +static int gettime(const struct ck_ec_ops *, struct timespec *out); +static void wake32(const struct ck_ec_ops *, const uint32_t *); +static void wait32(const struct ck_ec_wait_state *, const uint32_t *, + uint32_t, const struct timespec *); +static void wake64(const struct ck_ec_ops *, const uint64_t *); +static void wait64(const struct ck_ec_wait_state *, const uint64_t *, + uint64_t, const struct timespec *); + +static const struct ck_ec_ops test_ops = { + .gettime = gettime, + .wait32 = wait32, + .wait64 = wait64, + .wake32 = wake32, + .wake64 = wake64 +}; + +static const struct ck_ec_mode modes[] = { + { + .single_producer = true, + .ops = &test_ops + }, + { + .single_producer = false, + .ops = &test_ops + }, +}; + +static int gettime(const struct ck_ec_ops *ops, struct timespec *out) +{ + (void)out; + + assert(ops == &test_ops); + return -1; +} + +static void wait32(const struct ck_ec_wait_state *wait_state, + const uint32_t *addr, uint32_t expected, + const struct timespec *deadline) +{ + (void)addr; + (void)expected; + (void)deadline; + + assert(wait_state->ops == &test_ops); + return; +} + +static void wait64(const struct ck_ec_wait_state *wait_state, + const uint64_t *addr, uint64_t expected, + const struct timespec *deadline) +{ + (void)addr; + (void)expected; + (void)deadline; + + assert(wait_state->ops == &test_ops); + return; +} + +static void wake32(const struct ck_ec_ops *ops, const uint32_t *addr) +{ + (void)addr; + + assert(ops == &test_ops); + return; +} + +static void wake64(const struct ck_ec_ops *ops, const uint64_t *addr) +{ + (void)addr; + + assert(ops == &test_ops); + return; +} + +/* + * Check that adding a value correctly updates the counter, and that + * incrementing after that also works. 
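+ * The test masks its inputs with INT32_MAX / INT64_MAX because one bit of each counter is reserved for the has-waiters flag (the sign bit for ec32 and the low-order bit for ec64, per prop_test_wakeup.c below), so only the remaining 31 or 63 bits carry the value.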
+ */ +struct example { + uint64_t value[2]; +}; + +static const struct example examples[] = { + { { 0, 0 } }, + { { 1, 2 } }, + { { 0, INT32_MAX - 2 } }, + { { 0, INT32_MAX - 1 } }, + { { 0, INT32_MAX } }, + { { 0, INT64_MAX - 2 } }, + { { 0, INT64_MAX - 1 } }, + { { 0, INT64_MAX } }, +}; + +static inline int test_value(const struct example *example) +{ + for (size_t i = 0; i < 2; i++) { + const struct ck_ec_mode *mode = &modes[i]; + const uint32_t value0 = example->value[0] & INT32_MAX; + const uint32_t value1 = example->value[1] & INT32_MAX; + struct ck_ec32 ec; + + ck_ec32_init(&ec, 0); + assert(ck_ec32_value(&ec) == 0); + + ck_ec32_add(&ec, mode, value0); + assert(ck_ec32_value(&ec) == value0); + + ck_ec32_add(&ec, mode, value1); + assert(ck_ec32_value(&ec) == + ((value0 + value1) & INT32_MAX)); + + + ck_ec32_inc(&ec, mode); + assert(ck_ec32_value(&ec) == + ((value0 + value1 + 1) & INT32_MAX)); + } + +#ifdef CK_F_EC64 + for (size_t i = 0; i < 2; i++) { + const struct ck_ec_mode *mode = &modes[i]; + const uint64_t value0 = example->value[0] & INT64_MAX; + const uint64_t value1 = example->value[1] & INT64_MAX; + struct ck_ec64 ec; + + ck_ec64_init(&ec, 0); + assert(ck_ec64_value(&ec) == 0); + + ck_ec64_add(&ec, mode, value0); + assert(ck_ec64_value(&ec) == value0); + + ck_ec64_add(&ec, mode, value1); + assert(ck_ec64_value(&ec) == + ((value0 + value1) & INT64_MAX)); + + ck_ec64_inc(&ec, mode); + assert(ck_ec64_value(&ec) == + ((value0 + value1 + 1) & INT64_MAX)); + } +#endif /* CK_F_EC64 */ + + return 0; +} + +TEST(test_value, examples) diff --git a/regressions/ck_ec/validate/prop_test_wakeup.c b/regressions/ck_ec/validate/prop_test_wakeup.c new file mode 100644 index 0000000..a858e2b --- /dev/null +++ b/regressions/ck_ec/validate/prop_test_wakeup.c @@ -0,0 +1,193 @@ +#include <assert.h> +#include <ck_ec.h> +#include <ck_stdbool.h> + +#include "fuzz_harness.h" + +static int gettime(const struct ck_ec_ops *, struct timespec *out); +static void wake32(const struct ck_ec_ops *, const uint32_t *); +static void wait32(const struct ck_ec_wait_state *, const uint32_t *, + uint32_t, const struct timespec *); +static void wake64(const struct ck_ec_ops *, const uint64_t *); +static void wait64(const struct ck_ec_wait_state *, const uint64_t *, + uint64_t, const struct timespec *); + +static const struct ck_ec_ops test_ops = { + .gettime = gettime, + .wait32 = wait32, + .wait64 = wait64, + .wake32 = wake32, + .wake64 = wake64 +}; + +static const struct ck_ec_mode modes[] = { + { + .single_producer = true, + .ops = &test_ops + }, + { + .single_producer = false, + .ops = &test_ops + }, +}; + +static bool woken = false; + +static int gettime(const struct ck_ec_ops *ops, struct timespec *out) +{ + (void)out; + + assert(ops == &test_ops); + return -1; +} + +static void wait32(const struct ck_ec_wait_state *state, const uint32_t *addr, + uint32_t expected, const struct timespec *deadline) +{ + (void)addr; + (void)expected; + (void)deadline; + + assert(state->ops == &test_ops); + return; +} + +static void wait64(const struct ck_ec_wait_state *state, const uint64_t *addr, + uint64_t expected, const struct timespec *deadline) +{ + (void)addr; + (void)expected; + (void)deadline; + + assert(state->ops == &test_ops); + return; +} + +static void wake32(const struct ck_ec_ops *ops, const uint32_t *addr) +{ + (void)addr; + + assert(ops == &test_ops); + woken = true; + return; +} + +static void wake64(const struct ck_ec_ops *ops, const uint64_t *addr) +{ + (void)addr; + + assert(ops == &test_ops); + woken = true; 
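+ /* Record the wake so test_wakeup() below can assert on whether one was issued. */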
+ return; +} + +/* + * Check that adding a value calls the wake function when the sign bit + * is set, and does not call it when the sign bit is unset (modulo + * wrap-around). + */ +struct example { + uint64_t initial; + uint64_t increment; +}; + +const struct example examples[] = { + { INT32_MAX, 0 }, + { INT32_MAX, 1 }, + { 0 + (0U << 31), 0 }, + { 1 + (0U << 31), 0 }, + { 0 + (1U << 31), 0 }, + { 1 + (1U << 31), 0 }, + + { 0 + (0U << 31), 1 }, + { 1 + (0U << 31), 1 }, + { 0 + (1U << 31), 1 }, + { 1 + (1U << 31), 1 }, + + { 0 + (0U << 31), INT32_MAX }, + { 1 + (0U << 31), INT32_MAX }, + { 0 + (1U << 31), INT32_MAX }, + { 1 + (1U << 31), INT32_MAX }, + + { INT64_MAX, 0 }, + { INT64_MAX, 1 }, + { 0 + (0ULL << 63), 0 }, + { 1 + (0ULL << 63), 0 }, + { 0 + (1ULL << 63), 0 }, + { 1 + (1ULL << 63), 0 }, + + { 0 + (0ULL << 63), 1 }, + { 1 + (0ULL << 63), 1 }, + { 0 + (1ULL << 63), 1 }, + { 1 + (1ULL << 63), 1 }, + + { 0 + (0ULL << 63), INT64_MAX }, + { 1 + (0ULL << 63), INT64_MAX }, + { 0 + (1ULL << 63), INT64_MAX }, + { 1 + (1ULL << 63), INT64_MAX }, +}; + +static inline int test_wakeup(const struct example *example) +{ + for (size_t i = 0; i < 2; i++) { + const struct ck_ec_mode *mode = &modes[i]; + const uint32_t increment = example->increment & INT32_MAX; + struct ck_ec32 ec; + bool should_wake; + bool may_wake; + + ec.counter = example->initial; + should_wake = increment != 0 && (ec.counter & (1U << 31)); + may_wake = should_wake || (ec.counter & (1U << 31)); + + woken = false; + ck_ec32_add(&ec, mode, increment); + assert(!should_wake || woken); + assert(may_wake || !woken); + assert(!woken || ck_ec32_has_waiters(&ec) == false); + + /* Test inc now. */ + ec.counter = example->initial + increment; + should_wake = ec.counter & (1U << 31); + may_wake = should_wake || ((ec.counter + 1) & (1U << 31)); + + woken = false; + ck_ec32_inc(&ec, mode); + assert(!should_wake || woken); + assert(may_wake || !woken); + assert(!woken || ck_ec32_has_waiters(&ec) == false); + } + +#ifdef CK_F_EC64 + for (size_t i = 0; i < 2; i++) { + const struct ck_ec_mode *mode = &modes[i]; + const uint64_t increment = example->increment & INT64_MAX; + struct ck_ec64 ec; + bool should_wake; + bool may_wake; + + ec.counter = example->initial; + should_wake = increment != 0 && (ec.counter & 1); + may_wake = should_wake || (ec.counter & 1); + + woken = false; + ck_ec64_add(&ec, mode, increment); + assert(!should_wake || woken); + assert(may_wake || !woken); + assert(!woken || ck_ec64_has_waiters(&ec) == false); + + /* Test inc now. */ + ec.counter = example->initial + increment; + should_wake = ec.counter & 1; + + woken = false; + ck_ec64_inc(&ec, mode); + assert(should_wake == woken); + assert(!woken || ck_ec64_has_waiters(&ec) == false); + } +#endif /* CK_F_EC64 */ + + return 0; +} + +TEST(test_wakeup, examples) diff --git a/regressions/ck_epoch/validate/ck_epoch_call.c b/regressions/ck_epoch/validate/ck_epoch_call.c index 29e0df8..1c274e0 100644 --- a/regressions/ck_epoch/validate/ck_epoch_call.c +++ b/regressions/ck_epoch/validate/ck_epoch_call.c @@ -37,6 +37,7 @@ static void cb(ck_epoch_entry_t *p) { + /* Test that we can reregister the callback. 
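+ * The entry re-queued here fires on the second ck_epoch_barrier() call; together with the two ck_epoch_call_strict() entries queued in main(), that is why main() expects a final counter value of 4.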
*/ if (counter == 0) ck_epoch_call(&record[1], p, cb); @@ -50,15 +51,22 @@ int main(void) { ck_epoch_entry_t entry; + ck_epoch_entry_t another; - ck_epoch_register(&epoch, &record[0]); - ck_epoch_register(&epoch, &record[1]); + ck_epoch_register(&epoch, &record[0], NULL); + ck_epoch_register(&epoch, &record[1], NULL); ck_epoch_call(&record[1], &entry, cb); ck_epoch_barrier(&record[1]); ck_epoch_barrier(&record[1]); - if (counter != 2) - ck_error("Expected counter value 2, read %u.\n", counter); + + /* Make sure that strict works. */ + ck_epoch_call_strict(&record[1], &entry, cb); + ck_epoch_call_strict(&record[1], &another, cb); + ck_epoch_barrier(&record[1]); + + if (counter != 4) + ck_error("Expected counter value 4, read %u.\n", counter); return 0; } diff --git a/regressions/ck_epoch/validate/ck_epoch_poll.c b/regressions/ck_epoch/validate/ck_epoch_poll.c index aec6dd0..6f782ee 100644 --- a/regressions/ck_epoch/validate/ck_epoch_poll.c +++ b/regressions/ck_epoch/validate/ck_epoch_poll.c @@ -86,10 +86,14 @@ static void * read_thread(void *unused CK_CC_UNUSED) { unsigned int j; - ck_epoch_record_t record CK_CC_CACHELINE; + ck_epoch_record_t *record CK_CC_CACHELINE; ck_stack_entry_t *cursor, *n; - ck_epoch_register(&stack_epoch, &record); + record = malloc(sizeof *record); + if (record == NULL) + ck_error("record allocation failure"); + + ck_epoch_register(&stack_epoch, record, NULL); if (aff_iterate(&a)) { perror("ERROR: failed to affine thread"); @@ -108,7 +112,7 @@ read_thread(void *unused CK_CC_UNUSED) j = 0; for (;;) { - ck_epoch_begin(&record, NULL); + ck_epoch_begin(record, NULL); CK_STACK_FOREACH(&stack, cursor) { if (cursor == NULL) continue; @@ -116,7 +120,7 @@ read_thread(void *unused CK_CC_UNUSED) n = CK_STACK_NEXT(cursor); j += ck_pr_load_ptr(&n) != NULL; } - ck_epoch_end(&record, NULL); + ck_epoch_end(record, NULL); if (j != 0 && ck_pr_load_uint(&readers) == 0) ck_pr_store_uint(&readers, 1); @@ -138,10 +142,13 @@ write_thread(void *unused CK_CC_UNUSED) { struct node **entry, *e; unsigned int i, j, tid; - ck_epoch_record_t record; + ck_epoch_record_t *record; ck_stack_entry_t *s; - ck_epoch_register(&stack_epoch, &record); + record = malloc(sizeof *record); + if (record == NULL) + ck_error("record allocation failure"); + ck_epoch_register(&stack_epoch, record, NULL); if (aff_iterate(&a)) { perror("ERROR: failed to affine thread"); @@ -178,23 +185,23 @@ write_thread(void *unused CK_CC_UNUSED) } for (i = 0; i < PAIRS_S; i++) { - ck_epoch_begin(&record, NULL); + ck_epoch_begin(record, NULL); s = ck_stack_pop_upmc(&stack); e = stack_container(s); - ck_epoch_end(&record, NULL); + ck_epoch_end(record, NULL); - ck_epoch_call(&record, &e->epoch_entry, destructor); - ck_epoch_poll(&record); + ck_epoch_call(record, &e->epoch_entry, destructor); + ck_epoch_poll(record); } } - ck_epoch_barrier(&record); + ck_epoch_barrier(record); if (tid == 0) { - fprintf(stderr, "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[W] Peak: %u (%2.2f%%)\n Reclamations: %lu\n\n", - record.n_peak, - (double)record.n_peak / ((double)PAIRS_S * ITERATE_S) * 100, - record.n_dispatch); + fprintf(stderr, "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b[W] Peak: %u (%2.2f%%)\n Reclamations: %u\n\n", + record->n_peak, + (double)record->n_peak / ((double)PAIRS_S * ITERATE_S) * 100, + record->n_dispatch); } ck_pr_inc_uint(&e_barrier); diff --git a/regressions/ck_epoch/validate/ck_epoch_section.c b/regressions/ck_epoch/validate/ck_epoch_section.c index 12bcca1..7b76d1c 100644 --- a/regressions/ck_epoch/validate/ck_epoch_section.c +++ 
b/regressions/ck_epoch/validate/ck_epoch_section.c @@ -46,8 +46,8 @@ setup_test(void) { ck_epoch_init(&epc); - ck_epoch_register(&epc, &record); - ck_epoch_register(&epc, &record2); + ck_epoch_register(&epc, &record, NULL); + ck_epoch_register(&epc, &record2, NULL); cleanup_calls = 0; return; @@ -88,7 +88,8 @@ test_simple_read_section(void) ck_epoch_begin(&record, &section); ck_epoch_call(&record, &entry, cleanup); assert(cleanup_calls == 0); - ck_epoch_end(&record, &section); + if (ck_epoch_end(&record, &section) == false) + ck_error("expected no more sections"); ck_epoch_barrier(&record); assert(cleanup_calls == 1); @@ -157,7 +158,7 @@ reader_work(void *arg) ck_epoch_section_t section; struct obj *o; - ck_epoch_register(&epc, &local_record); + ck_epoch_register(&epc, &local_record, NULL); o = (struct obj *)arg; diff --git a/regressions/ck_epoch/validate/ck_epoch_section_2.c b/regressions/ck_epoch/validate/ck_epoch_section_2.c index aed3661..dcb3fd0 100644 --- a/regressions/ck_epoch/validate/ck_epoch_section_2.c +++ b/regressions/ck_epoch/validate/ck_epoch_section_2.c @@ -64,7 +64,7 @@ read_thread(void *unused CK_CC_UNUSED) record = malloc(sizeof *record); assert(record != NULL); - ck_epoch_register(&epoch, record); + ck_epoch_register(&epoch, record, NULL); if (aff_iterate(&a)) { perror("ERROR: failed to affine thread"); @@ -110,11 +110,14 @@ read_thread(void *unused CK_CC_UNUSED) } ck_epoch_begin(record, &section[1]); - - assert(section[0].bucket != section[1].bucket); + if (section[0].bucket == section[1].bucket) { + ck_error("%u == %u\n", + section[0].bucket, section[1].bucket); + } ck_epoch_end(record, &section[0]); - assert(ck_pr_load_uint(&record->active) > 0); + if (ck_pr_load_uint(&record->active) == 0) + ck_error("active: %u\n", record->active); if (ck_pr_load_uint(&leave) == 1) { ck_epoch_end(record, &section[1]); @@ -130,10 +133,14 @@ read_thread(void *unused CK_CC_UNUSED) static void * write_thread(void *unused CK_CC_UNUSED) { - ck_epoch_record_t record; + ck_epoch_record_t *record; unsigned long iterations = 0; - ck_epoch_register(&epoch, &record); + record = malloc(sizeof *record); + if (record == NULL) + ck_error("record allocation failure"); + + ck_epoch_register(&epoch, record, NULL); if (aff_iterate(&a)) { perror("ERROR: failed to affine thread"); @@ -147,7 +154,7 @@ write_thread(void *unused CK_CC_UNUSED) if (!(iterations % 1048575)) fprintf(stderr, "."); - ck_epoch_synchronize(&record); + ck_epoch_synchronize(record); iterations++; if (ck_pr_load_uint(&leave) == 1) diff --git a/regressions/ck_epoch/validate/ck_epoch_synchronize.c b/regressions/ck_epoch/validate/ck_epoch_synchronize.c index a03a4f7..67e23a3 100644 --- a/regressions/ck_epoch/validate/ck_epoch_synchronize.c +++ b/regressions/ck_epoch/validate/ck_epoch_synchronize.c @@ -86,12 +86,15 @@ static void * read_thread(void *unused CK_CC_UNUSED) { unsigned int j; - ck_epoch_record_t record CK_CC_CACHELINE; + ck_epoch_record_t *record CK_CC_CACHELINE; ck_stack_entry_t *cursor; ck_stack_entry_t *n; unsigned int i; - ck_epoch_register(&stack_epoch, &record); + record = malloc(sizeof *record); + if (record == NULL) + ck_error("record allocation failure"); + ck_epoch_register(&stack_epoch, record, NULL); if (aff_iterate(&a)) { perror("ERROR: failed to affine thread"); @@ -112,7 +115,7 @@ read_thread(void *unused CK_CC_UNUSED) for (;;) { i = 0; - ck_epoch_begin(&record, NULL); + ck_epoch_begin(record, NULL); CK_STACK_FOREACH(&stack, cursor) { if (cursor == NULL) continue; @@ -123,7 +126,7 @@ read_thread(void *unused CK_CC_UNUSED) if (i++ > 
4098) break; } - ck_epoch_end(&record, NULL); + ck_epoch_end(record, NULL); if (j != 0 && ck_pr_load_uint(&readers) == 0) ck_pr_store_uint(&readers, 1); @@ -145,10 +148,13 @@ write_thread(void *unused CK_CC_UNUSED) { struct node **entry, *e; unsigned int i, j, tid; - ck_epoch_record_t record; + ck_epoch_record_t *record; ck_stack_entry_t *s; - ck_epoch_register(&stack_epoch, &record); + record = malloc(sizeof *record); + if (record == NULL) + ck_error("record allocation failure"); + ck_epoch_register(&stack_epoch, record, NULL); if (aff_iterate(&a)) { perror("ERROR: failed to affine thread"); @@ -180,17 +186,17 @@ write_thread(void *unused CK_CC_UNUSED) ck_pr_stall(); for (i = 0; i < PAIRS_S; i++) { - ck_epoch_begin(&record, NULL); + ck_epoch_begin(record, NULL); s = ck_stack_pop_upmc(&stack); e = stack_container(s); - ck_epoch_end(&record, NULL); + ck_epoch_end(record, NULL); if (i & 1) { - ck_epoch_synchronize(&record); - ck_epoch_reclaim(&record); - ck_epoch_call(&record, &e->epoch_entry, destructor); + ck_epoch_synchronize(record); + ck_epoch_reclaim(record); + ck_epoch_call(record, &e->epoch_entry, destructor); } else { - ck_epoch_barrier(&record); + ck_epoch_barrier(record); destructor(&e->epoch_entry); } @@ -201,13 +207,13 @@ write_thread(void *unused CK_CC_UNUSED) } } - ck_epoch_synchronize(&record); + ck_epoch_synchronize(record); if (tid == 0) { - fprintf(stderr, "[W] Peak: %u (%2.2f%%)\n Reclamations: %lu\n\n", - record.n_peak, - (double)record.n_peak / ((double)PAIRS_S * ITERATE_S) * 100, - record.n_dispatch); + fprintf(stderr, "[W] Peak: %u (%2.2f%%)\n Reclamations: %u\n\n", + record->n_peak, + (double)record->n_peak / ((double)PAIRS_S * ITERATE_S) * 100, + record->n_dispatch); } ck_pr_inc_uint(&e_barrier); diff --git a/regressions/ck_epoch/validate/ck_stack.c b/regressions/ck_epoch/validate/ck_stack.c index fc50228..6d493e1 100644 --- a/regressions/ck_epoch/validate/ck_stack.c +++ b/regressions/ck_epoch/validate/ck_stack.c @@ -81,7 +81,7 @@ thread(void *unused CK_CC_UNUSED) unsigned long smr = 0; unsigned int i; - ck_epoch_register(&stack_epoch, &record); + ck_epoch_register(&stack_epoch, &record, NULL); if (aff_iterate(&a)) { perror("ERROR: failed to affine thread"); @@ -118,7 +118,7 @@ thread(void *unused CK_CC_UNUSED) while (ck_pr_load_uint(&e_barrier) < n_threads); fprintf(stderr, "Deferrals: %lu (%2.2f)\n", smr, (double)smr / PAIRS); - fprintf(stderr, "Peak: %u (%2.2f%%), %u pending\nReclamations: %lu\n\n", + fprintf(stderr, "Peak: %u (%2.2f%%), %u pending\nReclamations: %u\n\n", record.n_peak, (double)record.n_peak / PAIRS * 100, record.n_pending, diff --git a/regressions/ck_epoch/validate/torture.c b/regressions/ck_epoch/validate/torture.c index ce3c049..f49d412 100644 --- a/regressions/ck_epoch/validate/torture.c +++ b/regressions/ck_epoch/validate/torture.c @@ -31,8 +31,8 @@ #include <unistd.h> #include <ck_cc.h> #include <ck_pr.h> +#include <inttypes.h> #include <stdbool.h> -#include <stddef.h> #include <string.h> #include <ck_epoch.h> #include <ck_stack.h> @@ -119,7 +119,7 @@ read_thread(void *unused CK_CC_UNUSED) record = malloc(sizeof *record); assert(record != NULL); - ck_epoch_register(&epoch, record); + ck_epoch_register(&epoch, record, NULL); if (aff_iterate(&a)) { perror("ERROR: failed to affine thread"); @@ -147,10 +147,11 @@ write_thread(void *unused CK_CC_UNUSED) ck_epoch_record_t *record; unsigned long iterations = 0; bool c = ck_pr_faa_uint(&first, 1); + uint64_t ac = 0; record = malloc(sizeof *record); assert(record != NULL); - 
ck_epoch_register(&epoch, record); + ck_epoch_register(&epoch, record, NULL); if (aff_iterate(&a)) { perror("ERROR: failed to affine thread"); @@ -160,6 +161,12 @@ write_thread(void *unused CK_CC_UNUSED) ck_pr_inc_uint(&barrier); while (ck_pr_load_uint(&barrier) < n_threads); +#define CK_EPOCH_S do { \ + uint64_t _s = rdtsc(); \ + ck_epoch_synchronize(record); \ + ac += rdtsc() - _s; \ +} while (0) + do { /* * A thread should never observe invalid.value > valid.value. @@ -167,33 +174,34 @@ write_thread(void *unused CK_CC_UNUSED) * invalid.value <= valid.value is valid. */ if (!c) ck_pr_store_uint(&valid.value, 1); - ck_epoch_synchronize(record); + CK_EPOCH_S; if (!c) ck_pr_store_uint(&invalid.value, 1); ck_pr_fence_store(); if (!c) ck_pr_store_uint(&valid.value, 2); - ck_epoch_synchronize(record); + CK_EPOCH_S; if (!c) ck_pr_store_uint(&invalid.value, 2); ck_pr_fence_store(); if (!c) ck_pr_store_uint(&valid.value, 3); - ck_epoch_synchronize(record); + CK_EPOCH_S; if (!c) ck_pr_store_uint(&invalid.value, 3); ck_pr_fence_store(); if (!c) ck_pr_store_uint(&valid.value, 4); - ck_epoch_synchronize(record); + CK_EPOCH_S; if (!c) ck_pr_store_uint(&invalid.value, 4); - ck_epoch_synchronize(record); + CK_EPOCH_S; if (!c) ck_pr_store_uint(&invalid.value, 0); - ck_epoch_synchronize(record); + CK_EPOCH_S; - iterations += 4; + iterations += 6; } while (ck_pr_load_uint(&leave) == 0 && ck_pr_load_uint(&n_rd) > 0); fprintf(stderr, "%lu iterations\n", iterations); + fprintf(stderr, "%" PRIu64 " average latency\n", ac / iterations); return NULL; } diff --git a/regressions/ck_hp/validate/ck_hp_fifo.c b/regressions/ck_hp/validate/ck_hp_fifo.c index 4454283..5820f1a 100644 --- a/regressions/ck_hp/validate/ck_hp_fifo.c +++ b/regressions/ck_hp/validate/ck_hp_fifo.c @@ -55,6 +55,7 @@ static struct affinity a; static int size; static unsigned int barrier; static unsigned int e_barrier; +static unsigned int s_barrier; static void * test(void *c) @@ -98,6 +99,9 @@ test(void *c) } } + ck_pr_inc_uint(&s_barrier); + while (ck_pr_load_uint(&s_barrier) < (unsigned int)nthr); + for (i = 0; i < ITERATIONS; i++) { for (j = 0; j < size; j++) { fifo_entry = malloc(sizeof(ck_hp_fifo_entry_t)); diff --git a/regressions/ck_hs/benchmark/apply.c b/regressions/ck_hs/benchmark/apply.c index ca4a3da..e8b2294 100644 --- a/regressions/ck_hs/benchmark/apply.c +++ b/regressions/ck_hs/benchmark/apply.c @@ -6,9 +6,9 @@ * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the above copyrighs + * 1. Redistributions of source code must retain the above copyrights * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyrighs + * 2. Redistributions in binary form must reproduce the above copyrights * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * diff --git a/regressions/ck_hs/benchmark/parallel_bytestring.c b/regressions/ck_hs/benchmark/parallel_bytestring.c index 6d38379..3275b05 100644 --- a/regressions/ck_hs/benchmark/parallel_bytestring.c +++ b/regressions/ck_hs/benchmark/parallel_bytestring.c @@ -5,9 +5,9 @@ * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the above copyrighs + * 1. 
Redistributions of source code must retain the above copyrights * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyrighs + * 2. Redistributions in binary form must reproduce the above copyrights * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * @@ -147,7 +147,7 @@ set_init(void) #endif ck_epoch_init(&epoch_hs); - ck_epoch_register(&epoch_hs, &epoch_wr); + ck_epoch_register(&epoch_hs, &epoch_wr, NULL); common_srand48((long int)time(NULL)); if (ck_hs_init(&hs, mode, hs_hash, hs_compare, &my_allocator, 65536, common_lrand48()) == false) { perror("ck_hs_init"); @@ -234,7 +234,7 @@ reader(void *unused) perror("WARNING: Failed to affine thread"); s = j = a = 0; - ck_epoch_register(&epoch_hs, &epoch_record); + ck_epoch_register(&epoch_hs, &epoch_record, NULL); for (;;) { j++; ck_epoch_begin(&epoch_record, NULL); @@ -454,8 +454,8 @@ main(int argc, char *argv[]) ck_epoch_record_t epoch_temporary = epoch_wr; ck_epoch_synchronize(&epoch_wr); - fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> " - "%u pending, %u peak, %lu reclamations\n\n", + fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> " + "%u pending, %u peak, %u reclamations\n\n", epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch, epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch); @@ -593,8 +593,8 @@ main(int argc, char *argv[]) epoch_temporary = epoch_wr; ck_epoch_synchronize(&epoch_wr); - fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> " - "%u pending, %u peak, %lu reclamations\n\n", + fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> " + "%u pending, %u peak, %u reclamations\n\n", epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch, epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch); return 0; diff --git a/regressions/ck_hs/benchmark/serial.c b/regressions/ck_hs/benchmark/serial.c index ac4caff..5b4cd50 100644 --- a/regressions/ck_hs/benchmark/serial.c +++ b/regressions/ck_hs/benchmark/serial.c @@ -5,9 +5,9 @@ * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the above copyrighs + * 1. Redistributions of source code must retain the above copyrights * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyrighs + * 2. Redistributions in binary form must reproduce the above copyrights * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * diff --git a/regressions/ck_hs/validate/serial.c b/regressions/ck_hs/validate/serial.c index a16fc82..634924a 100644 --- a/regressions/ck_hs/validate/serial.c +++ b/regressions/ck_hs/validate/serial.c @@ -5,9 +5,9 @@ * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the above copyrighs + * 1. Redistributions of source code must retain the above copyrights * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyrighs + * 2. 
Redistributions in binary form must reproduce the above copyrights * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * @@ -57,12 +57,28 @@ static struct ck_malloc my_allocator = { .free = hs_free }; +static void +stub_free(void *p, size_t b, bool r) +{ + + (void)b; + (void)r; + + fprintf(stderr, "Ignoring reclamation of %p\n", p); + return; +} + +static struct ck_malloc stub_allocator = { + .malloc = hs_malloc, + .free = stub_free +}; + const char *test[] = { "Samy", "Al", "Bahra", "dances", "in", "the", "wind.", "Once", - "upon", "a", "time", "his", "gypsy", "ate", "one", "itsy", - "bitsy", "spider.", "What", "goes", "up", "must", - "come", "down.", "What", "is", "down", "stays", - "down.", "A", "B", "C", "D", "E", "F", "G", "H", - "I", "J", "K", "L", "M", "N", "O", "P", "Q" }; + "upon", "a", "time", "his", "gypsy", "ate", "one", "itsy", + "bitsy", "spider.", "What", "goes", "up", "must", + "come", "down.", "What", "is", "down", "stays", + "down.", "A", "B", "C", "D", "E", "F", "G", "H", + "I", "J", "K", "L", "M", "N", "O", "P", "Q" }; const char *negative = "negative"; @@ -136,13 +152,21 @@ run_test(unsigned int is, unsigned int ad) size_t i, j; const char *blob = "#blobs"; unsigned long h; + ck_hs_iterator_t it; if (ck_hs_init(&hs[0], CK_HS_MODE_SPMC | CK_HS_MODE_OBJECT | ad, hs_hash, hs_compare, &my_allocator, is, 6602834) == false) ck_error("ck_hs_init\n"); for (j = 0; j < size; j++) { for (i = 0; i < sizeof(test) / sizeof(*test); i++) { - h = test[i][0]; + unsigned long h_1; + + h = CK_HS_HASH(&hs[j], hs_hash, test[i]); + h_1 = ck_hs_hash(&hs[j], test[i]); + + if (h != h_1) + ck_error("h != h_1 (%lu != %lu)\n", h, h_1); + if (ck_hs_get(&hs[j], h, test[i]) != NULL) { continue; } @@ -181,6 +205,58 @@ run_test(unsigned int is, unsigned int ad) } } + /* Test iteration */ + if (j == 0) { + /* Avoid the blob stuff as it's not in the test array. */ + ck_hs_iterator_init(&it); + void *k = NULL; + int matches = 0; + int entries = 0; + while (ck_hs_next(&hs[j], &it, &k) == true) { + entries++; + for (i = 0; i < sizeof(test) / sizeof(*test); i++) { + int x = strcmp(test[i], (char *)k); + if (x == 0) { + matches++; + break; + } + } + } + + if (entries != matches) { + ck_error("Iteration must match all elements, has: %d, matched: %d [%d]", entries, matches, is); + } + + /* + * Now test iteration in the face of grows (spmc). + * In order to test usage after reclamation, we + * stub the allocator. + */ + ck_hs_iterator_init(&it); + k = NULL; + matches = 0; + entries = 0; + hs[j].m = &stub_allocator; + while (ck_hs_next_spmc(&hs[j], &it, &k) == true) { + entries++; + for (i = 0; i < sizeof(test) / sizeof(*test); i++) { + int x = strcmp(test[i], (char *)k); + if (x == 0) { + matches++; + break; + } + } + if (entries == 20) { + ck_hs_grow(&hs[j], 128); + } + } + hs[j].m = &my_allocator; + + if (entries != matches) { + ck_error("After growth, iteration must match all elements, has: %d, matched: %d [%d]", entries, matches, is); + } + } + /* Test grow semantics. 
*/ ck_hs_grow(&hs[j], 128); for (i = 0; i < sizeof(test) / sizeof(*test); i++) { diff --git a/regressions/ck_ht/benchmark/parallel_bytestring.c b/regressions/ck_ht/benchmark/parallel_bytestring.c index f3d3854..bb8f462 100644 --- a/regressions/ck_ht/benchmark/parallel_bytestring.c +++ b/regressions/ck_ht/benchmark/parallel_bytestring.c @@ -132,7 +132,7 @@ table_init(void) #endif ck_epoch_init(&epoch_ht); - ck_epoch_register(&epoch_ht, &epoch_wr); + ck_epoch_register(&epoch_ht, &epoch_wr, NULL); common_srand48((long int)time(NULL)); if (ck_ht_init(&ht, mode, NULL, &my_allocator, 8, common_lrand48()) == false) { perror("ck_ht_init"); @@ -221,7 +221,7 @@ reader(void *unused) perror("WARNING: Failed to affine thread"); s = j = a = 0; - ck_epoch_register(&epoch_ht, &epoch_record); + ck_epoch_register(&epoch_ht, &epoch_record, NULL); for (;;) { j++; ck_epoch_begin(&epoch_record, NULL); @@ -426,8 +426,8 @@ main(int argc, char *argv[]) ck_epoch_record_t epoch_temporary = epoch_wr; ck_epoch_synchronize(&epoch_wr); - fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> " - "%u pending, %u peak, %lu reclamations\n\n", + fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> " + "%u pending, %u peak, %u reclamations\n\n", epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch, epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch); @@ -551,8 +551,8 @@ main(int argc, char *argv[]) epoch_temporary = epoch_wr; ck_epoch_synchronize(&epoch_wr); - fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> " - "%u pending, %u peak, %lu reclamations\n\n", + fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> " + "%u pending, %u peak, %u reclamations\n\n", epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch, epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch); return 0; diff --git a/regressions/ck_ht/benchmark/parallel_direct.c b/regressions/ck_ht/benchmark/parallel_direct.c index 195bb25..de1d12e 100644 --- a/regressions/ck_ht/benchmark/parallel_direct.c +++ b/regressions/ck_ht/benchmark/parallel_direct.c @@ -136,7 +136,7 @@ table_init(void) { ck_epoch_init(&epoch_ht); - ck_epoch_register(&epoch_ht, &epoch_wr); + ck_epoch_register(&epoch_ht, &epoch_wr, NULL); common_srand48((long int)time(NULL)); if (ck_ht_init(&ht, CK_HT_MODE_DIRECT, hash_function, &my_allocator, 8, common_lrand48()) == false) { perror("ck_ht_init"); @@ -221,7 +221,7 @@ ht_reader(void *unused) perror("WARNING: Failed to affine thread"); s = j = a = 0; - ck_epoch_register(&epoch_ht, &epoch_record); + ck_epoch_register(&epoch_ht, &epoch_record, NULL); for (;;) { j++; ck_epoch_begin(&epoch_record, NULL); @@ -412,8 +412,8 @@ main(int argc, char *argv[]) ck_epoch_record_t epoch_temporary = epoch_wr; ck_epoch_synchronize(&epoch_wr); - fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> " - "%u pending, %u peak, %lu reclamations\n\n", + fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> " + "%u pending, %u peak, %u reclamations\n\n", epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch, epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch); @@ -537,8 +537,8 @@ main(int argc, char *argv[]) epoch_temporary = epoch_wr; ck_epoch_synchronize(&epoch_wr); - fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> " - "%u pending, %u peak, %lu reclamations\n\n", + fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> " + "%u pending, %u peak, 
%u reclamations\n\n", epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch, epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch); return 0; diff --git a/regressions/ck_pr/benchmark/Makefile b/regressions/ck_pr/benchmark/Makefile index 55183d8..3d2654d 100644 --- a/regressions/ck_pr/benchmark/Makefile +++ b/regressions/ck_pr/benchmark/Makefile @@ -1,6 +1,8 @@ .PHONY: clean -all: ck_pr_cas_64 ck_pr_fas_64 ck_pr_cas_64_2 ck_pr_add_64 ck_pr_faa_64 ck_pr_neg_64 fp +OBJECTS=ck_pr_cas_64 ck_pr_fas_64 ck_pr_cas_64_2 ck_pr_add_64 ck_pr_faa_64 ck_pr_neg_64 fp + +all: $(OBJECTS) fp: fp.c $(CC) $(CFLAGS) -o fp fp.c @@ -24,8 +26,7 @@ ck_pr_neg_64: ck_pr_neg_64.c $(CC) $(CFLAGS) -o ck_pr_neg_64 ck_pr_neg_64.c -lm clean: - rm -rf ck_pr_cas_64 ck_pr_fas_64 ck_pr_cas_64_2 ck_pr_add_64 \ - ck_pr_faa_64 ck_pr_neg_64 *.dSYM *.exe + rm -rf *.dSYM *.exe *.o $(OBJECTS) include ../../../build/regressions.build CFLAGS+=$(PTHREAD_CFLAGS) -D_GNU_SOURCE diff --git a/regressions/ck_pr/validate/Makefile b/regressions/ck_pr/validate/Makefile index 9e4a82d..11f1b8d 100644 --- a/regressions/ck_pr/validate/Makefile +++ b/regressions/ck_pr/validate/Makefile @@ -4,7 +4,7 @@ OBJECTS=ck_pr_cas ck_pr_faa ck_pr_inc ck_pr_dec ck_pr_bts \ ck_pr_btr ck_pr_btc ck_pr_load ck_pr_store \ ck_pr_and ck_pr_or ck_pr_xor ck_pr_add ck_pr_sub \ ck_pr_fas ck_pr_bin ck_pr_btx ck_pr_fax ck_pr_n \ - ck_pr_unary + ck_pr_unary ck_pr_fence ck_pr_dec_zero ck_pr_inc_zero all: $(OBJECTS) @@ -20,12 +20,21 @@ ck_pr_cas: ck_pr_cas.c ck_pr_inc: ck_pr_inc.c $(CC) $(CFLAGS) -o ck_pr_inc ck_pr_inc.c +ck_pr_inc_zero: ck_pr_inc_zero.c + $(CC) $(CFLAGS) -o ck_pr_inc_zero ck_pr_inc_zero.c + ck_pr_dec: ck_pr_dec.c $(CC) $(CFLAGS) -o ck_pr_dec ck_pr_dec.c +ck_pr_dec_zero: ck_pr_dec_zero.c + $(CC) $(CFLAGS) -o ck_pr_dec_zero ck_pr_dec_zero.c + ck_pr_faa: ck_pr_faa.c $(CC) $(CFLAGS) -o ck_pr_faa ck_pr_faa.c +ck_pr_fence: ck_pr_fence.c + $(CC) $(CFLAGS) -o ck_pr_fence ck_pr_fence.c + ck_pr_btc: ck_pr_btc.c $(CC) $(CFLAGS) -o ck_pr_btc ck_pr_btc.c diff --git a/regressions/ck_pr/validate/ck_pr_dec_zero.c b/regressions/ck_pr/validate/ck_pr_dec_zero.c new file mode 100644 index 0000000..0f3e85f --- /dev/null +++ b/regressions/ck_pr/validate/ck_pr_dec_zero.c @@ -0,0 +1,105 @@ +#include <inttypes.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> + +#include <ck_pr.h> + +#define EXPECT(ACTUAL, IS_ZERO, TYPE, INITIAL) do { \ + TYPE expected = (TYPE)((TYPE)INITIAL - (TYPE)1); \ + if ((ACTUAL) != expected) { \ + printf("FAIL [ %" PRIx64" != %" PRIx64" ]\n", \ + (uint64_t)(ACTUAL), \ + (uint64_t)expected); \ + exit(EXIT_FAILURE); \ + } \ + \ + if ((IS_ZERO) != ((ACTUAL) == 0)) { \ + printf("FAIL [ %s != %s ]\n", \ + ((IS_ZERO) ? "true" : "false"), \ + (((ACTUAL) == 0) ? 
"true" : "false")); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +#define TEST_ZERO(TYPE, SUFFIX) do { \ + TYPE datum; \ + bool is_zero; \ + \ + datum = 0; \ + ck_pr_dec_##SUFFIX##_zero(&datum, &is_zero); \ + EXPECT(datum, is_zero, TYPE, 0); \ + \ + datum = (TYPE)-1; \ + ck_pr_dec_##SUFFIX##_zero(&datum, &is_zero); \ + EXPECT(datum, is_zero, TYPE, -1); \ + \ + datum = (TYPE)1; \ + ck_pr_dec_##SUFFIX##_zero(&datum, &is_zero); \ + EXPECT(datum, is_zero, TYPE, 1); \ + \ + datum = (TYPE)2; \ + ck_pr_dec_##SUFFIX##_zero(&datum, &is_zero); \ + EXPECT(datum, is_zero, TYPE, 2); \ + } while (0) + +#define TEST_IS_ZERO(TYPE, SUFFIX) do { \ + TYPE datum; \ + bool is_zero; \ + \ + datum = 0; \ + is_zero = ck_pr_dec_##SUFFIX##_is_zero(&datum); \ + EXPECT(datum, is_zero, TYPE, 0); \ + \ + datum = (TYPE)-1; \ + is_zero = ck_pr_dec_##SUFFIX##_is_zero(&datum); \ + EXPECT(datum, is_zero, TYPE, -1); \ + \ + datum = (TYPE)1; \ + is_zero = ck_pr_dec_##SUFFIX##_is_zero(&datum); \ + EXPECT(datum, is_zero, TYPE, 1); \ + \ + datum = (TYPE)2; \ + is_zero = ck_pr_dec_##SUFFIX##_is_zero(&datum); \ + EXPECT(datum, is_zero, TYPE, 2); \ + } while (0) + +#define TEST(TYPE, SUFFIX) do { \ + TEST_ZERO(TYPE, SUFFIX); \ + TEST_IS_ZERO(TYPE, SUFFIX); \ +} while (0) + +int +main(void) +{ + +#ifdef CK_F_PR_DEC_64_ZERO + TEST(uint64_t, 64); +#endif + +#ifdef CK_F_PR_DEC_32_ZERO + TEST(uint32_t, 32); +#endif + +#ifdef CK_F_PR_DEC_16_ZERO + TEST(uint16_t, 16); +#endif + +#ifdef CK_F_PR_DEC_8_ZERO + TEST(uint8_t, 8); +#endif + +#ifdef CK_F_PR_DEC_UINT_ZERO + TEST(unsigned int, uint); +#endif + +#ifdef CK_F_PR_DEC_INT_ZERO + TEST(int, int); +#endif + +#ifdef CK_F_PR_DEC_CHAR_ZERO + TEST(char, char); +#endif + + return (0); +} diff --git a/regressions/ck_pr/validate/ck_pr_fence.c b/regressions/ck_pr/validate/ck_pr_fence.c new file mode 100644 index 0000000..976a184 --- /dev/null +++ b/regressions/ck_pr/validate/ck_pr_fence.c @@ -0,0 +1,80 @@ +/* + * Copyright 2009-2018 Samy Al Bahra. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <ck_pr.h> +#include "../../common.h" + +int +main(void) +{ + int r = 0; + + /* Below serves as a marker. */ + ck_pr_sub_int(&r, 31337); + + /* + * This is a simple test to help ensure all fences compile or crash + * on target. 
Below are generated according to the underlying memory + * model's ordering. + */ + ck_pr_fence_atomic(); + ck_pr_fence_atomic_store(); + ck_pr_fence_atomic_load(); + ck_pr_fence_store_atomic(); + ck_pr_fence_load_atomic(); + ck_pr_fence_load(); + ck_pr_fence_load_store(); + ck_pr_fence_store(); + ck_pr_fence_store_load(); + ck_pr_fence_memory(); + ck_pr_fence_release(); + ck_pr_fence_acquire(); + ck_pr_fence_acqrel(); + ck_pr_fence_lock(); + ck_pr_fence_unlock(); + + /* Below serves as a marker. */ + ck_pr_sub_int(&r, 31337); + + /* The following are generated assuming RMO. */ + ck_pr_fence_strict_atomic(); + ck_pr_fence_strict_atomic_store(); + ck_pr_fence_strict_atomic_load(); + ck_pr_fence_strict_store_atomic(); + ck_pr_fence_strict_load_atomic(); + ck_pr_fence_strict_load(); + ck_pr_fence_strict_load_store(); + ck_pr_fence_strict_store(); + ck_pr_fence_strict_store_load(); + ck_pr_fence_strict_memory(); + ck_pr_fence_strict_release(); + ck_pr_fence_strict_acquire(); + ck_pr_fence_strict_acqrel(); + ck_pr_fence_strict_lock(); + ck_pr_fence_strict_unlock(); + return 0; +} + diff --git a/regressions/ck_pr/validate/ck_pr_inc_zero.c b/regressions/ck_pr/validate/ck_pr_inc_zero.c new file mode 100644 index 0000000..e74ffba --- /dev/null +++ b/regressions/ck_pr/validate/ck_pr_inc_zero.c @@ -0,0 +1,105 @@ +#include <inttypes.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> + +#include <ck_pr.h> + +#define EXPECT(ACTUAL, IS_ZERO, TYPE, INITIAL) do { \ + TYPE expected = (TYPE)((TYPE)INITIAL + (TYPE)1); \ + if ((ACTUAL) != expected) { \ + printf("FAIL [ %" PRIx64" != %" PRIx64" ]\n", \ + (uint64_t)(ACTUAL), \ + (uint64_t)expected); \ + exit(EXIT_FAILURE); \ + } \ + \ + if ((IS_ZERO) != ((ACTUAL) == 0)) { \ + printf("FAIL [ %s != %s ]\n", \ + ((IS_ZERO) ? "true" : "false"), \ + (((ACTUAL) == 0) ?
"true" : "false")); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +#define TEST_ZERO(TYPE, SUFFIX) do { \ + TYPE datum; \ + bool is_zero; \ + \ + datum = 0; \ + ck_pr_inc_##SUFFIX##_zero(&datum, &is_zero); \ + EXPECT(datum, is_zero, TYPE, 0); \ + \ + datum = (TYPE)-1; \ + ck_pr_inc_##SUFFIX##_zero(&datum, &is_zero); \ + EXPECT(datum, is_zero, TYPE, -1); \ + \ + datum = (TYPE)1; \ + ck_pr_inc_##SUFFIX##_zero(&datum, &is_zero); \ + EXPECT(datum, is_zero, TYPE, 1); \ + \ + datum = (TYPE)2; \ + ck_pr_inc_##SUFFIX##_zero(&datum, &is_zero); \ + EXPECT(datum, is_zero, TYPE, 2); \ + } while (0) + +#define TEST_IS_ZERO(TYPE, SUFFIX) do { \ + TYPE datum; \ + bool is_zero; \ + \ + datum = 0; \ + is_zero = ck_pr_inc_##SUFFIX##_is_zero(&datum); \ + EXPECT(datum, is_zero, TYPE, 0); \ + \ + datum = (TYPE)-1; \ + is_zero = ck_pr_inc_##SUFFIX##_is_zero(&datum); \ + EXPECT(datum, is_zero, TYPE, -1); \ + \ + datum = (TYPE)1; \ + is_zero = ck_pr_inc_##SUFFIX##_is_zero(&datum); \ + EXPECT(datum, is_zero, TYPE, 1); \ + \ + datum = (TYPE)2; \ + is_zero = ck_pr_inc_##SUFFIX##_is_zero(&datum); \ + EXPECT(datum, is_zero, TYPE, 2); \ + } while (0) + +#define TEST(TYPE, SUFFIX) do { \ + TEST_ZERO(TYPE, SUFFIX); \ + TEST_IS_ZERO(TYPE, SUFFIX); \ +} while (0) + +int +main(void) +{ + +#ifdef CK_F_PR_INC_64_ZERO + TEST(uint64_t, 64); +#endif + +#ifdef CK_F_PR_INC_32_ZERO + TEST(uint32_t, 32); +#endif + +#ifdef CK_F_PR_INC_16_ZERO + TEST(uint16_t, 16); +#endif + +#ifdef CK_F_PR_INC_8_ZERO + TEST(uint8_t, 8); +#endif + +#ifdef CK_F_PR_INC_UINT_ZERO + TEST(unsigned int, uint); +#endif + +#ifdef CK_F_PR_INC_INT_ZERO + TEST(int, int); +#endif + +#ifdef CK_F_PR_INC_CHAR_ZERO + TEST(char, char); +#endif + + return (0); +} diff --git a/regressions/ck_pr/validate/ck_pr_load.c b/regressions/ck_pr/validate/ck_pr_load.c index a15acd0..d3b8520 100644 --- a/regressions/ck_pr/validate/ck_pr_load.c +++ b/regressions/ck_pr/validate/ck_pr_load.c @@ -118,6 +118,7 @@ rg_width(int m) int main(void) { + void *ptr = (void *)(intptr_t)-1; common_srand((unsigned int)getpid()); @@ -143,6 +144,11 @@ main(void) ck_pr_load_64_2(&b, &a); printf("%" PRIx64 ":%" PRIx64 "\n", a[0], a[1]); #endif + printf("ck_pr_load_ptr: "); + if (ck_pr_load_ptr(&ptr) != (void *)(intptr_t)(-1)) + printf("Failed : %p != %p\n", ck_pr_load_ptr(&ptr), (void *)(intptr_t)(-1)); + else + printf("SUCCESS\n"); return (0); } diff --git a/regressions/ck_pr/validate/ck_pr_store.c b/regressions/ck_pr/validate/ck_pr_store.c index e4b852b..e012b22 100644 --- a/regressions/ck_pr/validate/ck_pr_store.c +++ b/regressions/ck_pr/validate/ck_pr_store.c @@ -119,6 +119,8 @@ rg_width(int m) int main(void) { + void *ptr; + #if defined(CK_F_PR_STORE_DOUBLE) && defined(CK_F_PR_LOAD_DOUBLE) double d; @@ -145,6 +147,12 @@ main(void) #ifdef CK_F_PR_STORE_8 CK_PR_STORE_B(8); #endif + printf("ck_pr_store_ptr: "); + ck_pr_store_ptr(&ptr, (void *)(intptr_t)-1); + if (ptr != (void *)(intptr_t)(-1)) + printf("Failed : %p != %p\n", ptr, (void *)(intptr_t)-1); + else + printf("SUCCESS\n"); return (0); } diff --git a/regressions/ck_rhs/benchmark/parallel_bytestring.c b/regressions/ck_rhs/benchmark/parallel_bytestring.c index a95d940..1c2d244 100644 --- a/regressions/ck_rhs/benchmark/parallel_bytestring.c +++ b/regressions/ck_rhs/benchmark/parallel_bytestring.c @@ -5,9 +5,9 @@ * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the above copyrighs + * 1. 
Redistributions of source code must retain the above copyrights * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyrighs + * 2. Redistributions in binary form must reproduce the above copyrights * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * @@ -144,7 +144,7 @@ set_init(void) ck_epoch_init(&epoch_hs); - ck_epoch_register(&epoch_hs, &epoch_wr); + ck_epoch_register(&epoch_hs, &epoch_wr, NULL); common_srand48((long int)time(NULL)); if (ck_rhs_init(&hs, mode, hs_hash, hs_compare, &my_allocator, 65536, common_lrand48()) == false) { perror("ck_rhs_init"); @@ -231,7 +231,7 @@ reader(void *unused) perror("WARNING: Failed to affine thread"); s = j = a = 0; - ck_epoch_register(&epoch_hs, &epoch_record); + ck_epoch_register(&epoch_hs, &epoch_record, NULL); for (;;) { j++; ck_epoch_begin(&epoch_record, NULL); @@ -451,8 +451,8 @@ main(int argc, char *argv[]) ck_epoch_record_t epoch_temporary = epoch_wr; ck_epoch_synchronize(&epoch_wr); - fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> " - "%u pending, %u peak, %lu reclamations\n\n", + fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> " + "%u pending, %u peak, %u reclamations\n\n", epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch, epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch); @@ -590,8 +590,8 @@ main(int argc, char *argv[]) epoch_temporary = epoch_wr; ck_epoch_synchronize(&epoch_wr); - fprintf(stderr, " '- Summary: %u pending, %u peak, %lu reclamations -> " - "%u pending, %u peak, %lu reclamations\n\n", + fprintf(stderr, " '- Summary: %u pending, %u peak, %u reclamations -> " + "%u pending, %u peak, %u reclamations\n\n", epoch_temporary.n_pending, epoch_temporary.n_peak, epoch_temporary.n_dispatch, epoch_wr.n_pending, epoch_wr.n_peak, epoch_wr.n_dispatch); return 0; diff --git a/regressions/ck_rhs/benchmark/serial.c b/regressions/ck_rhs/benchmark/serial.c index 18fa892..9689d2c 100644 --- a/regressions/ck_rhs/benchmark/serial.c +++ b/regressions/ck_rhs/benchmark/serial.c @@ -5,9 +5,9 @@ * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the above copyrighs + * 1. Redistributions of source code must retain the above copyrights * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyrighs + * 2. Redistributions in binary form must reproduce the above copyrights * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * diff --git a/regressions/ck_rhs/validate/serial.c b/regressions/ck_rhs/validate/serial.c index ef9365f..92caf18 100644 --- a/regressions/ck_rhs/validate/serial.c +++ b/regressions/ck_rhs/validate/serial.c @@ -5,9 +5,9 @@ * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: - * 1. Redistributions of source code must retain the above copyrighs + * 1. Redistributions of source code must retain the above copyrights * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyrighs + * 2. 
Redistributions in binary form must reproduce the above copyrights * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * diff --git a/regressions/ck_ring/validate/Makefile b/regressions/ck_ring/validate/Makefile index 0b68fad..f849a56 100644 --- a/regressions/ck_ring/validate/Makefile +++ b/regressions/ck_ring/validate/Makefile @@ -2,7 +2,7 @@ OBJECTS=ck_ring_spsc ck_ring_spmc ck_ring_spmc_template ck_ring_mpmc \ ck_ring_mpmc_template -SIZE=16384 +SIZE=2048 all: $(OBJECTS) diff --git a/regressions/ck_sequence/validate/ck_sequence.c b/regressions/ck_sequence/validate/ck_sequence.c index e0bc700..47de852 100644 --- a/regressions/ck_sequence/validate/ck_sequence.c +++ b/regressions/ck_sequence/validate/ck_sequence.c @@ -122,7 +122,7 @@ main(int argc, char *argv[]) ck_error("Usage: ck_sequence <number of threads> <affinity delta>\n"); } - n_threads = atoi(argv[1]); + n_threads = atoi(argv[1]) - 1; if (n_threads <= 0) { ck_error("ERROR: Number of threads must be greater than 0\n"); } @@ -163,6 +163,8 @@ main(int argc, char *argv[]) counter++; if (ck_pr_load_uint(&barrier) == 0) break; + + ck_pr_stall(); } printf("%u updates made.\n", counter); diff --git a/regressions/ck_spinlock/ck_hclh.h b/regressions/ck_spinlock/ck_hclh.h index eb2e6eb..cdc0474 100644 --- a/regressions/ck_spinlock/ck_hclh.h +++ b/regressions/ck_spinlock/ck_hclh.h @@ -1,9 +1,16 @@ #define MAX(a,b) ((a) > (b) ? (a) : (b)) + +#if CORES < 2 +#undef CORES +#define CORES 2 +#endif + #define LOCK_NAME "ck_clh" #define LOCK_DEFINE static ck_spinlock_hclh_t CK_CC_CACHELINE *glob_lock; \ static ck_spinlock_hclh_t CK_CC_CACHELINE *local_lock[CORES / 2] + #define LOCK_STATE ck_spinlock_hclh_t *na = malloc(MAX(sizeof(ck_spinlock_hclh_t), 64)) -#define LOCK ck_spinlock_hclh_lock(&glob_lock, &local_lock[(core % CORES) / 2], na) +#define LOCK ck_spinlock_hclh_lock(&glob_lock, &local_lock[core % (CORES / 2)], na) #define UNLOCK ck_spinlock_hclh_unlock(&na) #define LOCK_INIT do { \ int _i; \ diff --git a/regressions/common.h b/regressions/common.h index f67c2af..9cdc690 100644 --- a/regressions/common.h +++ b/regressions/common.h @@ -267,13 +267,11 @@ struct affinity { #define AFFINITY_INITIALIZER {0, 0} #ifdef __linux__ -#ifndef gettid static pid_t -gettid(void) +common_gettid(void) { return syscall(__NR_gettid); } -#endif /* gettid */ CK_CC_UNUSED static int aff_iterate(struct affinity *acb) @@ -285,7 +283,10 @@ aff_iterate(struct affinity *acb) CPU_ZERO(&s); CPU_SET(c % CORES, &s); - return sched_setaffinity(gettid(), sizeof(s), &s); + if (sched_setaffinity(common_gettid(), sizeof(s), &s) != 0) + perror("WARNING: Could not affine thread"); + + return 0; } CK_CC_UNUSED static int @@ -297,7 +298,10 @@ aff_iterate_core(struct affinity *acb, unsigned int *core) CPU_ZERO(&s); CPU_SET((*core) % CORES, &s); - return sched_setaffinity(gettid(), sizeof(s), &s); + if (sched_setaffinity(common_gettid(), sizeof(s), &s) != 0) + perror("WARNING: Could not affine thread"); + + return 0; } #elif defined(__MACH__) CK_CC_UNUSED static int diff --git a/src/Makefile.in b/src/Makefile.in index 0d84e76..7378849 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -11,6 +11,7 @@ OBJECTS=ck_barrier_centralized.o \ ck_barrier_dissemination.o \ ck_barrier_tournament.o \ ck_barrier_mcs.o \ + ck_ec.o \ ck_epoch.o \ ck_ht.o \ ck_hp.o \ @@ -24,11 +25,14 @@ libck.so: $(OBJECTS) $(LD) $(LDFLAGS) -o $(TARGET_DIR)/libck.so $(OBJECTS) libck.a: $(OBJECTS) - ar rcs $(TARGET_DIR)/libck.a 
$(OBJECTS) + $(AR) rcs $(TARGET_DIR)/libck.a $(OBJECTS) ck_array.o: $(INCLUDE_DIR)/ck_array.h $(SDIR)/ck_array.c $(CC) $(CFLAGS) -c -o $(TARGET_DIR)/ck_array.o $(SDIR)/ck_array.c +ck_ec.o: $(INCLUDE_DIR)/ck_ec.h $(SDIR)/ck_ec.c $(SDIR)/ck_ec_timeutil.h + $(CC) $(CFLAGS) -c -o $(TARGET_DIR)/ck_ec.o $(SDIR)/ck_ec.c + ck_epoch.o: $(INCLUDE_DIR)/ck_epoch.h $(SDIR)/ck_epoch.c $(INCLUDE_DIR)/ck_stack.h $(CC) $(CFLAGS) -c -o $(TARGET_DIR)/ck_epoch.o $(SDIR)/ck_epoch.c diff --git a/src/ck_barrier_combining.c b/src/ck_barrier_combining.c index 3ee72fd..ed1960c 100644 --- a/src/ck_barrier_combining.c +++ b/src/ck_barrier_combining.c @@ -35,7 +35,7 @@ struct ck_barrier_combining_queue { struct ck_barrier_combining_group *tail; }; -CK_CC_INLINE static struct ck_barrier_combining_group * +static struct ck_barrier_combining_group * ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue) { struct ck_barrier_combining_group *front = NULL; @@ -48,7 +48,7 @@ ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue) return front; } -CK_CC_INLINE static void +static void ck_barrier_combining_insert(struct ck_barrier_combining_group *parent, struct ck_barrier_combining_group *tnode, struct ck_barrier_combining_group **child) @@ -72,7 +72,7 @@ ck_barrier_combining_insert(struct ck_barrier_combining_group *parent, * into the barrier's tree. We use a queue to implement this * traversal. */ -CK_CC_INLINE static void +static void ck_barrier_combining_queue_enqueue(struct ck_barrier_combining_queue *queue, struct ck_barrier_combining_group *node_value) { @@ -185,10 +185,10 @@ ck_barrier_combining_aux(struct ck_barrier_combining *barrier, ck_pr_fence_store(); ck_pr_store_uint(&tnode->sense, ~tnode->sense); } else { - ck_pr_fence_memory(); while (sense != ck_pr_load_uint(&tnode->sense)) ck_pr_stall(); } + ck_pr_fence_memory(); return; } diff --git a/src/ck_ec.c b/src/ck_ec.c new file mode 100644 index 0000000..9b24e76 --- /dev/null +++ b/src/ck_ec.c @@ -0,0 +1,425 @@ +#include <ck_ec.h> +#include <ck_limits.h> + +#include "ck_ec_timeutil.h" + +#define DEFAULT_BUSY_LOOP_ITER 100U + +/* + * The 2ms, 8x/iter default parameter hit 1.024 seconds after 3 + * iterations. + */ +#define DEFAULT_INITIAL_WAIT_NS 2000000L /* Start at 2 ms */ +/* Grow the wait time 8x/iteration. */ +#define DEFAULT_WAIT_SCALE_FACTOR 8 +#define DEFAULT_WAIT_SHIFT_COUNT 0 + +struct ck_ec32_slow_path_state { + struct ck_ec32 *ec; + uint32_t flagged_word; +}; + +#ifdef CK_F_EC64 +struct ck_ec64_slow_path_state { + struct ck_ec64 *ec; + uint64_t flagged_word; +}; +#endif + +/* Once we've waited for >= 1 sec, go for the full deadline. */ +static const struct timespec final_wait_time = { + .tv_sec = 1 +}; + +void +ck_ec32_wake(struct ck_ec32 *ec, const struct ck_ec_ops *ops) +{ + /* Spurious wake-ups are OK. Clear the flag before futexing. 
*/ + ck_pr_and_32(&ec->counter, (1U << 31) - 1); + ops->wake32(ops, &ec->counter); + return; +} + +int +ck_ec32_wait_slow(struct ck_ec32 *ec, + const struct ck_ec_ops *ops, + uint32_t old_value, + const struct timespec *deadline) +{ + return ck_ec32_wait_pred_slow(ec, ops, old_value, + NULL, NULL, deadline); +} + +#ifdef CK_F_EC64 +void +ck_ec64_wake(struct ck_ec64 *ec, const struct ck_ec_ops *ops) +{ + ck_pr_and_64(&ec->counter, ~1); + ops->wake64(ops, &ec->counter); + return; +} + +int +ck_ec64_wait_slow(struct ck_ec64 *ec, + const struct ck_ec_ops *ops, + uint64_t old_value, + const struct timespec *deadline) +{ + return ck_ec64_wait_pred_slow(ec, ops, old_value, + NULL, NULL, deadline); +} +#endif + +int +ck_ec_deadline_impl(struct timespec *new_deadline, + const struct ck_ec_ops *ops, + const struct timespec *timeout) +{ + struct timespec now; + int r; + + if (timeout == NULL) { + new_deadline->tv_sec = TIME_MAX; + new_deadline->tv_nsec = NSEC_MAX; + return 0; + } + + r = ops->gettime(ops, &now); + if (r != 0) { + return -1; + } + + *new_deadline = timespec_add(now, *timeout); + return 0; +} + +/* The rest of the file implements wait_pred_slow. */ + +/* + * Returns a timespec value for deadline_ptr. If deadline_ptr is NULL, + * returns a timespec far in the future. + */ +static struct timespec +canonical_deadline(const struct timespec *deadline_ptr) +{ + + if (deadline_ptr == NULL) { + return (struct timespec) { .tv_sec = TIME_MAX }; + } + + return *deadline_ptr; +} + +/* + * Really slow (sleeping) path for ck_ec_wait. Drives the exponential + * backoff scheme to sleep for longer and longer periods of time, + * until either the sleep function returns true (the eventcount's + * value has changed), or the predicate returns non-0 (something else + * has changed). + * + * If deadline is ever reached, returns -1 (timeout). + * + * TODO: add some form of randomisation to the intermediate timeout + * values. + */ +static int +exponential_backoff(struct ck_ec_wait_state *wait_state, + bool (*sleep)(const void *sleep_state, + const struct ck_ec_wait_state *wait_state, + const struct timespec *partial_deadline), + const void *sleep_state, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + const struct timespec *deadline) +{ + struct timespec begin; + struct timespec stop_backoff; + const struct ck_ec_ops *ops = wait_state->ops; + const uint32_t scale_factor = (ops->wait_scale_factor != 0) + ? ops->wait_scale_factor + : DEFAULT_WAIT_SCALE_FACTOR; + const uint32_t shift_count = (ops->wait_shift_count != 0) + ? ops->wait_shift_count + : DEFAULT_WAIT_SHIFT_COUNT; + uint32_t wait_ns = (ops->initial_wait_ns != 0) + ? ops->initial_wait_ns + : DEFAULT_INITIAL_WAIT_NS; + bool first = true; + + for (;;) { + struct timespec now; + struct timespec partial_deadline; + + if (check_deadline(&now, ops, *deadline) == true) { + /* Timeout. Bail out. */ + return -1; + } + + if (first) { + begin = now; + wait_state->start = begin; + stop_backoff = timespec_add(begin, final_wait_time); + first = false; + } + + wait_state->now = now; + if (timespec_cmp(now, stop_backoff) >= 0) { + partial_deadline = *deadline; + } else { + do { + partial_deadline = + timespec_add_ns(begin, wait_ns); + wait_ns = + wait_time_scale(wait_ns, + scale_factor, + shift_count); + } while (timespec_cmp(partial_deadline, now) <= 0); + } + + if (pred != NULL) { + int r = pred(wait_state, &partial_deadline); + if (r != 0) { + return r; + } + } + + /* Canonicalize deadlines in the far future to NULL. 
*/ + if (sleep(sleep_state, wait_state, + ((partial_deadline.tv_sec == TIME_MAX) + ? NULL : &partial_deadline)) == true) { + return 0; + } + } +} + +/* + * Loops up to BUSY_LOOP_ITER times, or until ec's counter value + * (including the flag) differs from old_value. + * + * Returns the new value in ec. + */ +#define DEF_WAIT_EASY(W) \ + static uint##W##_t ck_ec##W##_wait_easy(struct ck_ec##W* ec, \ + const struct ck_ec_ops *ops, \ + uint##W##_t expected) \ + { \ + uint##W##_t current = ck_pr_load_##W(&ec->counter); \ + size_t n = (ops->busy_loop_iter != 0) \ + ? ops->busy_loop_iter \ + : DEFAULT_BUSY_LOOP_ITER; \ + \ + for (size_t i = 0; \ + i < n && current == expected; \ + i++) { \ + ck_pr_stall(); \ + current = ck_pr_load_##W(&ec->counter); \ + } \ + \ + return current; \ + } + +DEF_WAIT_EASY(32) +#ifdef CK_F_EC64 +DEF_WAIT_EASY(64) +#endif +#undef DEF_WAIT_EASY +/* + * Attempts to upgrade ec->counter from unflagged to flagged. + * + * Returns true if the event count has changed. Otherwise, ec's + * counter word is equal to flagged on return, or has been at some + * time before the return. + */ +#define DEF_UPGRADE(W) \ + static bool ck_ec##W##_upgrade(struct ck_ec##W* ec, \ + uint##W##_t current, \ + uint##W##_t unflagged, \ + uint##W##_t flagged) \ + { \ + uint##W##_t old_word; \ + \ + if (current == flagged) { \ + /* Nothing to do, no change. */ \ + return false; \ + } \ + \ + if (current != unflagged) { \ + /* We have a different counter value! */ \ + return true; \ + } \ + \ + /* \ + * Flag the counter value. The CAS only fails if the \ + * counter is already flagged, or has a new value. \ + */ \ + return (ck_pr_cas_##W##_value(&ec->counter, \ + unflagged, flagged, \ + &old_word) == false && \ + old_word != flagged); \ + } + +DEF_UPGRADE(32) +#ifdef CK_F_EC64 +DEF_UPGRADE(64) +#endif +#undef DEF_UPGRADE + +/* + * Blocks until partial_deadline on the ck_ec. Returns true if the + * eventcount's value has changed. If partial_deadline is NULL, wait + * forever. + */ +static bool +ck_ec32_wait_slow_once(const void *vstate, + const struct ck_ec_wait_state *wait_state, + const struct timespec *partial_deadline) +{ + const struct ck_ec32_slow_path_state *state = vstate; + const struct ck_ec32 *ec = state->ec; + const uint32_t flagged_word = state->flagged_word; + + wait_state->ops->wait32(wait_state, &ec->counter, + flagged_word, partial_deadline); + return ck_pr_load_32(&ec->counter) != flagged_word; +} + +#ifdef CK_F_EC64 +static bool +ck_ec64_wait_slow_once(const void *vstate, + const struct ck_ec_wait_state *wait_state, + const struct timespec *partial_deadline) +{ + const struct ck_ec64_slow_path_state *state = vstate; + const struct ck_ec64 *ec = state->ec; + const uint64_t flagged_word = state->flagged_word; + + /* futex_wait will only compare the low 32 bits. Perform a + * full comparison here to maximise the chances of catching an + * ABA in the low 32 bits. + */ + if (ck_pr_load_64(&ec->counter) != flagged_word) { + return true; + } + + wait_state->ops->wait64(wait_state, &ec->counter, + flagged_word, partial_deadline); + return ck_pr_load_64(&ec->counter) != flagged_word; +} +#endif + +/* + * The full wait logic is a lot of code (> 1KB). Encourage the + * compiler to lay this all out linearly with LIKELY annotations on + * every early exit.
+ */ +#define WAIT_SLOW_BODY(W, ec, ops, pred, data, deadline_ptr, \ + old_value, unflagged, flagged) \ + do { \ + struct ck_ec_wait_state wait_state = { \ + .ops = ops, \ + .data = data \ + }; \ + const struct ck_ec##W##_slow_path_state state = { \ + .ec = ec, \ + .flagged_word = flagged \ + }; \ + const struct timespec deadline = \ + canonical_deadline(deadline_ptr); \ + \ + /* Detect infinite past deadlines. */ \ + if (CK_CC_LIKELY(deadline.tv_sec <= 0)) { \ + return -1; \ + } \ + \ + for (;;) { \ + uint##W##_t current; \ + int r; \ + \ + current = ck_ec##W##_wait_easy(ec, ops, unflagged); \ + \ + /* \ + * We're about to wait harder (i.e., \ + * potentially with futex). Make sure the \ + * counter word is flagged. \ + */ \ + if (CK_CC_LIKELY( \ + ck_ec##W##_upgrade(ec, current, \ + unflagged, flagged) == true)) { \ + ck_pr_fence_acquire(); \ + return 0; \ + } \ + \ + /* \ + * By now, ec->counter == flagged_word (at \ + * some point in the past). Spin some more to \ + * heuristically let any in-flight SP inc/add \ + * to retire. This does not affect \ + * correctness, but practically eliminates \ + * lost wake-ups. \ + */ \ + current = ck_ec##W##_wait_easy(ec, ops, flagged); \ + if (CK_CC_LIKELY(current != flagged_word)) { \ + ck_pr_fence_acquire(); \ + return 0; \ + } \ + \ + r = exponential_backoff(&wait_state, \ + ck_ec##W##_wait_slow_once, \ + &state, \ + pred, &deadline); \ + if (r != 0) { \ + return r; \ + } \ + \ + if (ck_ec##W##_value(ec) != old_value) { \ + ck_pr_fence_acquire(); \ + return 0; \ + } \ + \ + /* Spurious wake-up. Redo the slow path. */ \ + } \ + } while (0) + +int +ck_ec32_wait_pred_slow(struct ck_ec32 *ec, + const struct ck_ec_ops *ops, + uint32_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline_ptr) +{ + const uint32_t unflagged_word = old_value; + const uint32_t flagged_word = old_value | (1UL << 31); + + if (CK_CC_UNLIKELY(ck_ec32_value(ec) != old_value)) { + return 0; + } + + WAIT_SLOW_BODY(32, ec, ops, pred, data, deadline_ptr, + old_value, unflagged_word, flagged_word); +} + +#ifdef CK_F_EC64 +int +ck_ec64_wait_pred_slow(struct ck_ec64 *ec, + const struct ck_ec_ops *ops, + uint64_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline_ptr) +{ + const uint64_t unflagged_word = old_value << 1; + const uint64_t flagged_word = unflagged_word | 1; + + if (CK_CC_UNLIKELY(ck_ec64_value(ec) != old_value)) { + return 0; + } + + WAIT_SLOW_BODY(64, ec, ops, pred, data, deadline_ptr, + old_value, unflagged_word, flagged_word); +} +#endif + +#undef WAIT_SLOW_BODY diff --git a/src/ck_ec_timeutil.h b/src/ck_ec_timeutil.h new file mode 100644 index 0000000..50cfb67 --- /dev/null +++ b/src/ck_ec_timeutil.h @@ -0,0 +1,150 @@ +#ifndef CK_EC_TIMEUTIL_H +#define CK_EC_TIMEUTIL_H +#include <ck_cc.h> +#include <ck_ec.h> +#include <ck_limits.h> +#include <ck_stdint.h> +#include <sys/time.h> + +#define TIME_MAX ((time_t)((1ULL << ((sizeof(time_t) * CHAR_BIT) - 1)) - 1)) +#define NSEC_MAX ((1000L * 1000 * 1000) - 1) + +/* + * Approximates (nsec * multiplier) >> shift. Clamps to UINT32_MAX on + * overflow. 
+ */ +CK_CC_UNUSED static uint32_t +wait_time_scale(uint32_t nsec, + uint32_t multiplier, + unsigned int shift) +{ + uint64_t temp = (uint64_t)nsec * multiplier; + uint64_t max = (uint64_t)UINT32_MAX << shift; + + if (temp >= max) { + return UINT32_MAX; + } + + return temp >> shift; +} + + +/* + * Returns ts + ns. ns is clamped to at most 1 second. Clamps the + * return value to TIME_MAX, NSEC_MAX on overflow. + * + */ +CK_CC_UNUSED static struct timespec timespec_add_ns(const struct timespec ts, + uint32_t ns) +{ + struct timespec ret = { + .tv_sec = TIME_MAX, + .tv_nsec = NSEC_MAX + }; + time_t sec; + uint32_t sum_ns; + + if (ns > (uint32_t)NSEC_MAX) { + if (ts.tv_sec >= TIME_MAX) { + return ret; + } + + ret.tv_sec = ts.tv_sec + 1; + ret.tv_nsec = ts.tv_nsec; + return ret; + } + + sec = ts.tv_sec; + sum_ns = ns + ts.tv_nsec; + if (sum_ns > NSEC_MAX) { + if (sec >= TIME_MAX) { + return ret; + } + + sec++; + sum_ns -= (NSEC_MAX + 1); + } + + ret.tv_sec = sec; + ret.tv_nsec = sum_ns; + return ret; +} + + +/* + * Returns ts + inc. If inc is negative, it is normalized to 0. + * Clamps the return value to TIME_MAX, NSEC_MAX on overflow. + */ +CK_CC_UNUSED static struct timespec timespec_add(const struct timespec ts, + const struct timespec inc) +{ + /* Initial return value is clamped to infinite future. */ + struct timespec ret = { + .tv_sec = TIME_MAX, + .tv_nsec = NSEC_MAX + }; + time_t sec; + unsigned long nsec; + + /* Non-positive delta is a no-op. Invalid nsec is another no-op. */ + if (inc.tv_sec < 0 || inc.tv_nsec < 0 || inc.tv_nsec > NSEC_MAX) { + return ts; + } + + /* Detect overflow early. */ + if (inc.tv_sec > TIME_MAX - ts.tv_sec) { + return ret; + } + + sec = ts.tv_sec + inc.tv_sec; + /* This sum can't overflow if the inputs are valid.*/ + nsec = (unsigned long)ts.tv_nsec + inc.tv_nsec; + + if (nsec > NSEC_MAX) { + if (sec >= TIME_MAX) { + return ret; + } + + sec++; + nsec -= (NSEC_MAX + 1); + } + + ret.tv_sec = sec; + ret.tv_nsec = nsec; + return ret; +} + +/* Compares two timespecs. Returns -1 if x < y, 0 if x == y, and 1 if x > y. */ +CK_CC_UNUSED static int timespec_cmp(const struct timespec x, + const struct timespec y) +{ + if (x.tv_sec != y.tv_sec) { + return (x.tv_sec < y.tv_sec) ? -1 : 1; + } + + if (x.tv_nsec != y.tv_nsec) { + return (x.tv_nsec < y.tv_nsec) ? -1 : 1; + } + + return 0; +} + +/* + * Overwrites now with the current CLOCK_MONOTONIC time, and returns + * true if the current time is greater than or equal to the deadline, + * or the clock is somehow broken. + */ +CK_CC_UNUSED static bool check_deadline(struct timespec *now, + const struct ck_ec_ops *ops, + const struct timespec deadline) +{ + int r; + + r = ops->gettime(ops, now); + if (r != 0) { + return true; + } + + return timespec_cmp(*now, deadline) >= 0; +} +#endif /* !CK_EC_TIMEUTIL_H */ diff --git a/src/ck_epoch.c b/src/ck_epoch.c index a0e9180..4871930 100644 --- a/src/ck_epoch.c +++ b/src/ck_epoch.c @@ -127,6 +127,14 @@ */ #define CK_EPOCH_GRACE 3U +/* + * CK_EPOCH_LENGTH must be a power-of-2 (because (CK_EPOCH_LENGTH - 1) is used + * as a mask, and it must be at least 3 (see comments above). 
+ */ +#if (CK_EPOCH_LENGTH < 3 || (CK_EPOCH_LENGTH & (CK_EPOCH_LENGTH - 1)) != 0) +#error "CK_EPOCH_LENGTH must be a power of 2 and >= 3" +#endif + enum { CK_EPOCH_STATE_USED = 0, CK_EPOCH_STATE_FREE = 1 @@ -139,7 +147,7 @@ CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry, #define CK_EPOCH_SENSE_MASK (CK_EPOCH_SENSE - 1) -void +bool _ck_epoch_delref(struct ck_epoch_record *record, struct ck_epoch_section *section) { @@ -150,7 +158,7 @@ _ck_epoch_delref(struct ck_epoch_record *record, current->count--; if (current->count > 0) - return; + return false; /* * If the current bucket no longer has any references, then @@ -161,8 +169,7 @@ _ck_epoch_delref(struct ck_epoch_record *record, * If no other active bucket exists, then the record will go * inactive in order to allow for forward progress. */ - other = &record->local.bucket[(i + 1) & - CK_EPOCH_SENSE_MASK]; + other = &record->local.bucket[(i + 1) & CK_EPOCH_SENSE_MASK]; if (other->count > 0 && ((int)(current->epoch - other->epoch) < 0)) { /* @@ -172,7 +179,7 @@ _ck_epoch_delref(struct ck_epoch_record *record, ck_pr_store_uint(&record->epoch, other->epoch); } - return; + return true; } void @@ -230,7 +237,7 @@ ck_epoch_init(struct ck_epoch *global) } struct ck_epoch_record * -ck_epoch_recycle(struct ck_epoch *global) +ck_epoch_recycle(struct ck_epoch *global, void *ct) { struct ck_epoch_record *record; ck_stack_entry_t *cursor; @@ -249,6 +256,12 @@ ck_epoch_recycle(struct ck_epoch *global) CK_EPOCH_STATE_USED); if (state == CK_EPOCH_STATE_FREE) { ck_pr_dec_uint(&global->n_free); + ck_pr_store_ptr(&record->ct, ct); + + /* + * The context pointer is ordered by a + * subsequent protected section. + */ return record; } } @@ -258,7 +271,8 @@ ck_epoch_recycle(struct ck_epoch *global) } void -ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record) +ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record, + void *ct) { size_t i; @@ -269,6 +283,7 @@ ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record) record->n_dispatch = 0; record->n_peak = 0; record->n_pending = 0; + record->ct = ct; memset(&record->local, 0, sizeof record->local); for (i = 0; i < CK_EPOCH_LENGTH; i++) @@ -295,6 +310,7 @@ ck_epoch_unregister(struct ck_epoch_record *record) for (i = 0; i < CK_EPOCH_LENGTH; i++) ck_stack_init(&record->pending[i]); + ck_pr_store_ptr(&record->ct, NULL); ck_pr_fence_store(); ck_pr_store_uint(&record->state, CK_EPOCH_STATE_FREE); ck_pr_inc_uint(&global->n_free); @@ -340,31 +356,41 @@ ck_epoch_scan(struct ck_epoch *global, return NULL; } -static void -ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e) +static unsigned int +ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e, ck_stack_t *deferred) { unsigned int epoch = e & (CK_EPOCH_LENGTH - 1); ck_stack_entry_t *head, *next, *cursor; + unsigned int n_pending, n_peak; unsigned int i = 0; - head = CK_STACK_FIRST(&record->pending[epoch]); - ck_stack_init(&record->pending[epoch]); - + head = ck_stack_batch_pop_upmc(&record->pending[epoch]); for (cursor = head; cursor != NULL; cursor = next) { struct ck_epoch_entry *entry = ck_epoch_entry_container(cursor); next = CK_STACK_NEXT(cursor); - entry->function(entry); + if (deferred != NULL) + ck_stack_push_spnc(deferred, &entry->stack_entry); + else + entry->function(entry); + i++; } - if (record->n_pending > record->n_peak) - record->n_peak = record->n_pending; + n_peak = ck_pr_load_uint(&record->n_peak); + n_pending = ck_pr_load_uint(&record->n_pending); - 
record->n_dispatch += i; - record->n_pending -= i; - return; + /* We don't require accuracy around peak calculation. */ + if (n_pending > n_peak) + ck_pr_store_uint(&record->n_peak, n_pending); + + if (i > 0) { + ck_pr_add_uint(&record->n_dispatch, i); + ck_pr_sub_uint(&record->n_pending, i); + } + + return i; } /* @@ -376,7 +402,18 @@ ck_epoch_reclaim(struct ck_epoch_record *record) unsigned int epoch; for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++) - ck_epoch_dispatch(record, epoch); + ck_epoch_dispatch(record, epoch, NULL); + + return; +} + +CK_CC_FORCE_INLINE static void +epoch_block(struct ck_epoch *global, struct ck_epoch_record *cr, + ck_epoch_wait_cb_t *cb, void *ct) +{ + + if (cb != NULL) + cb(global, cr, ct); return; } @@ -385,9 +422,9 @@ ck_epoch_reclaim(struct ck_epoch_record *record) * This function must not be called with-in read section. */ void -ck_epoch_synchronize(struct ck_epoch_record *record) +ck_epoch_synchronize_wait(struct ck_epoch *global, + ck_epoch_wait_cb_t *cb, void *ct) { - struct ck_epoch *global = record->global; struct ck_epoch_record *cr; unsigned int delta, epoch, goal, i; bool active; @@ -424,10 +461,27 @@ ck_epoch_synchronize(struct ck_epoch_record *record) * period. */ e_d = ck_pr_load_uint(&global->epoch); - if (e_d != delta) { - delta = e_d; - goto reload; + if (e_d == delta) { + epoch_block(global, cr, cb, ct); + continue; } + + /* + * If the epoch has been updated, we may have already + * met our goal. + */ + delta = e_d; + if ((goal > epoch) & (delta >= goal)) + goto leave; + + epoch_block(global, cr, cb, ct); + + /* + * If the epoch has been updated, then a grace period + * requires that all threads are observed idle at the + * same epoch. + */ + cr = NULL; } /* @@ -459,20 +513,6 @@ ck_epoch_synchronize(struct ck_epoch_record *record) * Otherwise, we have just acquired latest snapshot. */ delta = delta + r; - continue; - -reload: - if ((goal > epoch) & (delta >= goal)) { - /* - * Right now, epoch overflow is handled as an edge - * case. If we have already observed an epoch - * generation, then we can be sure no hazardous - * references exist to objects from this generation. We - * can actually avoid an addtional scan step at this - * point. - */ - break; - } } /* @@ -480,8 +520,16 @@ reload: * However, if non-temporal instructions are used, full barrier * semantics are necessary. */ +leave: ck_pr_fence_memory(); - record->epoch = delta; + return; +} + +void +ck_epoch_synchronize(struct ck_epoch_record *record) +{ + + ck_epoch_synchronize_wait(record->global, NULL, NULL); return; } @@ -494,6 +542,16 @@ ck_epoch_barrier(struct ck_epoch_record *record) return; } +void +ck_epoch_barrier_wait(struct ck_epoch_record *record, ck_epoch_wait_cb_t *cb, + void *ct) +{ + + ck_epoch_synchronize_wait(record->global, cb, ct); + ck_epoch_reclaim(record); + return; +} + /* * It may be worth it to actually apply these deferral semantics to an epoch * that was observed at ck_epoch_call time. The problem is that the latter @@ -505,41 +563,61 @@ ck_epoch_barrier(struct ck_epoch_record *record) * is far from ideal too. */ bool -ck_epoch_poll(struct ck_epoch_record *record) +ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred) { bool active; unsigned int epoch; - unsigned int snapshot; struct ck_epoch_record *cr = NULL; struct ck_epoch *global = record->global; + unsigned int n_dispatch; epoch = ck_pr_load_uint(&global->epoch); /* Serialize epoch snapshots with respect to global epoch.
*/ ck_pr_fence_memory(); + + /* + * At this point, epoch is the current global epoch value. + * There may or may not be active threads which observed epoch - 1. + * (ck_epoch_scan() will tell us that). However, there should be + * no active threads which observed epoch - 2. + * + * Note that checking epoch - 2 is necessary, as race conditions can + * allow another thread to increment the global epoch before this + * thread runs. + */ + n_dispatch = ck_epoch_dispatch(record, epoch - 2, deferred); + cr = ck_epoch_scan(global, cr, epoch, &active); - if (cr != NULL) { - record->epoch = epoch; - return false; - } + if (cr != NULL) + return (n_dispatch > 0); /* We are at a grace period if all threads are inactive. */ if (active == false) { record->epoch = epoch; for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++) - ck_epoch_dispatch(record, epoch); + ck_epoch_dispatch(record, epoch, deferred); return true; } - /* If an active thread exists, rely on epoch observation. */ - if (ck_pr_cas_uint_value(&global->epoch, epoch, epoch + 1, - &snapshot) == false) { - record->epoch = snapshot; - } else { - record->epoch = epoch + 1; - } + /* + * If an active thread exists, rely on epoch observation. + * + * All the active threads entered the epoch section during + * the current epoch. Therefore, we can now run the handlers + * for the immediately preceding epoch and attempt to + * advance the epoch if it hasn't been already. + */ + (void)ck_pr_cas_uint(&global->epoch, epoch, epoch + 1); - ck_epoch_dispatch(record, epoch + 1); + ck_epoch_dispatch(record, epoch - 1, deferred); return true; } + +bool +ck_epoch_poll(struct ck_epoch_record *record) +{ + + return ck_epoch_poll_deferred(record, NULL); +} diff --git a/src/ck_hs.c b/src/ck_hs.c index 31510ec..246bceb 100644 --- a/src/ck_hs.c +++ b/src/ck_hs.c @@ -105,19 +105,10 @@ ck_hs_map_signal(struct ck_hs_map *map, unsigned long h) return; } -void -ck_hs_iterator_init(struct ck_hs_iterator *iterator) -{ - - iterator->cursor = NULL; - iterator->offset = 0; - return; -} - -bool -ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key) +static bool +_ck_hs_next(struct ck_hs *hs, struct ck_hs_map *map, + struct ck_hs_iterator *i, void **key) { - struct ck_hs_map *map = hs->map; void *value; if (i->offset >= map->capacity) @@ -129,6 +120,8 @@ ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key) #ifdef CK_HS_PP if (hs->mode & CK_HS_MODE_OBJECT) value = CK_HS_VMA(value); +#else + (void)hs; /* Avoid unused parameter warning. 
*/ #endif i->offset++; *key = value; @@ -140,6 +133,35 @@ ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key) } void +ck_hs_iterator_init(struct ck_hs_iterator *iterator) +{ + + iterator->cursor = NULL; + iterator->offset = 0; + iterator->map = NULL; + return; +} + +bool +ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key) +{ + + return _ck_hs_next(hs, hs->map, i, key); +} + +bool +ck_hs_next_spmc(struct ck_hs *hs, struct ck_hs_iterator *i, void **key) +{ + struct ck_hs_map *m = i->map; + + if (m == NULL) { + m = i->map = ck_pr_load_ptr(&hs->map); + } + + return _ck_hs_next(hs, m, i, key); +} + +void ck_hs_stat(struct ck_hs *hs, struct ck_hs_stat *st) { struct ck_hs_map *map = hs->map; @@ -206,7 +228,7 @@ ck_hs_map_create(struct ck_hs *hs, unsigned long entries) map->probe_limit = (unsigned int)limit; map->probe_maximum = 0; map->capacity = n_entries; - map->step = ck_internal_bsf(n_entries); + map->step = ck_cc_ffsl(n_entries); map->mask = n_entries - 1; map->n_entries = 0; diff --git a/src/ck_ht.c b/src/ck_ht.c index 2c864c5..66c7315 100644 --- a/src/ck_ht.c +++ b/src/ck_ht.c @@ -30,9 +30,6 @@ /* * This implementation borrows several techniques from Josh Dybnis's * nbds library which can be found at http://code.google.com/p/nbds - * - * This release currently only includes support for 64-bit platforms. - * We can address 32-bit platforms in a future release. */ #include <ck_cc.h> #include <ck_md.h> @@ -171,7 +168,7 @@ ck_ht_map_create(struct ck_ht *table, CK_HT_TYPE entries) map->deletions = 0; map->probe_maximum = 0; map->capacity = n_entries; - map->step = ck_internal_bsf_64(map->capacity); + map->step = ck_cc_ffsll(map->capacity); map->mask = map->capacity - 1; map->n_entries = 0; map->entries = (struct ck_ht_entry *)(((uintptr_t)&map[1] + prefix + diff --git a/src/ck_ht_hash.h b/src/ck_ht_hash.h index cd3d7a5..a47dc40 100644 --- a/src/ck_ht_hash.h +++ b/src/ck_ht_hash.h @@ -88,7 +88,15 @@ static inline uint64_t rotl64 ( uint64_t x, int8_t r ) FORCE_INLINE static uint32_t getblock ( const uint32_t * p, int i ) { +#ifdef __s390x__ + uint32_t res; + + __asm__ (" lrv %0,%1\n" + : "=r" (res) : "Q" (p[i]) : "cc", "mem"); + return res; +#else return p[i]; +#endif /* !__s390x__ */ } //----------------------------------------------------------------------------- @@ -147,7 +155,9 @@ static inline void MurmurHash3_x86_32 ( const void * key, int len, switch(len & 3) { case 3: k1 ^= tail[2] << 16; + /* fall through */ case 2: k1 ^= tail[1] << 8; + /* fall through */ case 1: k1 ^= tail[0]; k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; }; @@ -196,11 +206,17 @@ static inline uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed switch(len & 7) { case 7: h ^= (uint64_t)(data2[6]) << 48; + /* fall through */ case 6: h ^= (uint64_t)(data2[5]) << 40; + /* fall through */ case 5: h ^= (uint64_t)(data2[4]) << 32; + /* fall through */ case 4: h ^= (uint64_t)(data2[3]) << 24; + /* fall through */ case 3: h ^= (uint64_t)(data2[2]) << 16; + /* fall through */ case 2: h ^= (uint64_t)(data2[1]) << 8; + /* fall through */ case 1: h ^= (uint64_t)(data2[0]); h *= m; }; @@ -249,7 +265,9 @@ static inline uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed switch(len) { case 3: h2 ^= ((const unsigned char*)data)[2] << 16; + /* fall through */ case 2: h2 ^= ((const unsigned char*)data)[1] << 8; + /* fall through */ case 1: h2 ^= ((const unsigned char*)data)[0]; h2 *= m; }; diff --git a/src/ck_internal.h b/src/ck_internal.h index 7aad3d7..1bca36a 100644 --- 
a/src/ck_internal.h +++ b/src/ck_internal.h @@ -80,40 +80,3 @@ ck_internal_max_32(uint32_t x, uint32_t y) return x ^ ((x ^ y) & -(x < y)); } - -CK_CC_INLINE static unsigned long -ck_internal_bsf(unsigned long v) -{ -#if defined(__GNUC__) - return __builtin_ffs(v); -#else - unsigned int i; - const unsigned int s = sizeof(unsigned long) * 8 - 1; - - for (i = 0; i < s; i++) { - if (v & (1UL << (s - i))) - return sizeof(unsigned long) * 8 - i; - } - - return 1; -#endif /* !__GNUC__ */ -} - -CK_CC_INLINE static uint64_t -ck_internal_bsf_64(uint64_t v) -{ -#if defined(__GNUC__) - return __builtin_ffs(v); -#else - unsigned int i; - const unsigned int s = sizeof(unsigned long) * 8 - 1; - - for (i = 0; i < s; i++) { - if (v & (1ULL << (63U - i))) - return i; - } -#endif /* !__GNUC__ */ - - return 1; -} - diff --git a/src/ck_rhs.c b/src/ck_rhs.c index f6dd2ee..1d6b0f0 100644 --- a/src/ck_rhs.c +++ b/src/ck_rhs.c @@ -366,7 +366,7 @@ ck_rhs_map_create(struct ck_rhs *hs, unsigned long entries) map->probe_limit = (unsigned int)limit; map->probe_maximum = 0; map->capacity = n_entries; - map->step = ck_internal_bsf(n_entries); + map->step = ck_cc_ffsl(n_entries); map->mask = n_entries - 1; map->n_entries = 0; diff --git a/tools/ci-build.sh b/tools/ci-build.sh new file mode 100755 index 0000000..d84fdd5 --- /dev/null +++ b/tools/ci-build.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# +# Skeleton for continuous integration testing. +############################################################################## + +set -x + +export CFLAGS="-DITERATE=400 -DPAIRS_S=100 -DITERATIONS=24 -DSTEPS=10000" +./configure $@ + +if [ `uname -s` = "FreeBSD" ]; then + make -j $(sysctl -n hw.ncpu) +else + make -j +fi |
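
A note on the new ck_pr_dec_zero/ck_pr_inc_zero regressions added above: they exercise the decrement-and-test-for-zero primitives directly, but the intended usage pattern is easier to see in context. The minimal sketch below is not part of the patch; the struct and function names are illustrative. It shows the classic reference-count release built on ck_pr_dec_uint_zero, guarded by the same CK_F_PR_DEC_UINT_ZERO feature macro the test uses, and assumes the CK headers are on the include path.

#include <stdbool.h>
#include <stdio.h>

#include <ck_pr.h>

/* Illustrative refcounted object; not part of the patch. */
struct object {
	unsigned int refcnt;
};

static void
object_release(struct object *o)
{
#ifdef CK_F_PR_DEC_UINT_ZERO
	bool is_zero;

	/* Atomically decrement and learn, in the same step, whether this dropped the last reference. */
	ck_pr_dec_uint_zero(&o->refcnt, &is_zero);
	if (is_zero == true)
		printf("last reference to %p dropped, safe to reclaim\n", (void *)o);
#else
	/* Fall back to a plain decrement where the _zero variant is unavailable. */
	ck_pr_dec_uint(&o->refcnt);
#endif
	return;
}

int
main(void)
{
	struct object o = { .refcnt = 1 };

	object_release(&o);
	return 0;
}

The is_zero output is what distinguishes these primitives from ck_pr_dec_uint: the caller finds out atomically whether it performed the final decrement and may begin reclamation.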
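
The new ck_ec code above manipulates raw counter words in two encodings: the 32-bit event count keeps its value in the low 31 bits and uses bit 31 as a "waiters present" flag (hence the (1U << 31) masks in ck_ec32_wake and ck_ec32_wait_pred_slow), while the 64-bit variant shifts the value left by one and uses bit 0 as the flag (old_value << 1, the | 1, and the & ~1 in ck_ec64_wake). The standalone sketch below only demonstrates those encodings; the helper names are made up and are not CK API.

#include <inttypes.h>
#include <stdio.h>

/* 32-bit: value in bits 0-30, bit 31 flags sleeping waiters. */
static uint32_t ec32_flag(uint32_t w)       { return w | (1U << 31); }
static uint32_t ec32_clear_flag(uint32_t w) { return w & ((1U << 31) - 1); }

/* 64-bit: value in bits 1-63, bit 0 flags sleeping waiters. */
static uint64_t ec64_encode(uint64_t value) { return value << 1; }
static uint64_t ec64_flag(uint64_t w)       { return w | 1; }
static uint64_t ec64_clear_flag(uint64_t w) { return w & ~(uint64_t)1; }
static uint64_t ec64_value(uint64_t w)      { return w >> 1; }

int
main(void)
{
	uint32_t w32 = 41;                /* counter value 41, no waiters */
	uint64_t w64 = ec64_encode(41);

	/* A waiter flags the word before sleeping (cf. ck_ec32_upgrade/ck_ec64_upgrade). */
	w32 = ec32_flag(w32);
	w64 = ec64_flag(w64);
	printf("flagged words: %#" PRIx32 " / %#" PRIx64 "\n", w32, w64);

	/* The waker clears the flag before issuing the wake (cf. ck_ec32_wake/ck_ec64_wake). */
	w32 = ec32_clear_flag(w32);
	w64 = ec64_clear_flag(w64);
	printf("counter values: %" PRIu32 " / %" PRIu64 "\n", w32, ec64_value(w64));

	return 0;
}

Keeping the flag in the same word as the value is what allows ck_ec32_upgrade and ck_ec64_upgrade to publish "a sleeper exists" with a single CAS on the counter itself.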
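
The comment at the top of ck_ec.c states that the 2 ms initial wait and 8x-per-iteration growth reach 1.024 seconds after three iterations; once a full second has elapsed since the start, exponential_backoff() stops scaling (final_wait_time caps the backoff phase at one second) and simply waits on the caller's deadline. As a sanity check of those defaults, here is a small standalone program, not CK code, that replays the schedule using the same clamped arithmetic as wait_time_scale() from ck_ec_timeutil.h.

#include <inttypes.h>
#include <stdio.h>

/* Same arithmetic as wait_time_scale(): (nsec * multiplier) >> shift, clamped to UINT32_MAX. */
static uint32_t
scale(uint32_t nsec, uint32_t multiplier, unsigned int shift)
{
	uint64_t temp = (uint64_t)nsec * multiplier;
	uint64_t max = (uint64_t)UINT32_MAX << shift;

	if (temp >= max)
		return UINT32_MAX;

	return temp >> shift;
}

int
main(void)
{
	/* DEFAULT_INITIAL_WAIT_NS with DEFAULT_WAIT_SCALE_FACTOR=8, DEFAULT_WAIT_SHIFT_COUNT=0. */
	uint32_t wait_ns = 2000000;

	/* Prints 2 ms, 16 ms, 128 ms, 1.024 s, then the UINT32_MAX clamp. */
	for (int i = 0; i < 5; i++) {
		printf("sleep %d: %" PRIu32 " ns\n", i, wait_ns);
		wait_ns = scale(wait_ns, 8, 0);
	}

	return 0;
}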
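
Finally, the ck_epoch changes give ck_epoch_register() a per-record context pointer and add ck_epoch_synchronize_wait(), which invokes a callback (via epoch_block() above) each time the grace period is stalled behind an active record. The ck_epoch_wait_cb_t typedef lives in ck_epoch.h and is not shown in this diff, so the sketch below assumes the signature implied by the call cb(global, cr, ct); treat it as illustrative only.

#include <stdio.h>

#include <ck_epoch.h>

static ck_epoch_t epoch;
static ck_epoch_record_t record;

/*
 * Invoked while ck_epoch_synchronize_wait() is blocked behind an active
 * reader; useful for diagnostics or cooperative yielding. The assumed
 * signature mirrors epoch_block(global, cr, cb, ct).
 */
static void
report_blocked(ck_epoch_t *global, ck_epoch_record_t *blocking, void *arg)
{

	(void)global;
	(void)arg;
	fprintf(stderr, "grace period blocked behind record %p\n",
	    (void *)blocking);
	return;
}

int
main(void)
{

	ck_epoch_init(&epoch);
	ck_epoch_register(&epoch, &record, NULL);

	/* With no active readers the callback never fires and this completes promptly. */
	ck_epoch_synchronize_wait(&epoch, report_blocked, NULL);
	return 0;
}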