summary refs log tree commit diff stats
path: root/src/port/pg_crc32c_sse42.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/port/pg_crc32c_sse42.c')
-rw-r--r-- src/port/pg_crc32c_sse42.c 69
1 files changed, 69 insertions, 0 deletions
diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c
new file mode 100644
index 0000000..10fc01e
--- /dev/null
+++ b/src/port/pg_crc32c_sse42.c
@@ -0,0 +1,69 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_crc32c_sse42.c
+ * Compute CRC-32C checksum using Intel SSE 4.2 instructions.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/port/pg_crc32c_sse42.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include <nmmintrin.h>
+
+#include "port/pg_crc32c.h"
+
+/*
+ * pg_comp_crc32c_sse42
+ *		Fold 'len' bytes starting at 'data' into the running CRC value 'crc'
+ *		using the SSE 4.2 CRC32 instructions, and return the updated value.
+ *
+ * The input is consumed in 8-byte chunks on x86-64 (4-byte chunks on other
+ * builds), and whatever is left over is processed one byte at a time.
+ *
+ * 'crc' is passed through the instructions as-is; this function performs no
+ * initial or final inversion of the value.  NOTE(review): presumably the
+ * caller-side macros in port/pg_crc32c.h take care of that -- confirm.
+ *
+ * The function is marked pg_attribute_no_sanitize_alignment() because the
+ * multi-byte loads below are deliberately unaligned (judging by the macro's
+ * name, it keeps alignment sanitizers quiet about them).
+ */
+pg_attribute_no_sanitize_alignment()
+pg_crc32c
+pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
+{
+	const unsigned char *p = data;
+
+	/*
+	 * We count down 'len' instead of comparing "p + N" against an end
+	 * pointer.  Forming a pointer that lies more than one element past the
+	 * end of the buffer is undefined behavior in C, and that is exactly what
+	 * "p + 8 <= pend" would do whenever fewer than eight bytes remain.
+	 *
+	 * NB: We do unaligned accesses here. The Intel architecture allows that,
+	 * and performance testing didn't show any performance gain from aligning
+	 * the begin address.
+	 */
+#ifdef __x86_64__
+
+	/* Process eight bytes of data at a time. */
+	while (len >= 8)
+	{
+		/* The intrinsic returns a 64-bit value; the CRC is its low 32 bits. */
+		crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p));
+		p += 8;
+		len -= 8;
+	}
+
+	/* Process remaining full four bytes if any */
+	if (len >= 4)
+	{
+		crc = _mm_crc32_u32(crc, *((const unsigned int *) p));
+		p += 4;
+		len -= 4;
+	}
+#else
+
+	/*
+	 * Process four bytes at a time. (The eight byte instruction is not
+	 * available on the 32-bit x86 architecture).
+	 */
+	while (len >= 4)
+	{
+		crc = _mm_crc32_u32(crc, *((const unsigned int *) p));
+		p += 4;
+		len -= 4;
+	}
+#endif							/* __x86_64__ */
+
+	/* Process any remaining bytes one at a time. */
+	while (len > 0)
+	{
+		crc = _mm_crc32_u8(crc, *p);
+		p++;
+		len--;
+	}
+
+	return crc;
+}