summaryrefslogtreecommitdiffstats
path: root/dev/phash/phash.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--dev/phash/phash.c113
1 files changed, 113 insertions, 0 deletions
diff --git a/dev/phash/phash.c b/dev/phash/phash.c
new file mode 100644
index 0000000..8a27405
--- /dev/null
+++ b/dev/phash/phash.c
@@ -0,0 +1,113 @@
+/* Brute-force based perfect hash generator for small sets of integers. Just
+ * fill the table below with the integer values, try to pad a little bit to
+ * avoid too complicated divides, experiment with a few operations in the
+ * hash function and reuse the output as-is to make your table. You may also
+ * want to experiment with the random generator to use either one or two
+ * distinct values for mul and key.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* warning no more than 32 distinct values! */
+
+//#define CODES 21
+//#define CODES 20
+//#define CODES 19
+//const int codes[CODES] = { 200,400,401,403,404,405,407,408,410,413,421,422,425,429,500,501,502,503,504};
+
+#define CODES 32
+const int codes[CODES] = { 200,400,401,403,404,405,407,408,410,413,421,422,425,429,500,501,502,503,504,
+ /* padding entries below, which will fall back to the default code */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
+
+unsigned mul, xor;
+unsigned bmul = 0, bxor = 0;
+
+static unsigned rnd32seed = 0x11111111U;
+static unsigned rnd32()
+{
+ rnd32seed ^= rnd32seed << 13;
+ rnd32seed ^= rnd32seed >> 17;
+ rnd32seed ^= rnd32seed << 5;
+ return rnd32seed;
+}
+
+/* the hash function to use in the target code. Try various combinations of
+ * multiplies and xor, always folded with a modulo, and try to spot the
+ * simplest operations if possible. Sometimes it may be worth adding a few
+ * dummy codes to get a better modulo code. In this case, just add dummy
+ * values at the end, but always distinct from the original ones. If the
+ * number of codes is even, it might be needed to rotate left the result
+ * before the modulo to compensate for lost LSBs.
+ */
+unsigned hash(unsigned i)
+{
+ //return ((i * mul) - (i ^ xor)) % CODES; // more solutions
+ //return ((i * mul) + (i ^ xor)) % CODES; // alternate
+ //return ((i ^ xor) * mul) % CODES; // less solutions but still OK for sequences up to 19 long
+ //return ((i * mul) ^ xor) % CODES; // less solutions but still OK for sequences up to 19 long
+
+ i = i * mul;
+ i >>= 5;
+ //i = i ^ xor;
+ //i = (i << 30) | (i >> 2); // rotate 2 right
+ //i = (i << 2) | (i >> 30); // rotate 2 left
+ //i |= i >> 20;
+ //i += i >> 30;
+ //i |= i >> 16;
+ return i % CODES;
+ //return ((i * mul) ^ xor) % CODES; // less solutions but still OK for sequences up to 19 long
+}
+
+int main(int argc, char **argv)
+{
+ unsigned h, i, flag, best, tests;
+
+ if (argc > 2) {
+ mul = atol(argv[1]);
+ xor = atol(argv[2]);
+ for (i = 0; i < CODES && codes[i] >= 0; i++)
+ printf("hash(%4u) = %4u // [%4u] = %4u\n", codes[i], hash(codes[i]), hash(codes[i]), codes[i]);
+ return 0;
+ }
+
+ tests = 0;
+ best = 0;
+ while (/*best < CODES &&*/ ++tests) {
+ mul = rnd32();
+ xor = mul; // works for some sequences up to 21 long
+ //xor = rnd32(); // more solutions
+
+ flag = 0;
+ for (i = 0; i < CODES && codes[i] >= 0; i++) {
+ h = hash(codes[i]);
+ if (flag & (1 << h))
+ break;
+ flag |= 1 << h;
+ }
+
+ if (i > best ||
+ (i == best && mul <= bmul && xor <= bxor)) {
+ /* find the best code and try to find the smallest
+ * parameters among the best ones (need to disable
+ * best<CODES in the loop for this). Small values are
+ * interesting for some multipliers, and for some RISC
+ * architectures where literals can be loaded in less
+ * instructions.
+ */
+ best = i;
+ bmul = mul;
+ bxor = xor;
+ printf("%u: mul=%u xor=%u\n", best, bmul, bxor);
+ }
+
+ if ((tests & 0x7ffff) == 0)
+ printf("%u tests...\r", tests);
+ }
+ printf("%u tests, %u vals with mul=%u xor=%u:\n", tests, best, bmul, bxor);
+
+ mul = bmul; xor = bxor;
+ for (i = 0; i < CODES && codes[i] >= 0; i++)
+ printf("hash(%4u) = %2u // [%2u] = %4u\n", codes[i], hash(codes[i]), hash(codes[i]), codes[i]);
+}