#include <stdbool.h>
#include <sys/types.h>
#include <machine/endian.h>
#include <net/flowhash.h>
/*
 * Forward declarations for the MurmurHash3 block-fetch and
 * finalization-mix helpers defined below.
 */
static inline u_int32_t getblock32(const u_int32_t *, int);
static inline u_int64_t getblock64(const u_int64_t *, int);
static inline u_int32_t mh3_fmix32(u_int32_t);
static inline u_int64_t mh3_fmix64(u_int64_t);

/* Alignment predicates: true when "v" is 2-, 4- or 8-byte aligned. */
#define ALIGNED16(v) ((((uintptr_t)(v)) & 1) == 0)
#define ALIGNED32(v) ((((uintptr_t)(v)) & 3) == 0)
#define ALIGNED64(v) ((((uintptr_t)(v)) & 7) == 0)

/* Rotate left; callers use fixed r in 1..width-1 (r == 0 would be UB). */
#define ROTL32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
#define ROTL64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))

/*
 * Default flow-hash routine: MurmurHash3 x64/128 on LP64 platforms,
 * Jenkins lookup3 ("jhash") otherwise.
 */
#if defined(__LP64__)
net_flowhash_fn_t *net_flowhash = net_flowhash_mh3_x64_128;
#else
net_flowhash_fn_t *net_flowhash = net_flowhash_jhash;
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(__arm64__)
/*
 * Fetch the i-th 32-bit block from "p".  On these architectures the
 * hardware tolerates misaligned loads, so a plain indexed load is used.
 */
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
	return *(p + i);
}
/*
 * Fetch the i-th 64-bit block from "p".  On these architectures the
 * hardware tolerates misaligned loads, so a plain indexed load is used.
 */
static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
	return *(p + i);
}
#else
/*
 * Fetch the i-th 32-bit block from "p" on architectures where
 * misaligned loads are unsafe or slow: take the direct load when the
 * pointer is 4-byte aligned, otherwise assemble the value byte-by-byte
 * in host byte order.
 */
static inline u_int32_t
getblock32(const u_int32_t *p, int i)
{
	u_int32_t value;

	if (ALIGNED32(p)) {
		value = p[i];
	} else {
		/*
		 * const-correct cast (previously dropped the const
		 * qualifier, unlike the getblock64() counterpart);
		 * computed only on the unaligned path where it is used.
		 */
		const u_int8_t *bytes =
		    (const u_int8_t *)(const void *)(uintptr_t)(p + i);

#if BYTE_ORDER == BIG_ENDIAN
		value =
		    (((u_int32_t)bytes[0]) << 24) |
		    (((u_int32_t)bytes[1]) << 16) |
		    (((u_int32_t)bytes[2]) << 8) |
		    ((u_int32_t)bytes[3]);
#else /* LITTLE_ENDIAN */
		value =
		    (((u_int32_t)bytes[3]) << 24) |
		    (((u_int32_t)bytes[2]) << 16) |
		    (((u_int32_t)bytes[1]) << 8) |
		    ((u_int32_t)bytes[0]);
#endif /* LITTLE_ENDIAN */
	}
	return value;
}
/*
 * Fetch the i-th 64-bit block from "p" on architectures where
 * misaligned loads are unsafe or slow: take the direct load when the
 * pointer is 8-byte aligned, otherwise assemble the value one byte at
 * a time in host byte order.
 */
static inline u_int64_t
getblock64(const u_int64_t *p, int i)
{
	u_int64_t value;

	if (ALIGNED64(p)) {
		value = p[i];
	} else {
		const u_int8_t *octets =
		    (const u_int8_t *)(void *)(uintptr_t)(p + i);
		int j;

		value = 0;
#if BYTE_ORDER == BIG_ENDIAN
		/* most-significant byte first */
		for (j = 0; j < 8; j++) {
			value = (value << 8) | (u_int64_t)octets[j];
		}
#else /* LITTLE_ENDIAN */
		/* least-significant byte first */
		for (j = 7; j >= 0; j--) {
			value = (value << 8) | (u_int64_t)octets[j];
		}
#endif /* LITTLE_ENDIAN */
	}
	return value;
}
#endif
/*
 * MurmurHash3 32-bit finalization mix (reference "fmix32"): forces the
 * accumulated hash bits to avalanche via xorshift/multiply rounds.
 */
static inline u_int32_t
mh3_fmix32(u_int32_t h)
{
	u_int32_t x = h;

	x = (x ^ (x >> 16)) * 0x85ebca6b;
	x = (x ^ (x >> 13)) * 0xc2b2ae35;
	return x ^ (x >> 16);
}
/*
 * MurmurHash3 64-bit finalization mix (reference "fmix64"): forces the
 * accumulated hash bits to avalanche via xorshift/multiply rounds.
 */
static inline u_int64_t
mh3_fmix64(u_int64_t k)
{
	u_int64_t x = k;

	x = (x ^ (x >> 33)) * 0xff51afd7ed558ccdLLU;
	x = (x ^ (x >> 33)) * 0xc4ceb9fe1a85ec53LLU;
	return x ^ (x >> 33);
}
/* MurmurHash3 x86/32 block-mixing constants */
#define MH3_X86_32_C1 0xcc9e2d51
#define MH3_X86_32_C2 0x1b873593

/*
 * Compute the 32-bit MurmurHash3 (x86 variant) over the "len" bytes at
 * "key", seeded with "seed".
 *
 * The input is consumed as 4-byte blocks, then the 1-3 byte tail is
 * folded in; the tail switch cases intentionally fall through so each
 * remaining byte is accumulated into k1 before the final mix.
 *
 * Changes vs. previous revision: removed a stray empty statement after
 * the tail switch and annotated the intentional fallthrough.
 */
u_int32_t
net_flowhash_mh3_x86_32(const void *key, u_int32_t len, const u_int32_t seed)
{
	const u_int8_t *data = (const u_int8_t *)key;
	const u_int32_t nblocks = len / 4;
	const u_int32_t *blocks;
	const u_int8_t *tail;
	u_int32_t h1 = seed, k1;
	int i;

	/* body: walk the full blocks backwards from the end (i is negative) */
	blocks = (const u_int32_t *)(const void *)(data + nblocks * 4);
	for (i = -nblocks; i; i++) {
		k1 = getblock32(blocks, i);

		k1 *= MH3_X86_32_C1;
		k1 = ROTL32(k1, 15);
		k1 *= MH3_X86_32_C2;

		h1 ^= k1;
		h1 = ROTL32(h1, 13);
		h1 = h1 * 5 + 0xe6546b64;
	}

	/* tail: 0-3 bytes left over after the 4-byte blocks */
	tail = (const u_int8_t *)(const void *)(data + nblocks * 4);
	k1 = 0;

	switch (len & 3) {
	case 3:
		k1 ^= tail[2] << 16;
		/* FALLTHRU */
	case 2:
		k1 ^= tail[1] << 8;
		/* FALLTHRU */
	case 1:
		k1 ^= tail[0];
		k1 *= MH3_X86_32_C1;
		k1 = ROTL32(k1, 15);
		k1 *= MH3_X86_32_C2;
		h1 ^= k1;
	}

	/* finalization */
	h1 ^= len;
	h1 = mh3_fmix32(h1);

	return h1;
}
/* MurmurHash3 x64/128 block-mixing constants */
#define MH3_X64_128_C1 0x87c37b91114253d5LLU
#define MH3_X64_128_C2 0x4cf5ad432745937fLLU

/*
 * Compute the 128-bit MurmurHash3 (x64 variant) over the "len" bytes
 * at "key", seeded with "seed", and return the low 32 bits of the
 * result.
 *
 * The input is consumed as 16-byte blocks (two 64-bit lanes k1/k2),
 * then the 1-15 byte tail is folded in; the tail switch cases
 * intentionally fall through so each remaining byte lands in the
 * correct lane.  On x86_64 and arm64 the 64-bit rotates are emitted as
 * single rol/ror instructions via inline assembly; other architectures
 * use the ROTL64 macro.
 *
 * Changes vs. previous revision: removed a stray empty statement after
 * the tail switch and annotated the intentional fallthrough.
 */
u_int32_t
net_flowhash_mh3_x64_128(const void *key, u_int32_t len, const u_int32_t seed)
{
	const u_int8_t *data = (const u_int8_t *)key;
	const u_int32_t nblocks = len / 16;
	const u_int64_t *blocks;
	const u_int8_t *tail;
	u_int64_t h1 = seed, k1;
	u_int64_t h2 = seed, k2;
	u_int32_t i;

	/* body: mix each 16-byte block into the two accumulators */
	blocks = (const u_int64_t *)(const void *)data;
	for (i = 0; i < nblocks; i++) {
		k1 = getblock64(blocks, i * 2 + 0);
		k2 = getblock64(blocks, i * 2 + 1);

		k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
		__asm__ ( "rol $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
		k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
		k1 *= MH3_X64_128_C2;
		h1 ^= k1;

#if defined(__x86_64__)
		__asm__ ( "rol $27, %[h1]\n\t" :[h1] "+r" (h1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[h1], %[h1], #(64-27)\n\t" :[h1] "+r" (h1) : :);
#else /* !__x86_64__ && !__arm64__ */
		h1 = ROTL64(h1, 27);
#endif /* !__x86_64__ && !__arm64__ */
		h1 += h2;
		h1 = h1 * 5 + 0x52dce729;

		k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
		__asm__ ( "rol $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
		k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
		k2 *= MH3_X64_128_C1;
		h2 ^= k2;

#if defined(__x86_64__)
		__asm__ ( "rol $31, %[h2]\n\t" :[h2] "+r" (h2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[h2], %[h2], #(64-31)\n\t" :[h2] "+r" (h2) : :);
#else /* !__x86_64__ && !__arm64__ */
		h2 = ROTL64(h2, 31);
#endif /* !__x86_64__ && !__arm64__ */
		h2 += h1;
		h2 = h2 * 5 + 0x38495ab5;
	}

	/* tail: 0-15 bytes left over; bytes 8-14 go to k2, 0-7 to k1 */
	tail = (const u_int8_t *)(const void *)(data + nblocks * 16);
	k1 = 0;
	k2 = 0;

	switch (len & 15) {
	case 15:
		k2 ^= ((u_int64_t)tail[14]) << 48;
		/* FALLTHRU */
	case 14:
		k2 ^= ((u_int64_t)tail[13]) << 40;
		/* FALLTHRU */
	case 13:
		k2 ^= ((u_int64_t)tail[12]) << 32;
		/* FALLTHRU */
	case 12:
		k2 ^= ((u_int64_t)tail[11]) << 24;
		/* FALLTHRU */
	case 11:
		k2 ^= ((u_int64_t)tail[10]) << 16;
		/* FALLTHRU */
	case 10:
		k2 ^= ((u_int64_t)tail[9]) << 8;
		/* FALLTHRU */
	case 9:
		k2 ^= ((u_int64_t)tail[8]) << 0;
		k2 *= MH3_X64_128_C2;
#if defined(__x86_64__)
		__asm__ ( "rol $33, %[k2]\n\t" :[k2] "+r" (k2) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[k2], %[k2], #(64-33)\n\t" :[k2] "+r" (k2) : :);
#else /* !__x86_64__ && !__arm64__ */
		k2 = ROTL64(k2, 33);
#endif /* !__x86_64__ && !__arm64__ */
		k2 *= MH3_X64_128_C1;
		h2 ^= k2;
		/* FALLTHRU */
	case 8:
		k1 ^= ((u_int64_t)tail[7]) << 56;
		/* FALLTHRU */
	case 7:
		k1 ^= ((u_int64_t)tail[6]) << 48;
		/* FALLTHRU */
	case 6:
		k1 ^= ((u_int64_t)tail[5]) << 40;
		/* FALLTHRU */
	case 5:
		k1 ^= ((u_int64_t)tail[4]) << 32;
		/* FALLTHRU */
	case 4:
		k1 ^= ((u_int64_t)tail[3]) << 24;
		/* FALLTHRU */
	case 3:
		k1 ^= ((u_int64_t)tail[2]) << 16;
		/* FALLTHRU */
	case 2:
		k1 ^= ((u_int64_t)tail[1]) << 8;
		/* FALLTHRU */
	case 1:
		k1 ^= ((u_int64_t)tail[0]) << 0;
		k1 *= MH3_X64_128_C1;
#if defined(__x86_64__)
		__asm__ ( "rol $31, %[k1]\n\t" :[k1] "+r" (k1) : :);
#elif defined(__arm64__)
		__asm__ ( "ror %[k1], %[k1], #(64-31)\n\t" :[k1] "+r" (k1) : :);
#else /* !__x86_64__ && !__arm64__ */
		k1 = ROTL64(k1, 31);
#endif /* !__x86_64__ && !__arm64__ */
		k1 *= MH3_X64_128_C2;
		h1 ^= k1;
	}

	/* finalization: cross-mix both lanes, then return the low lane */
	h1 ^= len;
	h2 ^= len;

	h1 += h2;
	h2 += h1;

	h1 = mh3_fmix64(h1);
	h2 = mh3_fmix64(h2);

	h1 += h2;
	h2 += h1;

	/* throw all but lowest 32-bit */
	return h1 & 0xffffffff;
}
#define JHASH_INIT 0xdeadbeef
#define JHASH_MIX(a, b, c) { \
a -= c; a ^= ROTL32(c, 4); c += b; \
b -= a; b ^= ROTL32(a, 6); a += c; \
c -= b; c ^= ROTL32(b, 8); b += a; \
a -= c; a ^= ROTL32(c, 16); c += b; \
b -= a; b ^= ROTL32(a, 19); a += c; \
c -= b; c ^= ROTL32(b, 4); b += a; \
}
#define JHASH_FINAL(a, b, c) { \
c ^= b; c -= ROTL32(b, 14); \
a ^= c; a -= ROTL32(c, 11); \
b ^= a; b -= ROTL32(a, 25); \
c ^= b; c -= ROTL32(b, 16); \
a ^= c; a -= ROTL32(c, 4); \
b ^= a; b -= ROTL32(a, 14); \
c ^= b; c -= ROTL32(b, 24); \
}
#if BYTE_ORDER == BIG_ENDIAN
/*
 * Compute the 32-bit Jenkins "lookup3" hash of the "len" bytes at
 * "key", mixing in "seed" -- big-endian variant.  A partial trailing
 * 32-bit word keeps its high-order (first-in-memory) bytes.
 */
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
	u_int32_t a, b, c;

	/* Set up the internal state */
	a = b = c = JHASH_INIT + len + seed;

	if (ALIGNED32(key)) {
		/* aligned key: consume the input as whole 32-bit words */
		const u_int32_t *k = (const u_int32_t *)key;

		/* mix all 12-byte blocks except the last */
		while (len > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			JHASH_MIX(a, b, c);
			len -= 12;
			k += 3;
		}

		/*
		 * Handle the last (possibly partial) block: the whole
		 * trailing word is loaded and the bytes beyond "len"
		 * are masked off (big-endian: valid data occupies the
		 * high-order bytes).  NOTE(review): for lengths not a
		 * multiple of 4 this reads up to 3 bytes past the end
		 * of the key, per the lookup3 convention -- confirm
		 * callers guarantee that this is safe.
		 */
		switch (len) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;
		case 11:
			c += k[2] & 0xffffff00;
			b += k[1];
			a += k[0];
			break;
		case 10:
			c += k[2] & 0xffff0000;
			b += k[1];
			a += k[0];
			break;
		case 9:
			c += k[2] & 0xff000000;
			b += k[1];
			a += k[0];
			break;
		case 8:
			b += k[1];
			a += k[0];
			break;
		case 7:
			b += k[1] & 0xffffff00;
			a += k[0];
			break;
		case 6:
			b += k[1] & 0xffff0000;
			a += k[0];
			break;
		case 5:
			b += k[1] & 0xff000000;
			a += k[0];
			break;
		case 4:
			a += k[0];
			break;
		case 3:
			a += k[0] & 0xffffff00;
			break;
		case 2:
			a += k[0] & 0xffff0000;
			break;
		case 1:
			a += k[0] & 0xff000000;
			break;
		case 0:
			/* zero-length tail: nothing left to mix */
			return c;
		}

		JHASH_FINAL(a, b, c);

		return c;
	}

	/* unaligned key: assemble each word byte-by-byte (big-endian order) */
	const u_int8_t *k = (const u_int8_t *)key;

	/* mix all 12-byte blocks except the last */
	while (len > 12) {
		a += ((u_int32_t)k[0]) << 24;
		a += ((u_int32_t)k[1]) << 16;
		a += ((u_int32_t)k[2]) << 8;
		a += ((u_int32_t)k[3]);
		b += ((u_int32_t)k[4]) << 24;
		b += ((u_int32_t)k[5]) << 16;
		b += ((u_int32_t)k[6]) << 8;
		b += ((u_int32_t)k[7]);
		c += ((u_int32_t)k[8]) << 24;
		c += ((u_int32_t)k[9]) << 16;
		c += ((u_int32_t)k[10]) << 8;
		c += ((u_int32_t)k[11]);
		JHASH_MIX(a, b, c);
		len -= 12;
		k += 12;
	}

	/*
	 * Handle the last (possibly partial) block; every case falls
	 * through to accumulate each remaining byte, so exactly "len"
	 * bytes are read -- no over-read on this path.
	 */
	switch (len) {
	case 12:
		c += k[11];
		/* FALLTHRU */
	case 11:
		c += ((u_int32_t)k[10]) << 8;
		/* FALLTHRU */
	case 10:
		c += ((u_int32_t)k[9]) << 16;
		/* FALLTHRU */
	case 9:
		c += ((u_int32_t)k[8]) << 24;
		/* FALLTHRU */
	case 8:
		b += k[7];
		/* FALLTHRU */
	case 7:
		b += ((u_int32_t)k[6]) << 8;
		/* FALLTHRU */
	case 6:
		b += ((u_int32_t)k[5]) << 16;
		/* FALLTHRU */
	case 5:
		b += ((u_int32_t)k[4]) << 24;
		/* FALLTHRU */
	case 4:
		a += k[3];
		/* FALLTHRU */
	case 3:
		a += ((u_int32_t)k[2]) << 8;
		/* FALLTHRU */
	case 2:
		a += ((u_int32_t)k[1]) << 16;
		/* FALLTHRU */
	case 1:
		a += ((u_int32_t)k[0]) << 24;
		break;
	case 0:
		/* zero-length tail: nothing left to mix */
		return c;
	}

	JHASH_FINAL(a, b, c);

	return c;
}
#else
/*
 * Compute the 32-bit Jenkins "lookup3" hash of the "len" bytes at
 * "key", mixing in "seed" -- little-endian variant.  A partial
 * trailing word keeps its low-order (first-in-memory) bytes.
 *
 * Three read strategies are used depending on key alignment: 32-bit
 * words, 16-bit halfwords, or single bytes.  On i386/x86_64 the byte
 * path is compiled out entirely (misaligned word loads are permitted
 * on x86), so the #if/#endif lines below deliberately leave an
 * unbalanced-looking brace structure -- edit with care.
 */
u_int32_t
net_flowhash_jhash(const void *key, u_int32_t len, const u_int32_t seed)
{
	u_int32_t a, b, c;

	/* Set up the internal state */
	a = b = c = JHASH_INIT + len + seed;

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * On x86 the 32-bit path is taken both for 4-byte-aligned keys
	 * and for keys with odd addresses (!ALIGNED16); only keys that
	 * are exactly halfword-aligned fall to the 16-bit path below.
	 */
	if (ALIGNED32(key) || !ALIGNED16(key)) {
#else /* !defined(__i386__) && !defined(__x86_64__) */
	if (ALIGNED32(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
		/* aligned key: consume the input as whole 32-bit words */
		const u_int32_t *k = (const u_int32_t *)key;

		/* mix all 12-byte blocks except the last */
		while (len > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			JHASH_MIX(a, b, c);
			len -= 12;
			k += 3;
		}

		/*
		 * Handle the last (possibly partial) block: the whole
		 * trailing word is loaded and the bytes beyond "len"
		 * are masked off (little-endian: valid data occupies
		 * the low-order bytes).  NOTE(review): for lengths not
		 * a multiple of 4 this reads up to 3 bytes past the end
		 * of the key, per the lookup3 convention -- confirm
		 * callers guarantee that this is safe.
		 */
		switch (len) {
		case 12:
			c += k[2];
			b += k[1];
			a += k[0];
			break;
		case 11:
			c += k[2] & 0xffffff;
			b += k[1];
			a += k[0];
			break;
		case 10:
			c += k[2] & 0xffff;
			b += k[1];
			a += k[0];
			break;
		case 9:
			c += k[2] & 0xff;
			b += k[1];
			a += k[0];
			break;
		case 8:
			b += k[1];
			a += k[0];
			break;
		case 7:
			b += k[1] & 0xffffff;
			a += k[0];
			break;
		case 6:
			b += k[1] & 0xffff;
			a += k[0];
			break;
		case 5:
			b += k[1] & 0xff;
			a += k[0];
			break;
		case 4:
			a += k[0];
			break;
		case 3:
			a += k[0] & 0xffffff;
			break;
		case 2:
			a += k[0] & 0xffff;
			break;
		case 1:
			a += k[0] & 0xff;
			break;
		case 0:
			/* zero-length tail: nothing left to mix */
			return c;
		}

		JHASH_FINAL(a, b, c);

		return c;
	}
#if !defined(__i386__) && !defined(__x86_64__)
	else if (ALIGNED16(key)) {
#endif /* !defined(__i386__) && !defined(__x86_64__) */
	/*
	 * 16-bit-aligned key: consume the input as halfword pairs.  On
	 * x86 this code is reached unconditionally after the if above
	 * (every key arriving here is halfword-aligned by elimination).
	 */
	const u_int16_t *k = (const u_int16_t *)key;
	const u_int8_t *k8;

	/* mix all 12-byte blocks except the last */
	while (len > 12) {
		a += k[0] + (((u_int32_t)k[1]) << 16);
		b += k[2] + (((u_int32_t)k[3]) << 16);
		c += k[4] + (((u_int32_t)k[5]) << 16);
		JHASH_MIX(a, b, c);
		len -= 12;
		k += 6;
	}

	/*
	 * Handle the last (possibly partial) block: odd lengths pick up
	 * their final byte through k8, then fall through to the even
	 * case below it.
	 */
	k8 = (const u_int8_t *)k;
	switch (len) {
	case 12:
		c += k[4] + (((u_int32_t)k[5]) << 16);
		b += k[2] + (((u_int32_t)k[3]) << 16);
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;
	case 11:
		c += ((u_int32_t)k8[10]) << 16;
		/* FALLTHRU */
	case 10:
		c += k[4];
		b += k[2] + (((u_int32_t)k[3]) << 16);
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;
	case 9:
		c += k8[8];
		/* FALLTHRU */
	case 8:
		b += k[2] + (((u_int32_t)k[3]) << 16);
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;
	case 7:
		b += ((u_int32_t)k8[6]) << 16;
		/* FALLTHRU */
	case 6:
		b += k[2];
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;
	case 5:
		b += k8[4];
		/* FALLTHRU */
	case 4:
		a += k[0] + (((u_int32_t)k[1]) << 16);
		break;
	case 3:
		a += ((u_int32_t)k8[2]) << 16;
		/* FALLTHRU */
	case 2:
		a += k[0];
		break;
	case 1:
		a += k8[0];
		break;
	case 0:
		/* zero-length tail: nothing left to mix */
		return c;
	}

	JHASH_FINAL(a, b, c);

	return c;
#if !defined(__i386__) && !defined(__x86_64__)
	}

	/* unaligned key: assemble each word byte-by-byte (little-endian order) */
	const u_int8_t *k = (const u_int8_t *)key;

	/* mix all 12-byte blocks except the last */
	while (len > 12) {
		a += k[0];
		a += ((u_int32_t)k[1]) << 8;
		a += ((u_int32_t)k[2]) << 16;
		a += ((u_int32_t)k[3]) << 24;
		b += k[4];
		b += ((u_int32_t)k[5]) << 8;
		b += ((u_int32_t)k[6]) << 16;
		b += ((u_int32_t)k[7]) << 24;
		c += k[8];
		c += ((u_int32_t)k[9]) << 8;
		c += ((u_int32_t)k[10]) << 16;
		c += ((u_int32_t)k[11]) << 24;
		JHASH_MIX(a, b, c);
		len -= 12;
		k += 12;
	}

	/*
	 * Handle the last (possibly partial) block; every case falls
	 * through to accumulate each remaining byte, so exactly "len"
	 * bytes are read -- no over-read on this path.
	 */
	switch (len) {
	case 12:
		c += ((u_int32_t)k[11]) << 24;
		/* FALLTHRU */
	case 11:
		c += ((u_int32_t)k[10]) << 16;
		/* FALLTHRU */
	case 10:
		c += ((u_int32_t)k[9]) << 8;
		/* FALLTHRU */
	case 9:
		c += k[8];
		/* FALLTHRU */
	case 8:
		b += ((u_int32_t)k[7]) << 24;
		/* FALLTHRU */
	case 7:
		b += ((u_int32_t)k[6]) << 16;
		/* FALLTHRU */
	case 6:
		b += ((u_int32_t)k[5]) << 8;
		/* FALLTHRU */
	case 5:
		b += k[4];
		/* FALLTHRU */
	case 4:
		a += ((u_int32_t)k[3]) << 24;
		/* FALLTHRU */
	case 3:
		a += ((u_int32_t)k[2]) << 16;
		/* FALLTHRU */
	case 2:
		a += ((u_int32_t)k[1]) << 8;
		/* FALLTHRU */
	case 1:
		a += k[0];
		break;
	case 0:
		/* zero-length tail: nothing left to mix */
		return c;
	}

	JHASH_FINAL(a, b, c);

	return c;
#endif /* !defined(__i386__) && !defined(__x86_64__) */
}
#endif