From 7ab3384e5d2a77ff1f70a5b76f8cfa7b11c1d37f Mon Sep 17 00:00:00 2001 From: Marko Kreen Date: Thu, 8 Nov 2007 21:32:40 +0000 Subject: [PATCH] jenkins hash: optimize last memcpy The compiler cannot optimize a variable-size memcpy, so use a simple inlined version instead. That makes this version always faster than Jenkins' version on Core Duo. On other CPUs it's a +3% win, although still slower than Jenkins'. --- src/hash.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/hash.c b/src/hash.c index d87e656..c153129 100644 --- a/src/hash.c +++ b/src/hash.c @@ -56,7 +56,17 @@ c ^= b; c -= rot(b,24); \ } while (0) -/* simple version - let compiler worry about memory access */ +/* for a small amount of bytes the call to libc is a loss */ +static inline void simple_memcpy(void *dst, const void *src, size_t n) +{ + const uint8_t *s = src; + uint8_t *d = dst; + + while (n--) + *d++ = *s++; +} + +/* short version - let compiler worry about memory access */ uint32_t lookup3_hash(const void *data, size_t len) { uint32_t a, b, c; @@ -78,7 +88,7 @@ uint32_t lookup3_hash(const void *data, size_t len) } buf[0] = buf[1] = buf[2] = 0; - memcpy(buf, p, len); + simple_memcpy(buf, p, len); a += buf[0]; b += buf[1]; c += buf[2]; -- 2.39.5