From edcf7412ff2280f0e85fcffb6f7ea2b43631fb91 Mon Sep 17 00:00:00 2001
From: Jonathan Leibiusky
Date: Wed, 15 Sep 2010 15:06:12 -0300
Subject: [PATCH] Changed MurmurHash algo, to the one developed in http://github.com/tdunning/MAHOUT-228

---
Review notes (text in this section is ignored when the patch is applied and
is not part of the commit message):
 * Hashing.MURMUR_HASH now returns the 64-bit hash64A value, whereas the
   removed inline implementation returned a 32-bit MurmurHash2 value, so the
   hash values produced for existing keys change with this patch.
 * MurmurHash.hash(String) calls key.getBytes() without a charset argument,
   so the bytes that get hashed depend on the JVM's default charset.
 * hash(ByteBuffer, int) and hash64A(ByteBuffer, int) restore the buffer's
   byte order but read with relative gets, so the caller's buffer position is
   left at its limit on return.

 src/main/java/redis/clients/util/Hashing.java |  52 +-----
 .../java/redis/clients/util/MurmurHash.java   | 162 ++++++++++++++++++
 .../clients/jedis/tests/ShardedJedisTest.java |   2 +-
 3 files changed, 166 insertions(+), 50 deletions(-)
 create mode 100644 src/main/java/redis/clients/util/MurmurHash.java

diff --git a/src/main/java/redis/clients/util/Hashing.java b/src/main/java/redis/clients/util/Hashing.java
index b961992..291416f 100644
--- a/src/main/java/redis/clients/util/Hashing.java
+++ b/src/main/java/redis/clients/util/Hashing.java
@@ -3,55 +3,9 @@ package redis.clients.util;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
 
-public abstract class Hashing {
-    public static final Hashing MURMURE_HASH = new Hashing() {
-        public long hash(String key) {
-            // 'm' and 'r' are mixing constants generated offline.
-            // They're not really 'magic', they just happen to work well.
-            byte[] data = key.getBytes();
-            int seed = 0x1234ABCD;
-            int m = 0x5bd1e995;
-            int r = 24;
+public interface Hashing {
+    public static final Hashing MURMUR_HASH = new MurmurHash();
 
-            // Initialize the hash to a 'random' value
-            int len = data.length;
-            int h = seed ^ len;
-
-            int i = 0;
-            while (len >= 4) {
-                int k = data[i + 0] & 0xFF;
-                k |= (data[i + 1] & 0xFF) << 8;
-                k |= (data[i + 2] & 0xFF) << 16;
-                k |= (data[i + 3] & 0xFF) << 24;
-
-                k *= m;
-                k ^= k >>> r;
-                k *= m;
-
-                h *= m;
-                h ^= k;
-
-                i += 4;
-                len -= 4;
-            }
-
-            switch (len) {
-            case 3:
-                h ^= (data[i + 2] & 0xFF) << 16;
-            case 2:
-                h ^= (data[i + 1] & 0xFF) << 8;
-            case 1:
-                h ^= (data[i + 0] & 0xFF);
-                h *= m;
-            }
-
-            h ^= h >>> 13;
-            h *= m;
-            h ^= h >>> 15;
-
-            return h;
-        }
-    };
     public static final Hashing MD5 = new Hashing() {
         private MessageDigest md5 = null; // avoid recurring construction
 
@@ -75,5 +29,5 @@ public abstract class Hashing {
         }
     };
 
-    public abstract long hash(String key);
+    public long hash(String key);
 }
\ No newline at end of file
diff --git a/src/main/java/redis/clients/util/MurmurHash.java b/src/main/java/redis/clients/util/MurmurHash.java
new file mode 100644
index 0000000..0294a25
--- /dev/null
+++ b/src/main/java/redis/clients/util/MurmurHash.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package redis.clients.util;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * This is a very fast, non-cryptographic hash suitable for general hash-based
+ * lookup. See http://murmurhash.googlepages.com/ for more details.
+ * <p/>
+ * <p>
+ * The C version of MurmurHash 2.0 found at that site was ported to Java by
+ * Andrzej Bialecki (ab at getopt org).
+ * </p>
+ */
+public class MurmurHash implements Hashing {
+    /**
+     * Hashes bytes in an array.
+     *
+     * @param data
+     *            The bytes to hash.
+     * @param seed
+     *            The seed for the hash.
+     * @return The 32 bit hash of the bytes in question.
+     */
+    public static int hash(byte[] data, int seed) {
+        return hash(ByteBuffer.wrap(data), seed);
+    }
+
+    /**
+     * Hashes bytes in part of an array.
+     *
+     * @param data
+     *            The data to hash.
+     * @param offset
+     *            Where to start munging.
+     * @param length
+     *            How many bytes to process.
+     * @param seed
+     *            The seed to start with.
+     * @return The 32-bit hash of the data in question.
+     */
+    public static int hash(byte[] data, int offset, int length, int seed) {
+        return hash(ByteBuffer.wrap(data, offset, length), seed);
+    }
+
+    /**
+     * Hashes the bytes in a buffer from the current position to the limit.
+     *
+     * @param buf
+     *            The bytes to hash.
+     * @param seed
+     *            The seed for the hash.
+     * @return The 32 bit murmur hash of the bytes in the buffer.
+     */
+    public static int hash(ByteBuffer buf, int seed) {
+        // save byte order for later restoration
+        ByteOrder byteOrder = buf.order();
+        buf.order(ByteOrder.LITTLE_ENDIAN);
+
+        int m = 0x5bd1e995;
+        int r = 24;
+
+        int h = seed ^ buf.remaining();
+
+        int k;
+        while (buf.remaining() >= 4) {
+            k = buf.getInt();
+
+            k *= m;
+            k ^= k >>> r;
+            k *= m;
+
+            h *= m;
+            h ^= k;
+        }
+
+        if (buf.remaining() > 0) {
+            ByteBuffer finish = ByteBuffer.allocate(4).order(
+                    ByteOrder.LITTLE_ENDIAN);
+            // for big-endian version, use this first:
+            // finish.position(4-buf.remaining());
+            finish.put(buf).rewind();
+            h ^= finish.getInt();
+            h *= m;
+        }
+
+        h ^= h >>> 13;
+        h *= m;
+        h ^= h >>> 15;
+
+        buf.order(byteOrder);
+        return h;
+    }
+
+    public static long hash64A(byte[] data, int seed) {
+        return hash64A(ByteBuffer.wrap(data), seed);
+    }
+
+    public static long hash64A(byte[] data, int offset, int length, int seed) {
+        return hash64A(ByteBuffer.wrap(data, offset, length), seed);
+    }
+
+    public static long hash64A(ByteBuffer buf, int seed) {
+        ByteOrder byteOrder = buf.order();
+        buf.order(ByteOrder.LITTLE_ENDIAN);
+
+        long m = 0xc6a4a7935bd1e995L;
+        int r = 47;
+
+        long h = seed ^ (buf.remaining() * m);
+
+        long k;
+        while (buf.remaining() >= 8) {
+            k = buf.getLong();
+
+            k *= m;
+            k ^= k >>> r;
+            k *= m;
+
+            h ^= k;
+            h *= m;
+        }
+
+        if (buf.remaining() > 0) {
+            ByteBuffer finish = ByteBuffer.allocate(8).order(
+                    ByteOrder.LITTLE_ENDIAN);
+            // for big-endian version, do this first:
+            // finish.position(8-buf.remaining());
+            finish.put(buf).rewind();
+            h ^= finish.getLong();
+            h *= m;
+        }
+
+        h ^= h >>> r;
+        h *= m;
+        h ^= h >>> r;
+
+        buf.order(byteOrder);
+        return h;
+    }
+
+    public long hash(String key) {
+        return hash64A(key.getBytes(), 0x1234ABCD);
+    }
+}
\ No newline at end of file
diff --git a/src/test/java/redis/clients/jedis/tests/ShardedJedisTest.java b/src/test/java/redis/clients/jedis/tests/ShardedJedisTest.java
index 0223833..fe4a8a4 100644
--- a/src/test/java/redis/clients/jedis/tests/ShardedJedisTest.java
+++ b/src/test/java/redis/clients/jedis/tests/ShardedJedisTest.java
@@ -64,7 +64,7 @@ public class ShardedJedisTest extends Assert {
         si = new ShardInfo(redis2.host, redis2.port);
         si.setPassword("foobared");
         shards.add(si);
-        ShardedJedis jedis = new ShardedJedis(shards, Hashing.MURMURE_HASH);
+        ShardedJedis jedis = new ShardedJedis(shards, Hashing.MURMUR_HASH);
         jedis.set("a", "bar");
         ShardInfo s1 = jedis.getShardInfo("a");
         jedis.set("b", "bar1");