mirror of
https://gitee.com/dromara/hutool.git
synced 2025-05-06 05:38:00 +08:00
修复murmur3_32实现错误
This commit is contained in:
parent
0c6ea1b3e8
commit
11f52ffc4f
@ -11,6 +11,7 @@
|
||||
|
||||
### 🐞Bug修复
|
||||
* 【core 】 修复FileNameUtil.cleanInvalid无法去除换行符问题(issue#I5RMZV@Gitee)
|
||||
* 【core 】 修复murmur3_32实现错误(pr#2616@Github)
|
||||
|
||||
-------------------------------------------------------------------------------------------------------------
|
||||
|
||||
|
@ -17,10 +17,10 @@ import java.nio.charset.Charset;
|
||||
* 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255
|
||||
* </p>
|
||||
*
|
||||
* @author looly,Simhash4J
|
||||
* @author looly, Simhash4J
|
||||
* @since 4.3.3
|
||||
*/
|
||||
public class MurmurHash implements Serializable{
|
||||
public class MurmurHash implements Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
// Constants for 32 bit variant
|
||||
@ -68,55 +68,57 @@ public class MurmurHash implements Serializable{
|
||||
/**
|
||||
* Murmur3 32-bit Hash值计算
|
||||
*
|
||||
* @param data 数据
|
||||
* @param data 数据
|
||||
* @param length 长度
|
||||
* @param seed 种子,默认0
|
||||
* @param seed 种子,默认0
|
||||
* @return Hash值
|
||||
*/
|
||||
public static int hash32(byte[] data, int length, int seed) {
|
||||
return hash32(data, 0, length, seed);
|
||||
}
|
||||
|
||||
/**
|
||||
* Murmur3 32-bit Hash值计算
|
||||
*
|
||||
* @param data 数据
|
||||
* @param offset 数据开始位置
|
||||
* @param length 长度
|
||||
* @param seed 种子,默认0
|
||||
* @return Hash值
|
||||
*/
|
||||
public static int hash32(byte[] data, int offset, int length, int seed) {
|
||||
int hash = seed;
|
||||
final int nblocks = length >> 2;
|
||||
|
||||
// body
|
||||
for (int i = 0; i < nblocks; i++) {
|
||||
int i4 = i << 2;
|
||||
int k = ByteUtil.bytesToInt(data, i4, DEFAULT_ORDER);
|
||||
|
||||
final int i4 = offset + (i << 2);
|
||||
final int k = ByteUtil.bytesToInt(data, i4, DEFAULT_ORDER);
|
||||
// mix functions
|
||||
k *= C1_32;
|
||||
k = Integer.rotateLeft(k, R1_32);
|
||||
k *= C2_32;
|
||||
hash ^= k;
|
||||
hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
|
||||
hash = mix32(k, hash);
|
||||
}
|
||||
|
||||
// tail
|
||||
int idx = nblocks << 2;
|
||||
final int idx = offset + (nblocks << 2);
|
||||
int k1 = 0;
|
||||
switch (length - idx) {
|
||||
case 3:
|
||||
k1 ^= (data[idx + 2] & 0xff) << 16;
|
||||
case 2:
|
||||
k1 ^= (data[idx + 1] & 0xff) << 8;
|
||||
case 1:
|
||||
k1 ^= (data[idx] & 0xff);
|
||||
switch (offset + length - idx) {
|
||||
case 3:
|
||||
k1 ^= (data[idx + 2] & 0xff) << 16;
|
||||
case 2:
|
||||
k1 ^= (data[idx + 1] & 0xff) << 8;
|
||||
case 1:
|
||||
k1 ^= (data[idx] & 0xff);
|
||||
|
||||
// mix functions
|
||||
k1 *= C1_32;
|
||||
k1 = Integer.rotateLeft(k1, R1_32);
|
||||
k1 *= C2_32;
|
||||
hash ^= k1;
|
||||
// mix functions
|
||||
k1 *= C1_32;
|
||||
k1 = Integer.rotateLeft(k1, R1_32);
|
||||
k1 *= C2_32;
|
||||
hash ^= k1;
|
||||
}
|
||||
|
||||
// finalization
|
||||
hash ^= length;
|
||||
hash ^= (hash >>> 16);
|
||||
hash *= 0x85ebca6b;
|
||||
hash ^= (hash >>> 13);
|
||||
hash *= 0xc2b2ae35;
|
||||
hash ^= (hash >>> 16);
|
||||
|
||||
return hash;
|
||||
return fmix32(hash);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -133,7 +135,6 @@ public class MurmurHash implements Serializable{
|
||||
* Murmur3 64-bit 算法<br>
|
||||
* This is essentially MSB 8 bytes of Murmur3 128-bit variant.
|
||||
*
|
||||
*
|
||||
* @param data 数据
|
||||
* @return Hash值
|
||||
*/
|
||||
@ -142,12 +143,12 @@ public class MurmurHash implements Serializable{
|
||||
}
|
||||
|
||||
/**
|
||||
* Murmur3 64-bit 算法 <br>
|
||||
* 类Murmur3 64-bit 算法 <br>
|
||||
* This is essentially MSB 8 bytes of Murmur3 128-bit variant.
|
||||
*
|
||||
* @param data 数据
|
||||
* @param data 数据
|
||||
* @param length 长度
|
||||
* @param seed 种子,默认0
|
||||
* @param seed 种子,默认0
|
||||
* @return Hash值
|
||||
*/
|
||||
public static long hash64(byte[] data, int length, int seed) {
|
||||
@ -171,24 +172,24 @@ public class MurmurHash implements Serializable{
|
||||
long k1 = 0;
|
||||
int tailStart = nblocks << 3;
|
||||
switch (length - tailStart) {
|
||||
case 7:
|
||||
k1 ^= ((long) data[tailStart + 6] & 0xff) << 48;
|
||||
case 6:
|
||||
k1 ^= ((long) data[tailStart + 5] & 0xff) << 40;
|
||||
case 5:
|
||||
k1 ^= ((long) data[tailStart + 4] & 0xff) << 32;
|
||||
case 4:
|
||||
k1 ^= ((long) data[tailStart + 3] & 0xff) << 24;
|
||||
case 3:
|
||||
k1 ^= ((long) data[tailStart + 2] & 0xff) << 16;
|
||||
case 2:
|
||||
k1 ^= ((long) data[tailStart + 1] & 0xff) << 8;
|
||||
case 1:
|
||||
k1 ^= ((long) data[tailStart] & 0xff);
|
||||
k1 *= C1;
|
||||
k1 = Long.rotateLeft(k1, R1);
|
||||
k1 *= C2;
|
||||
hash ^= k1;
|
||||
case 7:
|
||||
k1 ^= ((long) data[tailStart + 6] & 0xff) << 48;
|
||||
case 6:
|
||||
k1 ^= ((long) data[tailStart + 5] & 0xff) << 40;
|
||||
case 5:
|
||||
k1 ^= ((long) data[tailStart + 4] & 0xff) << 32;
|
||||
case 4:
|
||||
k1 ^= ((long) data[tailStart + 3] & 0xff) << 24;
|
||||
case 3:
|
||||
k1 ^= ((long) data[tailStart + 2] & 0xff) << 16;
|
||||
case 2:
|
||||
k1 ^= ((long) data[tailStart + 1] & 0xff) << 8;
|
||||
case 1:
|
||||
k1 ^= ((long) data[tailStart] & 0xff);
|
||||
k1 *= C1;
|
||||
k1 = Long.rotateLeft(k1, R1);
|
||||
k1 *= C2;
|
||||
hash ^= k1;
|
||||
}
|
||||
|
||||
// finalization
|
||||
@ -221,19 +222,35 @@ public class MurmurHash implements Serializable{
|
||||
/**
|
||||
* Murmur3 128-bit variant.
|
||||
*
|
||||
* @param data 数据
|
||||
* @param data 数据
|
||||
* @param length 长度
|
||||
* @param seed 种子,默认0
|
||||
* @param seed 种子,默认0
|
||||
* @return Hash值(2 longs)
|
||||
*/
|
||||
public static long[] hash128(byte[] data, int length, int seed) {
|
||||
return hash128(data, 0, length, seed);
|
||||
}
|
||||
|
||||
/**
|
||||
* Murmur3 128-bit variant.
|
||||
*
|
||||
* @param data 数据
|
||||
* @param offset 数据开始位置
|
||||
* @param length 长度
|
||||
* @param seed 种子,默认0
|
||||
* @return Hash值(2 longs)
|
||||
*/
|
||||
public static long[] hash128(byte[] data, int offset, int length, int seed) {
|
||||
// 避免负数的种子
|
||||
seed &= 0xffffffffL;
|
||||
|
||||
long h1 = seed;
|
||||
long h2 = seed;
|
||||
final int nblocks = length >> 4;
|
||||
|
||||
// body
|
||||
for (int i = 0; i < nblocks; i++) {
|
||||
final int i16 = i << 4;
|
||||
final int i16 = offset + (i << 4);
|
||||
long k1 = ByteUtil.bytesToLong(data, i16, DEFAULT_ORDER);
|
||||
long k2 = ByteUtil.bytesToLong(data, i16 + 8, DEFAULT_ORDER);
|
||||
|
||||
@ -259,47 +276,47 @@ public class MurmurHash implements Serializable{
|
||||
// tail
|
||||
long k1 = 0;
|
||||
long k2 = 0;
|
||||
int tailStart = nblocks << 4;
|
||||
switch (length - tailStart) {
|
||||
case 15:
|
||||
k2 ^= (long) (data[tailStart + 14] & 0xff) << 48;
|
||||
case 14:
|
||||
k2 ^= (long) (data[tailStart + 13] & 0xff) << 40;
|
||||
case 13:
|
||||
k2 ^= (long) (data[tailStart + 12] & 0xff) << 32;
|
||||
case 12:
|
||||
k2 ^= (long) (data[tailStart + 11] & 0xff) << 24;
|
||||
case 11:
|
||||
k2 ^= (long) (data[tailStart + 10] & 0xff) << 16;
|
||||
case 10:
|
||||
k2 ^= (long) (data[tailStart + 9] & 0xff) << 8;
|
||||
case 9:
|
||||
k2 ^= data[tailStart + 8] & 0xff;
|
||||
k2 *= C2;
|
||||
k2 = Long.rotateLeft(k2, R3);
|
||||
k2 *= C1;
|
||||
h2 ^= k2;
|
||||
final int tailStart = offset + (nblocks << 4);
|
||||
switch (offset + length - tailStart) {
|
||||
case 15:
|
||||
k2 ^= (long) (data[tailStart + 14] & 0xff) << 48;
|
||||
case 14:
|
||||
k2 ^= (long) (data[tailStart + 13] & 0xff) << 40;
|
||||
case 13:
|
||||
k2 ^= (long) (data[tailStart + 12] & 0xff) << 32;
|
||||
case 12:
|
||||
k2 ^= (long) (data[tailStart + 11] & 0xff) << 24;
|
||||
case 11:
|
||||
k2 ^= (long) (data[tailStart + 10] & 0xff) << 16;
|
||||
case 10:
|
||||
k2 ^= (long) (data[tailStart + 9] & 0xff) << 8;
|
||||
case 9:
|
||||
k2 ^= data[tailStart + 8] & 0xff;
|
||||
k2 *= C2;
|
||||
k2 = Long.rotateLeft(k2, R3);
|
||||
k2 *= C1;
|
||||
h2 ^= k2;
|
||||
|
||||
case 8:
|
||||
k1 ^= (long) (data[tailStart + 7] & 0xff) << 56;
|
||||
case 7:
|
||||
k1 ^= (long) (data[tailStart + 6] & 0xff) << 48;
|
||||
case 6:
|
||||
k1 ^= (long) (data[tailStart + 5] & 0xff) << 40;
|
||||
case 5:
|
||||
k1 ^= (long) (data[tailStart + 4] & 0xff) << 32;
|
||||
case 4:
|
||||
k1 ^= (long) (data[tailStart + 3] & 0xff) << 24;
|
||||
case 3:
|
||||
k1 ^= (long) (data[tailStart + 2] & 0xff) << 16;
|
||||
case 2:
|
||||
k1 ^= (long) (data[tailStart + 1] & 0xff) << 8;
|
||||
case 1:
|
||||
k1 ^= data[tailStart] & 0xff;
|
||||
k1 *= C1;
|
||||
k1 = Long.rotateLeft(k1, R1);
|
||||
k1 *= C2;
|
||||
h1 ^= k1;
|
||||
case 8:
|
||||
k1 ^= (long) (data[tailStart + 7] & 0xff) << 56;
|
||||
case 7:
|
||||
k1 ^= (long) (data[tailStart + 6] & 0xff) << 48;
|
||||
case 6:
|
||||
k1 ^= (long) (data[tailStart + 5] & 0xff) << 40;
|
||||
case 5:
|
||||
k1 ^= (long) (data[tailStart + 4] & 0xff) << 32;
|
||||
case 4:
|
||||
k1 ^= (long) (data[tailStart + 3] & 0xff) << 24;
|
||||
case 3:
|
||||
k1 ^= (long) (data[tailStart + 2] & 0xff) << 16;
|
||||
case 2:
|
||||
k1 ^= (long) (data[tailStart + 1] & 0xff) << 8;
|
||||
case 1:
|
||||
k1 ^= data[tailStart] & 0xff;
|
||||
k1 *= C1;
|
||||
k1 = Long.rotateLeft(k1, R1);
|
||||
k1 *= C2;
|
||||
h1 ^= k1;
|
||||
}
|
||||
|
||||
// finalization
|
||||
@ -315,7 +332,24 @@ public class MurmurHash implements Serializable{
|
||||
h1 += h2;
|
||||
h2 += h1;
|
||||
|
||||
return new long[] { h1, h2 };
|
||||
return new long[]{h1, h2};
|
||||
}
|
||||
|
||||
private static int mix32(int k, int hash) {
|
||||
k *= C1_32;
|
||||
k = Integer.rotateLeft(k, R1_32);
|
||||
k *= C2_32;
|
||||
hash ^= k;
|
||||
return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
|
||||
}
|
||||
|
||||
private static int fmix32(int hash) {
|
||||
hash ^= (hash >>> 16);
|
||||
hash *= 0x85ebca6b;
|
||||
hash ^= (hash >>> 13);
|
||||
hash *= 0xc2b2ae35;
|
||||
hash ^= (hash >>> 16);
|
||||
return hash;
|
||||
}
|
||||
|
||||
private static long fmix64(long h) {
|
||||
|
@ -4,7 +4,7 @@ import cn.hutool.core.util.StrUtil;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class MurMurHashTest {
|
||||
public class MurmurHashTest {
|
||||
|
||||
@Test
|
||||
public void hash32Test() {
|
Loading…
Reference in New Issue
Block a user