修复murmur3_32实现错误

This commit is contained in:
Looly 2022-09-20 17:03:42 +08:00
parent 0c6ea1b3e8
commit 11f52ffc4f
3 changed files with 134 additions and 99 deletions

View File

@ -11,6 +11,7 @@
### 🐞Bug修复
* 【core 】 修复FileNameUtil.cleanInvalid无法去除换行符问题issue#I5RMZV@Gitee
* 【core 】 修复murmur3_32实现错误pr#2616@Github
-------------------------------------------------------------------------------------------------------------

View File

@ -17,10 +17,10 @@ import java.nio.charset.Charset;
* 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255
* </p>
*
* @author looly,Simhash4J
* @author looly, Simhash4J
* @since 4.3.3
*/
public class MurmurHash implements Serializable{
public class MurmurHash implements Serializable {
private static final long serialVersionUID = 1L;
// Constants for 32 bit variant
@ -74,26 +74,34 @@ public class MurmurHash implements Serializable{
* @return Hash值
*/
public static int hash32(byte[] data, int length, int seed) {
    // Convenience overload: hash the first `length` bytes by delegating to the
    // offset-aware variant with a start position of 0.
    final int offset = 0;
    return hash32(data, offset, length, seed);
}
/**
* Murmur3 32-bit Hash值计算
*
* @param data 数据
* @param offset 数据开始位置
* @param length 长度
* @param seed 种子默认0
* @return Hash值
*/
public static int hash32(byte[] data, int offset, int length, int seed) {
int hash = seed;
final int nblocks = length >> 2;
// body
for (int i = 0; i < nblocks; i++) {
int i4 = i << 2;
int k = ByteUtil.bytesToInt(data, i4, DEFAULT_ORDER);
final int i4 = offset + (i << 2);
final int k = ByteUtil.bytesToInt(data, i4, DEFAULT_ORDER);
// mix functions
k *= C1_32;
k = Integer.rotateLeft(k, R1_32);
k *= C2_32;
hash ^= k;
hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
hash = mix32(k, hash);
}
// tail
int idx = nblocks << 2;
final int idx = offset + (nblocks << 2);
int k1 = 0;
switch (length - idx) {
switch (offset + length - idx) {
case 3:
k1 ^= (data[idx + 2] & 0xff) << 16;
case 2:
@ -110,13 +118,7 @@ public class MurmurHash implements Serializable{
// finalization
hash ^= length;
hash ^= (hash >>> 16);
hash *= 0x85ebca6b;
hash ^= (hash >>> 13);
hash *= 0xc2b2ae35;
hash ^= (hash >>> 16);
return hash;
return fmix32(hash);
}
/**
@ -133,7 +135,6 @@ public class MurmurHash implements Serializable{
* Murmur3 64-bit 算法<br>
* This is essentially MSB 8 bytes of Murmur3 128-bit variant.
*
*
* @param data 数据
* @return Hash值
*/
@ -142,7 +143,7 @@ public class MurmurHash implements Serializable{
}
/**
* Murmur3 64-bit 算法 <br>
* Murmur3 64-bit 算法 <br>
* This is essentially MSB 8 bytes of Murmur3 128-bit variant.
*
* @param data 数据
@ -227,13 +228,29 @@ public class MurmurHash implements Serializable{
* @return Hash值(2 longs)
*/
public static long[] hash128(byte[] data, int length, int seed) {
    // Convenience overload: hash the first `length` bytes by delegating to the
    // offset-aware variant with a start position of 0.
    final int offset = 0;
    return hash128(data, offset, length, seed);
}
/**
* Murmur3 128-bit variant.
*
* @param data 数据
* @param offset 数据开始位置
* @param length 长度
* @param seed 种子默认0
* @return Hash值(2 longs)
*/
public static long[] hash128(byte[] data, int offset, int length, int seed) {
// 避免负数的种子
seed &= 0xffffffffL;
long h1 = seed;
long h2 = seed;
final int nblocks = length >> 4;
// body
for (int i = 0; i < nblocks; i++) {
final int i16 = i << 4;
final int i16 = offset + (i << 4);
long k1 = ByteUtil.bytesToLong(data, i16, DEFAULT_ORDER);
long k2 = ByteUtil.bytesToLong(data, i16 + 8, DEFAULT_ORDER);
@ -259,8 +276,8 @@ public class MurmurHash implements Serializable{
// tail
long k1 = 0;
long k2 = 0;
int tailStart = nblocks << 4;
switch (length - tailStart) {
final int tailStart = offset + (nblocks << 4);
switch (offset + length - tailStart) {
case 15:
k2 ^= (long) (data[tailStart + 14] & 0xff) << 48;
case 14:
@ -315,7 +332,24 @@ public class MurmurHash implements Serializable{
h1 += h2;
h2 += h1;
return new long[] { h1, h2 };
return new long[]{h1, h2};
}
private static int mix32(int k, int hash) {
    // Scramble the incoming block: multiply, rotate left, multiply again.
    // NOTE(review): written assuming C1_32 / C2_32 are int constants - confirm
    // against their declarations, which are not visible here.
    final int scrambled = Integer.rotateLeft(k * C1_32, R1_32) * C2_32;
    // Fold the scrambled block into the running hash, then rotate and apply the
    // linear step (multiply by M_32, add N_32).
    final int rotated = Integer.rotateLeft(hash ^ scrambled, R2_32);
    return rotated * M_32 + N_32;
}
private static int fmix32(int hash) {
    // Finalization mix: three xor-with-unsigned-right-shift steps (16, 13, 16),
    // with a multiplication after each of the first two. int overflow wraps.
    int h = hash;
    h = (h ^ (h >>> 16)) * 0x85ebca6b;
    h = (h ^ (h >>> 13)) * 0xc2b2ae35;
    return h ^ (h >>> 16);
}
private static long fmix64(long h) {

View File

@ -4,7 +4,7 @@ import cn.hutool.core.util.StrUtil;
import org.junit.Assert;
import org.junit.Test;
public class MurMurHashTest {
public class MurmurHashTest {
@Test
public void hash32Test() {