Skip to content

Commit

Permalink
add: implement equals and hashCode
Browse files Browse the repository at this point in the history
* cache hash digests
* add `bitwiseDifference` call
  • Loading branch information
AlbertWeichselbraun committed Jan 10, 2017
1 parent 36a7874 commit 61e1f04
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 68 deletions.
1 change: 0 additions & 1 deletion .classpath
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="org.eclipse.jst.component.nondependency" value=""/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
Expand Down
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ Original C nilsimsa-0.2.4 implementation by cmeclax:

* 0.0.2:
- added support for hashing byte arrays and static constructors
- cache hash digest
- implemented `equals` and `hashCode`
- `compare` now uses the quicker `Integer.bitCount` method
- `compare` yields the number of bits that differ rather than the difference to 128 equal bits.
- improved test coverage
- added `bitwiseDifference` which yields the number of bits that differ between hashes
- improved test coverage
- code cleanup
4 changes: 0 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand Down
151 changes: 97 additions & 54 deletions src/main/java/com/weblyzard/lib/string/nilsimsa/Nilsimsa.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@

import java.util.*;

import org.apache.commons.codec.DecoderException;
import javax.xml.bind.DatatypeConverter;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;

/**
* Computes the Nilsimsa hash for the given string.
* @author Albert Weichselbraun <[email protected]>
* <[email protected]>
*
* This class is a translation of the Python implementation by Michael Itz
* to the Java language <http://code.google.com/p/py-nilsimsa>.
* This class is based on the Python implementation by Michael Itz
* <http://code.google.com/p/py-nilsimsa>.
*
* Original C nilsimsa-0.2.4 implementation by cmeclax:
* <http://ixazon.dynip.com/~cmeclax/nilsimsa.html>
Expand All @@ -34,12 +37,13 @@
*/
public class Nilsimsa {

private int count = 0; // num characters seen
private int[] acc = new int[256]; // accumulators for computing the digest
private int[] lastch = new int[4]; // the last four seen characters
private int count = 0; // num characters seen
private int[] acc = new int[256]; // accumulators for computing the digest
private int[] lastch = new int[4]; // the last four seen characters
private byte[] digest = null; // the Nilsimsa digest

// pre-defined transformation arrays
private static final byte[] TRAN = Nilsimsa._getByteArray(
private static final byte[] TRAN = DatatypeConverter.parseHexBinary(
"02D69E6FF91D04ABD022161FD873A1AC" +
"3B7062961E6E8F399D05144AA6BEAE0E" +
"CFB99C9AC76813E12DA4EB518D646B50" +
Expand All @@ -61,24 +65,6 @@ public Nilsimsa() {
reset();
}

/**
 * Creates a new Nilsimsa object and updates it with the given byte array.
 * @param data the bytes to hash
 * @return the Nilsimsa object returned by updating a fresh instance with {@code data}
 */
public static Nilsimsa getHash(byte[] data) {
    final Nilsimsa hash = new Nilsimsa();
    return hash.update(data);
}

/**
 * Computes the Nilsimsa digest for the given String.
 * The String is converted to bytes using UTF-8, so the resulting hash does
 * not depend on the default charset of the platform the code runs on.
 * @param s the String to hash
 * @return the Nilsimsa object obtained by hashing the UTF-8 bytes of {@code s}
 */
public static Nilsimsa getHash(String s) {
    // s.getBytes() without a charset uses the platform default encoding, which
    // would yield different hashes for the same non-ASCII text on different machines.
    return getHash(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
}

/**
* Updates the Nilsimsa digest using the given String
* @param s: the String data to consider in the update
Expand Down Expand Up @@ -110,6 +96,7 @@ public Nilsimsa update(byte[] data) {
}
lastch[0] = ch;
}
digest = null;
return this;
}

Expand All @@ -124,23 +111,10 @@ public Nilsimsa reset() {
count = 0;
Arrays.fill(acc, (byte) 0);
Arrays.fill(lastch, -1);
this.digest = null;
return this;
}

/*
 * Converts the given hexString to a byte array.
 * @param hexString: the hexString to convert
 * @return the corresponding byte array
 * @throws IllegalArgumentException if hexString cannot be decoded
 */
private static byte[] _getByteArray(String hexString) {
    try {
        return Hex.decodeHex(hexString.toCharArray());
    } catch (DecoderException e) {
        // Fail fast and keep the cause: returning null here would only defer
        // the failure to the first use of the decoded array.
        throw new IllegalArgumentException("Invalid hex string: " + hexString, e);
    }
}

/**
* Accumulator for a transition n between the chars a, b, c
*/
Expand All @@ -153,9 +127,12 @@ private int _tran3(int a, int b, int c, int n) {
* @return the digest for the current Nilsimsa object.
*/
public byte[] digest() {
if (digest != null) {
return digest;
}
int total = 0;
int threshold;
byte[] digest = new byte[32];
digest = new byte[32];
Arrays.fill(digest, (byte)0);

if (count == 3) {
Expand All @@ -174,46 +151,85 @@ public byte[] digest() {
}
ArrayUtils.reverse( digest );
return digest;
}

/**
 * Resets this object, updates it with the given byte array and returns
 * the resulting Nilsimsa digest.
 * @param data: an array of bytes to hash
 * @return the Nilsimsa digest.
 */
public byte[] digest(byte[] data) {
    // reset() and update() both return this object, so the calls can be chained
    return reset().update(data).digest();
}

/**
 * Creates a new Nilsimsa object and updates it with the given byte array.
 * @param data the bytes to hash
 * @return the Nilsimsa object returned by updating a fresh instance with {@code data}
 */
public static Nilsimsa getHash(byte[] data) {
    final Nilsimsa hash = new Nilsimsa();
    return hash.update(data);
}

/**
* @return a String representation of the current state of
* the Nilsimsa object.
* Computes the Nilsimsa digest for the given String.
* @param s
* @return
*/
public String hexdigest() {
return Hex.encodeHexString( digest() );
public static Nilsimsa getHash(String s) {
    // Encode explicitly as UTF-8: s.getBytes() without a charset uses the
    // platform default encoding, which would yield different hashes for the
    // same non-ASCII text on different machines.
    return getHash(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
}


/**
* Compute the Nilsimsa digest for the given String.
* @param s: the String to hash
* @return the Nilsimsa digest.
*/
public byte[] digest(String s) {
reset();
update(s);
return digest();
return digest(s.getBytes());
}


/**
 * @return the digest of the current Nilsimsa object, encoded
 *         as a hexadecimal String.
 */
public String hexdigest() {
    final byte[] rawDigest = digest();
    return Hex.encodeHexString(rawDigest);
}

/**
* Compute the Nilsimsa hexDigest for the given byte array.
* @param data: an array of bytes to hash
* @return the Nilsimsa hexdigest.
*/
public String hexdigest(byte[] data) {
// digest(data) resets this object and hashes data; its return value is
// not needed here because hexdigest() obtains the digest itself.
digest(data);
return hexdigest();
}


/**
* Compute the Nilsimsa hexDigest for the given String.
* @param s: the String to hash
* @return the Nilsimsa hexdigest.
*/
public String hexdigest(String s) {
return Hex.encodeHexString( digest(s) );
digest(s);
return hexdigest();
}

/**
* Compares a Nilsimsa object to the current one and
* return the number of bits that differ.
* @param cmp: the comparison object
* @return the number of bits in the strings which differ.
* @param cmp:
* the comparison object
* @return
* the number of bits in which the Nilsimsa digests differ.
*/
public int compare(Nilsimsa cmp) {
public int bitwiseDifference(Nilsimsa cmp) {
int distance = 0;
int h1, h2;

Expand All @@ -227,5 +243,32 @@ public int compare(Nilsimsa cmp) {
}
return distance;
}

/**
 * Scores the similarity between the Nilsimsa digest of the current object
 * and the one of cmp as 128 minus the number of bits in which they differ.
 * @param cmp:
 *   comparison object
 * @return
 *   a value between -128 (no matching bits) and 128 (all bits match; both hashes are equal)
 */
public int compare(Nilsimsa cmp) {
    final int differingBits = bitwiseDifference(cmp);
    return 128 - differingBits;
}

/**
 * Two Nilsimsa objects are equal if they are of the same class and
 * their digests have the same content.
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o == null || o.getClass() != getClass()) {
        return false;
    }
    final Nilsimsa other = (Nilsimsa) o;
    return new EqualsBuilder()
            .append(digest(), other.digest())
            .isEquals();
}

/**
 * Derives the hash code from the digest, the same data equals() compares.
 */
@Override
public int hashCode() {
    final HashCodeBuilder builder = new HashCodeBuilder();
    builder.append(digest());
    return builder.toHashCode();
}

}
}
45 changes: 38 additions & 7 deletions src/test/java/com/weblyzard/lib/string/nilsimsa/NilsimsaTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,24 @@
import static org.junit.Assert.*;

import java.io.*;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;

import org.apache.commons.codec.Charsets;
import org.apache.commons.io.FileUtils;
import org.junit.Test;

/**
* Test the Nilsimsa algorithm
* @author Albert Weichselbraun
* @author Albert Weichselbraun <[email protected]>
*
*/
public class NilsimsaTest {

private final static String CONTENT_ENCODING = "UTF8";
private final static Charset CONTENT_ENCODING = Charsets.UTF_8;
private final static String[] TEST_DATA = {
"73302df80673894c115249b1f880abb1ec2b09f1c9726e642b690291e636fe6f c",
"67b02df81323816c51019d71da92612dede05cf1cd20fb042b218310e61368ef hmac",
Expand Down Expand Up @@ -71,14 +74,43 @@ public void differenceTest() {
Nilsimsa referenceHash = Nilsimsa.getHash(referenceString);
// System.out.print("{");
for (int j=0; j<TEST_DATA.length; j++) {
int distance = referenceHash.compare(Nilsimsa.getHash(TEST_DATA[j]));
int distance = referenceHash.bitwiseDifference(Nilsimsa.getHash(TEST_DATA[j]));
// System.out.print(distance + ", ");
assertEquals(REFERENCE_DISTANCE[i][j], distance);
}
// System.out.println("}, ");
}
}

@Test
public void equalsAndHashCodeTest() {
    // Hashes built from the same test entry must be equal and share a hash code;
    // hashes built from different entries are expected to differ in both.
    for (int i = 0; i < TEST_DATA.length; i++) {
        final Nilsimsa h1 = Nilsimsa.getHash(TEST_DATA[i]);
        for (int j = 0; j < TEST_DATA.length; j++) {
            final Nilsimsa h2 = Nilsimsa.getHash(TEST_DATA[j]);
            if (i != j) {
                assertNotEquals(h1, h2);
                assertNotEquals(h1.hashCode(), h2.hashCode());
                continue;
            }
            assertEquals(h1, h2);
            assertEquals(h1.hashCode(), h2.hashCode());
        }
    }
}

/**
 * Checks the special cases handled by equals: comparison with null,
 * with an object of another class and with the object itself.
 */
@Test
public void equalsSpecialCasesTest() {
    Nilsimsa h = Nilsimsa.getHash("test");
    // never equal to null
    assertNotEquals(h, null);
    // never equal to an object of a different class
    assertNotEquals(h, this);
    // always equal to itself
    assertEquals(h, h);
}

/**
* compile test mapping
* @return a mapping of file content and the corresponding reference
Expand All @@ -92,10 +124,9 @@ private static Map<String, String> _readTestDocuments() {
testSet = testData.split(" ");
try {
URL resource = NilsimsaTest.class.getClassLoader().getResource("wiki-"+ testSet[1] + ".txt");
documentContent = FileUtils.readFileToString(
new File( resource.getFile()), CONTENT_ENCODING);
documentContent = new String(Files.readAllBytes(Paths.get(resource.toURI())), CONTENT_ENCODING);
result.put( testSet[0], documentContent);
} catch (IOException e) {
} catch (IOException | URISyntaxException e) {
e.printStackTrace();
fail("Cannot read corpus.");
}
Expand Down

0 comments on commit 61e1f04

Please sign in to comment.