Skip to content
Snippets Groups Projects
Commit f76c576f authored by Markus Alexander Kuppe's avatar Markus Alexander Kuppe
Browse files

[Feature] Add TLCIterator#getLast(long) where long acts as a lower bound

for the sequential iteration through the (potentially large) in-memory
buffer.

Starting with the second disk flush, the maximum value (fingerprint) of
the in-memory and on-disk file has to be calculated. The maximum on-disk
value is kept as a variable, while the in-memory value requires a lookup
by iterating the buffer end to front. The moment the iterator sees a
value smaller than the on-disk max value, it can stop its scan. It won't
find a value larger than the on-disk one.
parent 2c6977b1
No related branches found
No related tags found
No related merge requests found
...@@ -100,10 +100,18 @@ public class MSBDiskFPSet extends HeapBasedDiskFPSet { ...@@ -100,10 +100,18 @@ public class MSBDiskFPSet extends HeapBasedDiskFPSet {
final long buffLen = tblCnt.get(); final long buffLen = tblCnt.get();
final TLCIterator itr = new TLCIterator(tbl); final TLCIterator itr = new TLCIterator(tbl);
// Precompute the maximum value of the new file // Precompute the maximum value of the new file.
long maxVal = itr.getLast(); // If this isn't the first merge, the index has
// the last element of the disk file. In that case
// the new maxVal is the larger element of the two
// in-memory and on-disk elements.
// The largest on-disk element is passed to the
// iterator as a lower bound.
long maxVal;
if (index != null) { if (index != null) {
maxVal = Math.max(maxVal, index[index.length - 1]); maxVal = itr.getLast(index[index.length - 1]);
} else {
maxVal = itr.getLast();
} }
int indexLen = calculateIndexLen(buffLen); int indexLen = calculateIndexLen(buffLen);
...@@ -141,6 +149,7 @@ public class MSBDiskFPSet extends HeapBasedDiskFPSet { ...@@ -141,6 +149,7 @@ public class MSBDiskFPSet extends HeapBasedDiskFPSet {
if (value == fp) { if (value == fp) {
Assert.check(false, EC.TLC_FP_VALUE_ALREADY_ON_DISK, Assert.check(false, EC.TLC_FP_VALUE_ALREADY_ON_DISK,
String.valueOf(value)); String.valueOf(value));
} }
writeFP(outRAF, fp); writeFP(outRAF, fp);
// we used one fp up, thus move to next one // we used one fp up, thus move to next one
...@@ -276,11 +285,77 @@ public class MSBDiskFPSet extends HeapBasedDiskFPSet { ...@@ -276,11 +285,77 @@ public class MSBDiskFPSet extends HeapBasedDiskFPSet {
return result; return result;
} }
/**
* @param lowBound
* Stop searching for the last element if no element is
* larger than lowBound
* @return The last element in the iteration that is larger than lowBound
* @exception NoSuchElementException
* if iteration is empty.
* <p>
* Pre-condition: Each bucket is sorted in ascending
* order. on-disk fingerprints don't adhere to the
* ascending order though!
*/
public long getLast(final long lowBound) {
int len = buff.length - 1;
long[] bucket = buff[len];
// Walk from the end of buff to the beginning. Each bucket that is
// found and non-null (null if no fingerprint for such an index has
// been added to the DiskFPSet) is checked for a valid fingerprint.
// A fp is valid iff it is larger zero. A zero fingerprint slot
// indicates an unoccupied slot, while a negative one corresponds to
// a fp that has already been flushed to disk.
while (len > 0) {
bucket = buff[--len];
// Find last element > 0 in bucket (negative elements have already
// been flushed to disk, zero indicates an unoccupied slot).
if (bucket != null) {
for (int i = bucket.length - 1; i >= 0; i--) {
final long fp = bucket[i];
// unused slot
if (fp == 0) {
continue;
}
// in-memory fingerprint
if (fp > 0) {
if (fp < lowBound) {
// smaller lowBound
return lowBound;
} else {
// larger or equal lowBound
return fp;
}
}
// Cannot take on-disk fingerprints (negative ones) into
// account. They don't adhere to the precondition that
// the bucket is sorted. The bucket is logically sorted
// only for in-memory fingerprints.
}
}
}
// At this point we have scanned the complete buff, but haven't
// found a single fingerprint that hasn't been flushed to disk
// already.
throw new NoSuchElementException();
}
/** /**
* @return The last element in the iteration. * @return The last element in the iteration.
* @exception NoSuchElementException if iteration is empty. * @exception NoSuchElementException if iteration is empty.
*
* Pre-condition: Each bucket is sorted in ascending order!
*/ */
public long getLast() { public long getLast() {
/*
* Tempting to delegate to getLast(Long.MAX_VALUE). However,
* getLast() could not throw a NoSuchElementException when the
* value returned is Long.MAX_VALUE. It could be a valid value, but
* it might as well be the cutOff.
*/
int len = buff.length - 1; int len = buff.length - 1;
long[] bucket = buff[len]; long[] bucket = buff[len];
......
...@@ -78,6 +78,39 @@ public class MSBDiskFPSetTest2 extends AbstractHeapBasedDiskFPSetTest { ...@@ -78,6 +78,39 @@ public class MSBDiskFPSetTest2 extends AbstractHeapBasedDiskFPSetTest {
fail(); fail();
} }
public void testGetLastWithCutOff() throws IOException {
final MSBDiskFPSet msbDiskFPSet = getMSBDiskFPSet();
// Add the largest possible fingerprint into the fpset. It will end up
// in the largest bucket. Check that the MSB iterator returns it.
final long highFP = 1L << 62;
msbDiskFPSet.put(highFP);
TLCIterator tlcIterator = new MSBDiskFPSet.TLCIterator(msbDiskFPSet.tbl);
assertEquals(highFP, tlcIterator.getLast());
// Flush the set to disk (simulating e.g. a checkpoint), a new iterator
// won't find the element anymore because it intentionally only searches
// for elements that are *not* on disk.
msbDiskFPSet.flusher.flushTable();
new MSBDiskFPSet.TLCIterator(msbDiskFPSet.tbl);
try {
tlcIterator.getLast();
} catch (NoSuchElementException e) {
// This exception is expected.
// Now add the smallest possible element into the set. It will end
// up in the smallest bucket.
final long lowFP = 1;
msbDiskFPSet.put(lowFP);
// check that the iterator finds lower bound as the last element.
tlcIterator = new MSBDiskFPSet.TLCIterator(msbDiskFPSet.tbl);
final long lowerBound = highFP - 1L;
assertEquals(lowerBound, tlcIterator.getLast(lowerBound));
return;
}
fail();
}
private MSBDiskFPSet getMSBDiskFPSet() throws RemoteException, IOException { private MSBDiskFPSet getMSBDiskFPSet() throws RemoteException, IOException {
// Create an MSBDiskFPSet usable in this unit test with memory allocated // Create an MSBDiskFPSet usable in this unit test with memory allocated
// to store 100 fingerprints. // to store 100 fingerprints.
......
// Copyright (c) 2012 Microsoft Corporation. All rights reserved.
package tlc2.tool.fp.iterator;
public class TLCIterator2Test extends TLCIteratorTest {
/* (non-Javadoc)
* @see tlc2.tool.fp.iterator.TLCIteratorTest#getBuffer()
*/
@Override
protected long[][] getBuffer() {
final long[][] buff = new long[8][];
buff[0] = getArray(8, 1, 8);
buff[1] = getArray(8, 9, 6);
buff[2] = null;
buff[3] = null;
buff[4] = getArray(8, 15, 7); // Bucket with last element
buff[5] = null;
// Simulate that this bucket is filled with fingerprints who have all
// been flushed to disk.
buff[6] = new long[4];
buff[6][0] = 22L | 0x8000000000000000L;
buff[6][1] = 0L;
buff[6][2] = 0L;
buff[6][3] = 0L;
buff[7] = null;
return buff;
}
}
...@@ -94,4 +94,16 @@ public class TLCIteratorTest extends TestCase { ...@@ -94,4 +94,16 @@ public class TLCIteratorTest extends TestCase {
public void testGetLast() { public void testGetLast() {
assertEquals(getLast(), itr.getLast()); assertEquals(getLast(), itr.getLast());
} }
public void testGetLastLowBound() {
assertEquals(getLast() + 1, itr.getLast(getLast() + 1));
}
/*
* Use a smaller lower bound element than last in the buffer.
*/
public void testGetLastLowerBoundNoHit() {
assertEquals(getLast(), itr.getLast(20));
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment