From f76c576f61029d843e8b3261fb197b34c0437d23 Mon Sep 17 00:00:00 2001 From: Markus Alexander Kuppe <tlaplus.net@lemmster.de> Date: Thu, 21 May 2015 08:24:31 +0200 Subject: [PATCH] [Feature] Add TLCIterator#getLast(long) where long acts as a lower bound for the sequential iteration through the (potentially large) in-memory buffer. Starting with the second disk flush, the maximum value (fingerprint) of the in-memory and on-disk file has to be calculated. The maximum on-disk value is kept as a variable, while the in-memory value requires a lookup by iterating the buffer end to front. The moment the iterator sees a value smaller than the on-disk max value, it can stop its scan. It won't find a value larger than the on-disk one. --- tlatools/src/tlc2/tool/fp/MSBDiskFPSet.java | 81 ++++++++++++++++++- .../test/tlc2/tool/fp/MSBDiskFPSetTest2.java | 33 ++++++++ .../tool/fp/iterator/TLCIterator2Test.java | 32 ++++++++ .../tool/fp/iterator/TLCIteratorTest.java | 12 +++ 4 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 tlatools/test/tlc2/tool/fp/iterator/TLCIterator2Test.java diff --git a/tlatools/src/tlc2/tool/fp/MSBDiskFPSet.java b/tlatools/src/tlc2/tool/fp/MSBDiskFPSet.java index 3d724c0dd..6b8a63a4e 100644 --- a/tlatools/src/tlc2/tool/fp/MSBDiskFPSet.java +++ b/tlatools/src/tlc2/tool/fp/MSBDiskFPSet.java @@ -100,10 +100,18 @@ public class MSBDiskFPSet extends HeapBasedDiskFPSet { final long buffLen = tblCnt.get(); final TLCIterator itr = new TLCIterator(tbl); - // Precompute the maximum value of the new file - long maxVal = itr.getLast(); + // Precompute the maximum value of the new file. + // If this isn't the first merge, the index has + // the last element of the disk file. In that case + // the new maxVal is the larger element of the two + // in-memory and on-disk elements. + // The largest on-disk element is passed to the + // iterator as a lower bound. + long maxVal; if (index != null) { - maxVal = Math.max(maxVal, index[index.length - 1]); + maxVal = itr.getLast(index[index.length - 1]); + } else { + maxVal = itr.getLast(); } int indexLen = calculateIndexLen(buffLen); @@ -141,6 +149,7 @@ public class MSBDiskFPSet extends HeapBasedDiskFPSet { if (value == fp) { Assert.check(false, EC.TLC_FP_VALUE_ALREADY_ON_DISK, String.valueOf(value)); + } writeFP(outRAF, fp); // we used one fp up, thus move to next one @@ -276,11 +285,77 @@ public class MSBDiskFPSet extends HeapBasedDiskFPSet { return result; } + /** + * @param lowBound + * Stop searching for the last element if no element is + * larger than lowBound + * @return The last element in the iteration that is larger than lowBound + * @exception NoSuchElementException + * if iteration is empty. + * <p> + * Pre-condition: Each bucket is sorted in ascending + * order. on-disk fingerprints don't adhere to the + * ascending order though! + */ + public long getLast(final long lowBound) { + int len = buff.length - 1; + long[] bucket = buff[len]; + + // Walk from the end of buff to the beginning. Each bucket that is + // found and non-null (null if no fingerprint for such an index has + // been added to the DiskFPSet) is checked for a valid fingerprint. + // A fp is valid iff it is larger zero. A zero fingerprint slot + // indicates an unoccupied slot, while a negative one corresponds to + // a fp that has already been flushed to disk. + while (len > 0) { + bucket = buff[--len]; + + // Find last element > 0 in bucket (negative elements have already + // been flushed to disk, zero indicates an unoccupied slot). + if (bucket != null) { + for (int i = bucket.length - 1; i >= 0; i--) { + final long fp = bucket[i]; + // unused slot + if (fp == 0) { + continue; + } + // in-memory fingerprint + if (fp > 0) { + if (fp < lowBound) { + // smaller lowBound + return lowBound; + } else { + // larger or equal lowBound + return fp; + } + } + // Cannot take on-disk fingerprints (negative ones) into + // account. They don't adhere to the precondition that + // the bucket is sorted. The bucket is logically sorted + // only for in-memory fingerprints. + } + } + } + // At this point we have scanned the complete buff, but haven't + // found a single fingerprint that hasn't been flushed to disk + // already. + throw new NoSuchElementException(); + } + /** * @return The last element in the iteration. * @exception NoSuchElementException if iteration is empty. + * + * Pre-condition: Each bucket is sorted in ascending order! */ public long getLast() { + /* + * Tempting to delegate to getLast(Long.MAX_VALUE). However, + * getLast() could not throw a NoSuchElementException when the + * value returned is Long.MAX_VALUE. It could be a valid value, but + * it might as well be the cutOff. + */ + int len = buff.length - 1; long[] bucket = buff[len]; diff --git a/tlatools/test/tlc2/tool/fp/MSBDiskFPSetTest2.java b/tlatools/test/tlc2/tool/fp/MSBDiskFPSetTest2.java index 3a8b45495..ff080010a 100644 --- a/tlatools/test/tlc2/tool/fp/MSBDiskFPSetTest2.java +++ b/tlatools/test/tlc2/tool/fp/MSBDiskFPSetTest2.java @@ -77,6 +77,39 @@ public class MSBDiskFPSetTest2 extends AbstractHeapBasedDiskFPSetTest { } fail(); } + + public void testGetLastWithCutOff() throws IOException { + final MSBDiskFPSet msbDiskFPSet = getMSBDiskFPSet(); + + // Add the largest possible fingerprint into the fpset. It will end up + // in the largest bucket. Check that the MSB iterator returns it. + final long highFP = 1L << 62; + msbDiskFPSet.put(highFP); + TLCIterator tlcIterator = new MSBDiskFPSet.TLCIterator(msbDiskFPSet.tbl); + assertEquals(highFP, tlcIterator.getLast()); + + // Flush the set to disk (simulating e.g. a checkpoint), a new iterator + // won't find the element anymore because it intentionally only searches + // for elements that are *not* on disk. + msbDiskFPSet.flusher.flushTable(); + new MSBDiskFPSet.TLCIterator(msbDiskFPSet.tbl); + try { + tlcIterator.getLast(); + } catch (NoSuchElementException e) { + // This exception is expected. + + // Now add the smallest possible element into the set. It will end + // up in the smallest bucket. + final long lowFP = 1; + msbDiskFPSet.put(lowFP); + // check that the iterator finds lower bound as the last element. + tlcIterator = new MSBDiskFPSet.TLCIterator(msbDiskFPSet.tbl); + final long lowerBound = highFP - 1L; + assertEquals(lowerBound, tlcIterator.getLast(lowerBound)); + return; + } + fail(); + } private MSBDiskFPSet getMSBDiskFPSet() throws RemoteException, IOException { // Create an MSBDiskFPSet usable in this unit test with memory allocated diff --git a/tlatools/test/tlc2/tool/fp/iterator/TLCIterator2Test.java b/tlatools/test/tlc2/tool/fp/iterator/TLCIterator2Test.java new file mode 100644 index 000000000..1ac9f4ed0 --- /dev/null +++ b/tlatools/test/tlc2/tool/fp/iterator/TLCIterator2Test.java @@ -0,0 +1,32 @@ +// Copyright (c) 2012 Microsoft Corporation. All rights reserved. + +package tlc2.tool.fp.iterator; + +public class TLCIterator2Test extends TLCIteratorTest { + + /* (non-Javadoc) + * @see tlc2.tool.fp.iterator.TLCIteratorTest#getBuffer() + */ + @Override + protected long[][] getBuffer() { + final long[][] buff = new long[8][]; + buff[0] = getArray(8, 1, 8); + buff[1] = getArray(8, 9, 6); + buff[2] = null; + buff[3] = null; + + buff[4] = getArray(8, 15, 7); // Bucket with last element + buff[5] = null; + + // Simulate that this bucket is filled with fingerprints who have all + // been flushed to disk. + buff[6] = new long[4]; + buff[6][0] = 22L | 0x8000000000000000L; + buff[6][1] = 0L; + buff[6][2] = 0L; + buff[6][3] = 0L; + + buff[7] = null; + return buff; + } +} diff --git a/tlatools/test/tlc2/tool/fp/iterator/TLCIteratorTest.java b/tlatools/test/tlc2/tool/fp/iterator/TLCIteratorTest.java index b15ac8788..07f347f36 100644 --- a/tlatools/test/tlc2/tool/fp/iterator/TLCIteratorTest.java +++ b/tlatools/test/tlc2/tool/fp/iterator/TLCIteratorTest.java @@ -94,4 +94,16 @@ public class TLCIteratorTest extends TestCase { public void testGetLast() { assertEquals(getLast(), itr.getLast()); } + + public void testGetLastLowBound() { + assertEquals(getLast() + 1, itr.getLast(getLast() + 1)); + } + + /* + * Use a smaller lower bound element than last in the buffer. + */ + public void testGetLastLowerBoundNoHit() { + assertEquals(getLast(), itr.getLast(20)); + } + } -- GitLab