Reviewed revision of this patch.
- The unified-diff line structure is restored; indentation is reconstructed (2-space) and
  should be confirmed against the target file before applying.
- The added generators write with Files.write(...) so the file is closed after writing; their
  locals are final and item types are parameterized. Context and removed lines are untouched.
- The post-image blob id on the index line predates these revisions.

diff --git a/src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java b/src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java
index 717874a93..781fbebca 100644
--- a/src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java
+++ b/src/test/java/org/apache/datasketches/sampling/ReservoirCrossLanguageTest.java
@@ -22,10 +22,12 @@
 import org.apache.datasketches.common.ArrayOfDoublesSerDe;
 import org.apache.datasketches.common.ArrayOfLongsSerDe;
 import org.apache.datasketches.common.ArrayOfStringsSerDe;
+import org.apache.datasketches.common.ResizeFactor;
 import org.testng.annotations.Test;
 
 import java.io.IOException;
 import java.nio.file.Files;
+import java.util.ArrayList;
 
 import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES;
 import static org.apache.datasketches.common.TestUtil.javaPath;
@@ -74,7 +76,7 @@ public void generateReservoirLongsSketchSampling() throws IOException {
       final ReservoirLongsSketch sk = ReservoirLongsSketch.getInstance(
           predeterminedSamples,
           n,
-          org.apache.datasketches.common.ResizeFactor.X8,
+          ResizeFactor.X8,
           k
       );
 
@@ -83,6 +85,59 @@ public void generateReservoirLongsSketchSampling() throws IOException {
     }
   }
 
+  /** Writes the serialized form of an empty ReservoirLongsUnion (maxK = 128). */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirLongsUnionEmpty() throws IOException {
+    final int maxK = 128;
+    final ReservoirLongsUnion union = ReservoirLongsUnion.newInstance(maxK);
+
+    Files.write(javaPath.resolve("reservoir_longs_union_empty_maxk" + maxK + "_java.sk"),
+        union.toByteArray());
+  }
+
+  /** Writes one serialized ReservoirLongsUnion per n, each updated with the values 0..n-1. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirLongsUnionExact() throws IOException {
+    final int maxK = 128;
+    final int[] nArr = {1, 10, 32, 100, 128};
+
+    for (final int n : nArr) {
+      final ReservoirLongsUnion union = ReservoirLongsUnion.newInstance(maxK);
+      for (int i = 0; i < n; i++) {
+        union.update(i);
+      }
+      Files.write(javaPath.resolve("reservoir_longs_union_exact_n" + n + "_maxk" + maxK + "_java.sk"),
+          union.toByteArray());
+    }
+  }
+
+  /** Writes one serialized ReservoirLongsUnion per maxK, each fed a sketch of fixed samples. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirLongsUnionSampling() throws IOException {
+    final int[] maxKArr = {32, 64, 128};
+    final long n = 1000;
+
+    for (final int maxK : maxKArr) {
+      final long[] predeterminedSamples = new long[maxK];
+      for (int i = 0; i < maxK; i++) {
+        predeterminedSamples[i] = i * 2;
+      }
+
+      final ReservoirLongsSketch sk = ReservoirLongsSketch.getInstance(
+          predeterminedSamples,
+          n,
+          ResizeFactor.X8,
+          maxK
+      );
+
+      final ReservoirLongsUnion union = ReservoirLongsUnion.newInstance(maxK);
+      union.update(sk);
+
+      Files.write(javaPath.resolve("reservoir_longs_union_sampling_n" + n + "_maxk" + maxK + "_java.sk"),
+          union.toByteArray());
+    }
+  }
+
   @Test(groups = {GENERATE_JAVA_FILES})
   public void generateReservoirItemsSketchLongEmpty() throws IOException {
     final int k = 128;
@@ -121,7 +176,7 @@ public void generateReservoirItemsSketchLongSampling() throws IOException {
       final ReservoirItemsSketch sk = ReservoirItemsSketch.newInstance(
           predeterminedSamples,
           n,
-          org.apache.datasketches.common.ResizeFactor.X8,
+          ResizeFactor.X8,
           k
       );
 
@@ -168,7 +223,7 @@ public void generateReservoirItemsSketchDoubleSampling() throws IOException {
       final ReservoirItemsSketch sk = ReservoirItemsSketch.newInstance(
           predeterminedSamples,
           n,
-          org.apache.datasketches.common.ResizeFactor.X8,
+          ResizeFactor.X8,
           k
       );
 
@@ -215,7 +270,7 @@ public void generateReservoirItemsSketchStringSampling() throws IOException {
       final ReservoirItemsSketch sk = ReservoirItemsSketch.newInstance(
           predeterminedSamples,
           n,
-          org.apache.datasketches.common.ResizeFactor.X8,
+          ResizeFactor.X8,
           k
       );
 
@@ -223,4 +278,163 @@ public void generateReservoirItemsSketchStringSampling() throws IOException {
           .write(sk.toByteArray(new ArrayOfStringsSerDe()));
     }
   }
+
+  /** Writes the serialized form of an empty ReservoirItemsUnion using the longs SerDe. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsUnionLongEmpty() throws IOException {
+    final int maxK = 128;
+    final ReservoirItemsUnion<Long> union = ReservoirItemsUnion.newInstance(maxK);
+
+    Files.write(javaPath.resolve("reservoir_items_union_long_empty_maxk" + maxK + "_java.sk"),
+        union.toByteArray(new ArrayOfLongsSerDe()));
+  }
+
+  /** Writes one serialized ReservoirItemsUnion per n, each updated with the longs 0..n-1. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsUnionLongExact() throws IOException {
+    final int maxK = 128;
+    final int[] nArr = {1, 10, 32, 100, 128};
+
+    for (final int n : nArr) {
+      final ReservoirItemsUnion<Long> union = ReservoirItemsUnion.newInstance(maxK);
+      for (int i = 0; i < n; i++) {
+        union.update((long) i);
+      }
+      Files.write(javaPath.resolve("reservoir_items_union_long_exact_n" + n + "_maxk" + maxK + "_java.sk"),
+          union.toByteArray(new ArrayOfLongsSerDe()));
+    }
+  }
+
+  /** Writes one serialized ReservoirItemsUnion per maxK, each fed a sketch of fixed Long samples. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsUnionLongSampling() throws IOException {
+    final int[] maxKArr = {32, 64, 128};
+    final long n = 1000;
+
+    for (final int maxK : maxKArr) {
+      final ArrayList<Long> predeterminedSamples = new ArrayList<>();
+      for (int i = 0; i < maxK; i++) {
+        predeterminedSamples.add((long) (i * 2));
+      }
+
+      final ReservoirItemsSketch<Long> sk = ReservoirItemsSketch.newInstance(
+          predeterminedSamples,
+          n,
+          ResizeFactor.X8,
+          maxK
+      );
+
+      final ReservoirItemsUnion<Long> union = ReservoirItemsUnion.newInstance(maxK);
+      union.update(sk);
+
+      Files.write(javaPath.resolve("reservoir_items_union_long_sampling_n" + n + "_maxk" + maxK + "_java.sk"),
+          union.toByteArray(new ArrayOfLongsSerDe()));
+    }
+  }
+
+  /** Writes the serialized form of an empty ReservoirItemsUnion using the doubles SerDe. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsUnionDoubleEmpty() throws IOException {
+    final int maxK = 128;
+    final ReservoirItemsUnion<Double> union = ReservoirItemsUnion.newInstance(maxK);
+
+    Files.write(javaPath.resolve("reservoir_items_union_double_empty_maxk" + maxK + "_java.sk"),
+        union.toByteArray(new ArrayOfDoublesSerDe()));
+  }
+
+  /** Writes one serialized ReservoirItemsUnion per n, each updated with the doubles 0..n-1. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsUnionDoubleExact() throws IOException {
+    final int maxK = 128;
+    final int[] nArr = {1, 10, 32, 100, 128};
+
+    for (final int n : nArr) {
+      final ReservoirItemsUnion<Double> union = ReservoirItemsUnion.newInstance(maxK);
+      for (int i = 0; i < n; i++) {
+        union.update((double) i);
+      }
+      Files.write(javaPath.resolve("reservoir_items_union_double_exact_n" + n + "_maxk" + maxK + "_java.sk"),
+          union.toByteArray(new ArrayOfDoublesSerDe()));
+    }
+  }
+
+  /** Writes one serialized ReservoirItemsUnion per maxK, each fed a sketch of fixed Double samples. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsUnionDoubleSampling() throws IOException {
+    final int[] maxKArr = {32, 64, 128};
+    final long n = 1000;
+
+    for (final int maxK : maxKArr) {
+      final ArrayList<Double> predeterminedSamples = new ArrayList<>();
+      for (int i = 0; i < maxK; i++) {
+        predeterminedSamples.add((double) (i * 2));
+      }
+
+      final ReservoirItemsSketch<Double> sk = ReservoirItemsSketch.newInstance(
+          predeterminedSamples,
+          n,
+          ResizeFactor.X8,
+          maxK
+      );
+
+      final ReservoirItemsUnion<Double> union = ReservoirItemsUnion.newInstance(maxK);
+      union.update(sk);
+
+      Files.write(javaPath.resolve("reservoir_items_union_double_sampling_n" + n + "_maxk" + maxK + "_java.sk"),
+          union.toByteArray(new ArrayOfDoublesSerDe()));
+    }
+  }
+
+  /** Writes the serialized form of an empty ReservoirItemsUnion using the strings SerDe. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsUnionStringEmpty() throws IOException {
+    final int maxK = 128;
+    final ReservoirItemsUnion<String> union = ReservoirItemsUnion.newInstance(maxK);
+
+    Files.write(javaPath.resolve("reservoir_items_union_string_empty_maxk" + maxK + "_java.sk"),
+        union.toByteArray(new ArrayOfStringsSerDe()));
+  }
+
+  /** Writes one serialized ReservoirItemsUnion per n, each updated with "item" + i for i in 0..n-1. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsUnionStringExact() throws IOException {
+    final int maxK = 128;
+    final int[] nArr = {1, 10, 32, 100, 128};
+
+    for (final int n : nArr) {
+      final ReservoirItemsUnion<String> union = ReservoirItemsUnion.newInstance(maxK);
+      for (int i = 0; i < n; i++) {
+        union.update("item" + i);
+      }
+      Files.write(javaPath.resolve("reservoir_items_union_string_exact_n" + n + "_maxk" + maxK + "_java.sk"),
+          union.toByteArray(new ArrayOfStringsSerDe()));
+    }
+  }
+
+  /** Writes one serialized ReservoirItemsUnion per maxK, each fed a sketch of fixed String samples. */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateReservoirItemsUnionStringSampling() throws IOException {
+    final int[] maxKArr = {32, 64, 128};
+    final long n = 1000;
+
+    for (final int maxK : maxKArr) {
+      final ArrayList<String> predeterminedSamples = new ArrayList<>();
+      for (int i = 0; i < maxK; i++) {
+        predeterminedSamples.add("item" + (i * 2));
+      }
+
+      final ReservoirItemsSketch<String> sk = ReservoirItemsSketch.newInstance(
+          predeterminedSamples,
+          n,
+          ResizeFactor.X8,
+          maxK
+      );
+
+      final ReservoirItemsUnion<String> union = ReservoirItemsUnion.newInstance(maxK);
+      union.update(sk);
+
+      Files.write(javaPath.resolve("reservoir_items_union_string_sampling_n" + n + "_maxk" + maxK + "_java.sk"),
+          union.toByteArray(new ArrayOfStringsSerDe()));
+    }
+  }
 }