diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs
index be63ae2156..7c52032785 100644
--- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs
+++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs
@@ -460,12 +460,18 @@ private List<ReadOnlyMemory<byte>> CloneNullBitMapBuffers()
         public PrimitiveColumnContainer<T> Clone<U>(PrimitiveColumnContainer<U> mapIndices, Type type, bool invertMapIndices = false)
             where U : unmanaged
         {
+            PrimitiveColumnContainer<T> ret = new PrimitiveColumnContainer<T>(mapIndices.Length);
+
+            if (Buffers.Count == 0)
+            {
+                return ret;
+            }
+
             ReadOnlySpan<T> thisSpan = Buffers[0].ReadOnlySpan;
             ReadOnlySpan<byte> thisNullBitMapSpan = NullBitMapBuffers[0].ReadOnlySpan;
             long minRange = 0;
             long maxRange = DataFrameBuffer<T>.MaxCapacity;
             long maxCapacity = maxRange;
-            PrimitiveColumnContainer<T> ret = new PrimitiveColumnContainer<T>(mapIndices.Length);
             for (int b = 0; b < mapIndices.Buffers.Count; b++)
             {
                 int index = b;
diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameAssert.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameAssert.cs
new file mode 100644
index 0000000000..14f51ae043
--- /dev/null
+++ b/test/Microsoft.Data.Analysis.Tests/DataFrameAssert.cs
@@ -0,0 +1,42 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Xunit;
+
+namespace Microsoft.Data.Analysis.Tests
+{
+    public static class DataFrameAssert
+    {
+        public static void Equal(DataFrame expected, DataFrame actual)
+        {
+            Assert.Equal(expected.Columns.Count, actual.Columns.Count);
+            Assert.Equal(expected.Rows.Count, actual.Rows.Count);
+
+            for (int c = 0; c < expected.Columns.Count; c++)
+            {
+                var expectedColumn = expected.Columns[c];
+                var actualColumn = actual.Columns[c];
+
+                Assert.Equal(expectedColumn.Name, actualColumn.Name);
+                Assert.Equal(expectedColumn.GetType(), actualColumn.GetType());
+
+                for (int r = 0; r < expected.Rows.Count; r++)
+                {
+                    var expectedValue = expectedColumn[r];
+                    var actualValue = actualColumn[r];
+
+                    if (expectedValue == null || actualValue == null)
+                    {
+                        Assert.Null(expectedValue);
+                        Assert.Null(actualValue);
+                    }
+                    else
+                    {
+                        Assert.Equal(expectedValue, actualValue);
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.Merge.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.Merge.cs
index b507e846e8..dbd689ff3d 100644
--- a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.Merge.cs
+++ b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.Merge.cs
@@ -714,6 +714,256 @@ public void TestMerge_Issue5778()
             MatchRowsOnMergedDataFrame(merge, left, right, 1, 1, 0);
         }
 
+        public static IEnumerable<object[]> GenerateData_TestMerge_EmptyDataFrames()
+        {
+            yield return new object[]
+            {
+                new DataFrame(
+                    new Int32DataFrameColumn("Index"),
+                    new Int32DataFrameColumn("L1"),
+                    new Int32DataFrameColumn("L2"),
+                    new StringDataFrameColumn("L3")
+                ),
+                new DataFrame(
+                    new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
+                    new Int32DataFrameColumn("R1", new[] { 0, 1, 1 }),
+                    new Int32DataFrameColumn("R2", new[] { 1, 1, 2 }),
+                    new StringDataFrameColumn("R3", new[] { "Z", "Y", "B" })
+                ),
+                new string[]{ "L1" },
+                new string[]{ "R1" },
+                JoinAlgorithm.Left,
+                new DataFrame(
+                    new Int32DataFrameColumn("Index_left"),
+                    new Int32DataFrameColumn("L1"),
+                    new Int32DataFrameColumn("L2"),
+                    new StringDataFrameColumn("L3"),
+                    new Int32DataFrameColumn("Index_right"),
+                    new Int32DataFrameColumn("R1"),
+                    new Int32DataFrameColumn("R2"),
+                    new StringDataFrameColumn("R3")
+                ),
+            };
+            yield return new object[]
+            {
+                new DataFrame(
+                    new Int32DataFrameColumn("Index"),
+                    new Int32DataFrameColumn("L1"),
+                    new Int32DataFrameColumn("L2"),
+                    new StringDataFrameColumn("L3")
+                ),
+                new DataFrame(
+                    new Int32DataFrameColumn("Index"),
+                    new Int32DataFrameColumn("R1"),
+                    new Int32DataFrameColumn("R2"),
+                    new StringDataFrameColumn("R3")
+                ),
+                new string[]{ "L1" },
+                new string[]{ "R1" },
+                JoinAlgorithm.Inner,
+                new DataFrame(
+                    new Int32DataFrameColumn("Index_left"),
+                    new Int32DataFrameColumn("L1"),
+                    new Int32DataFrameColumn("L2"),
+                    new StringDataFrameColumn("L3"),
+                    new Int32DataFrameColumn("Index_right"),
+                    new Int32DataFrameColumn("R1"),
+                    new Int32DataFrameColumn("R2"),
+                    new StringDataFrameColumn("R3")
+                ),
+            };
+            yield return new object[]
+            {
+                new DataFrame(
+                    new Int32DataFrameColumn("Index"),
+                    new Int32DataFrameColumn("L1"),
+                    new Int32DataFrameColumn("L2"),
+                    new StringDataFrameColumn("L3")
+                ),
+                new DataFrame(
+                    new Int32DataFrameColumn("Index"),
+                    new Int32DataFrameColumn("R1"),
+                    new Int32DataFrameColumn("R2"),
+                    new StringDataFrameColumn("R3")
+                ),
+                new string[]{ "L1" },
+                new string[]{ "R1" },
+                JoinAlgorithm.Left,
+                new DataFrame(
+                    new Int32DataFrameColumn("Index_left"),
+                    new Int32DataFrameColumn("L1"),
+                    new Int32DataFrameColumn("L2"),
+                    new StringDataFrameColumn("L3"),
+                    new Int32DataFrameColumn("Index_right"),
+                    new Int32DataFrameColumn("R1"),
+                    new Int32DataFrameColumn("R2"),
+                    new StringDataFrameColumn("R3")
+                ),
+            };
+            yield return new object[]
+            {
+                new DataFrame(
+                    new Int32DataFrameColumn("Index"),
+                    new Int32DataFrameColumn("L1"),
+                    new Int32DataFrameColumn("L2"),
+                    new StringDataFrameColumn("L3")
+                ),
+                new DataFrame(
+                    new Int32DataFrameColumn("Index"),
+                    new Int32DataFrameColumn("R1"),
+                    new Int32DataFrameColumn("R2"),
+                    new StringDataFrameColumn("R3")
+                ),
+                new string[]{ "L1" },
+                new string[]{ "R1" },
+                JoinAlgorithm.Right,
+                new DataFrame(
+                    new Int32DataFrameColumn("Index_left"),
+                    new Int32DataFrameColumn("L1"),
+                    new Int32DataFrameColumn("L2"),
+                    new StringDataFrameColumn("L3"),
+                    new Int32DataFrameColumn("Index_right"),
+                    new Int32DataFrameColumn("R1"),
+                    new Int32DataFrameColumn("R2"),
+                    new StringDataFrameColumn("R3")
+                ),
+            };
+            yield return new object[]
+            {
+                new DataFrame(
+                    new Int32DataFrameColumn("Index"),
+                    new Int32DataFrameColumn("L1"),
+                    new Int32DataFrameColumn("L2"),
+                    new StringDataFrameColumn("L3")
+                ),
+                new DataFrame(
+                    new Int32DataFrameColumn("Index"),
+                    new Int32DataFrameColumn("R1"),
+                    new Int32DataFrameColumn("R2"),
+                    new StringDataFrameColumn("R3")
+                ),
+                new string[]{ "L1" },
+                new string[]{ "R1" },
+                JoinAlgorithm.FullOuter,
+                new DataFrame(
+                    new Int32DataFrameColumn("Index_left"),
+                    new Int32DataFrameColumn("L1"),
+                    new Int32DataFrameColumn("L2"),
+                    new StringDataFrameColumn("L3"),
+                    new Int32DataFrameColumn("Index_right"),
+                    new Int32DataFrameColumn("R1"),
+                    new Int32DataFrameColumn("R2"),
+                    new StringDataFrameColumn("R3")
+                ),
+            };
+        }
+
+        [Theory]
+        [MemberData(nameof(GenerateData_TestMerge_EmptyDataFrames))]
+        public void TestMerge_EmptyDataFrames(DataFrame left, DataFrame right, string[] leftColumns, string[] rightColumns, JoinAlgorithm joinAlgorithm, DataFrame expectedOutput)
+        {
+            DataFrame actualOutput = left.Merge(right, leftColumns, rightColumns, joinAlgorithm: joinAlgorithm);
+
+            DataFrameAssert.Equal(expectedOutput, actualOutput);
+        }
+
+        public static IEnumerable<object[]> GenerateData_TestMerge_OuterJoinsPreserveUnmatched()
+        {
+            yield return new object[]
+            {
+                new DataFrame(
+                    new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
+                    new Int32DataFrameColumn("L1", new[] { 1, 2, 3 }),
+                    new Int32DataFrameColumn("L2", new[] { 1, 2, 1 }),
+                    new StringDataFrameColumn("L3", new[] { "A", "B", "C" })
+                ),
+                new DataFrame(
+                    new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
+                    new Int32DataFrameColumn("R1", new[] { 10, 11, 11 }),
+                    new Int32DataFrameColumn("R2", new[] { 1, 1, 2 }),
+                    new StringDataFrameColumn("R3", new[] { "Z", "Y", "B" })
+                ),
+                new string[]{ "L1" },
+                new string[]{ "R1" },
+                JoinAlgorithm.Left,
+                new DataFrame(
+                    new Int32DataFrameColumn("Index_left", new[] { 0, 1, 2 }),
+                    new Int32DataFrameColumn("L1", new[] { 1, 2, 3 }),
+                    new Int32DataFrameColumn("L2", new[] { 1, 2, 1 }),
+                    new StringDataFrameColumn("L3", new[] { "A", "B", "C" }),
+                    new Int32DataFrameColumn("Index_right", new int?[] { null, null, null }),
+                    new Int32DataFrameColumn("R1", new int?[] { null, null, null }),
+                    new Int32DataFrameColumn("R2", new int?[] { null, null, null }),
+                    new StringDataFrameColumn("R3", new string[] { null, null, null })
+                ),
+            };
+            yield return new object[]
+            {
+                new DataFrame(
+                    new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
+                    new Int32DataFrameColumn("L1", new[] { 1, 2, 3 }),
+                    new Int32DataFrameColumn("L2", new[] { 1, 2, 1 }),
+                    new StringDataFrameColumn("L3", new[] { "A", "B", "C" })
+                ),
+                new DataFrame(
+                    new Int32DataFrameColumn("Index"),
+                    new Int32DataFrameColumn("R1"),
+                    new Int32DataFrameColumn("R2"),
+                    new StringDataFrameColumn("R3")
+                ),
+                new string[]{ "L1" },
+                new string[]{ "R1" },
+                JoinAlgorithm.Left,
+                new DataFrame(
+                    new Int32DataFrameColumn("Index_left", new[] { 0, 1, 2 }),
+                    new Int32DataFrameColumn("L1", new[] { 1, 2, 3 }),
+                    new Int32DataFrameColumn("L2", new[] { 1, 2, 1 }),
+                    new StringDataFrameColumn("L3", new[] { "A", "B", "C" }),
+                    new Int32DataFrameColumn("Index_right", new int?[] { null, null, null }),
+                    new Int32DataFrameColumn("R1", new int?[] { null, null, null }),
+                    new Int32DataFrameColumn("R2", new int?[] { null, null, null }),
+                    new StringDataFrameColumn("R3", new string[] { null, null, null })
+                ),
+            };
+            yield return new object[]
+            {
+                new DataFrame(
+                    new Int32DataFrameColumn("Index"),
+                    new Int32DataFrameColumn("L1"),
+                    new Int32DataFrameColumn("L2"),
+                    new StringDataFrameColumn("L3")
+                ),
+                new DataFrame(
+                    new Int32DataFrameColumn("Index", new[] { 0, 1, 2 }),
+                    new Int32DataFrameColumn("R1", new[] { 1, 2, 3 }),
+                    new Int32DataFrameColumn("R2", new[] { 1, 2, 1 }),
+                    new StringDataFrameColumn("R3", new[] { "A", "B", "C" })
+                ),
+                new string[]{ "L1" },
+                new string[]{ "R1" },
+                JoinAlgorithm.Right,
+                new DataFrame(
+                    new Int32DataFrameColumn("Index_left", new int?[] { null, null, null }),
+                    new Int32DataFrameColumn("L1", new int?[] { null, null, null }),
+                    new Int32DataFrameColumn("L2", new int?[] { null, null, null }),
+                    new StringDataFrameColumn("L3", new string[] { null, null, null }),
+                    new Int32DataFrameColumn("Index_right", new[] { 0, 1, 2 }),
+                    new Int32DataFrameColumn("R1", new[] { 1, 2, 3 }),
+                    new Int32DataFrameColumn("R2", new[] { 1, 2, 1 }),
+                    new StringDataFrameColumn("R3", new[] { "A", "B", "C" })
+                ),
+            };
+        }
+
+        [Theory]
+        [MemberData(nameof(GenerateData_TestMerge_OuterJoinsPreserveUnmatched))]
+        public void TestMerge_OuterJoinsPreserveUnmatched(DataFrame left, DataFrame right, string[] leftColumns, string[] rightColumns, JoinAlgorithm joinAlgorithm, DataFrame expectedOutput)
+        {
+            DataFrame actualOutput = left.Merge(right, leftColumns, rightColumns, joinAlgorithm: joinAlgorithm);
+
+            DataFrameAssert.Equal(expectedOutput, actualOutput);
+        }
+
         [Fact] //Issue 6127
         public void TestMerge_CorrectColumnTypes()