From 159d016adbeeae6cb3ef4b160e7e119a48f86c44 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Thu, 29 Jan 2026 17:40:06 +0100 Subject: [PATCH 1/2] feat(isthmus): add strptime_* function mappings Signed-off-by: Niels Pardon --- .../isthmus/expression/FunctionMappings.java | 5 +- .../expression/ScalarFunctionConverter.java | 5 +- .../StrptimeDateFunctionMapper.java | 60 ++++++++++ .../StrptimeTimeFunctionMapper.java | 60 ++++++++++ .../StrptimeTimestampFunctionMapper.java | 62 +++++++++++ .../isthmus/FunctionConversionTest.java | 103 ++++++++++++++++-- 6 files changed, 284 insertions(+), 11 deletions(-) create mode 100644 isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeDateFunctionMapper.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeTimeFunctionMapper.java create mode 100644 isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeTimestampFunctionMapper.java diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java index 0c8805c0f..90f04a326 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/FunctionMappings.java @@ -97,7 +97,10 @@ public class FunctionMappings { s(SqlLibraryOperators.LEFT, "left"), s(SqlLibraryOperators.RIGHT, "right"), s(SqlLibraryOperators.LPAD, "lpad"), - s(SqlLibraryOperators.RPAD, "rpad")) + s(SqlLibraryOperators.RPAD, "rpad"), + s(SqlLibraryOperators.PARSE_TIME, "strptime_time"), + s(SqlLibraryOperators.PARSE_TIMESTAMP, "strptime_timestamp"), + s(SqlLibraryOperators.PARSE_DATE, "strptime_date")) .build(); public static final ImmutableList AGGREGATE_SIGS = diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/ScalarFunctionConverter.java b/isthmus/src/main/java/io/substrait/isthmus/expression/ScalarFunctionConverter.java index 7ffaa018d..26d740f19 100644 --- a/isthmus/src/main/java/io/substrait/isthmus/expression/ScalarFunctionConverter.java +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/ScalarFunctionConverter.java @@ -47,7 +47,10 @@ public ScalarFunctionConverter( new TrimFunctionMapper(functions), new SqrtFunctionMapper(functions), new ExtractDateFunctionMapper(functions), - new PositionFunctionMapper(functions)); + new PositionFunctionMapper(functions), + new StrptimeDateFunctionMapper(functions), + new StrptimeTimeFunctionMapper(functions), + new StrptimeTimestampFunctionMapper(functions)); } @Override diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeDateFunctionMapper.java b/isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeDateFunctionMapper.java new file mode 100644 index 000000000..3aff3205c --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeDateFunctionMapper.java @@ -0,0 +1,60 @@ +package io.substrait.isthmus.expression; + +import io.substrait.expression.Expression.ScalarFunctionInvocation; +import io.substrait.expression.FunctionArg; +import io.substrait.extension.SimpleExtension.ScalarFunctionVariant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.fun.SqlLibraryOperators; + +/** + * Custom mapping for the Calcite {@code PARSE_DATE} function to the Substrait {@code strptime_date} + * function. + * + *

Calcite {@code PARSE_DATE} has format followed by date_string parameters, + * while Substrait {@code strptime_date} has date_string followed by format. When + * mapping between Calcite and Substrait, the parameters need to be reversed. + * + *

{@code PARSE_DATE(format, date_string)} maps to {@code strptime_date(date_string, format)}. + */ +public final class StrptimeDateFunctionMapper implements ScalarFunctionMapper { + private static final String STRPTIME_DATE_FUNCTION_NAME = "strptime_date"; + private final List strptimeDateFunctions; + + public StrptimeDateFunctionMapper(List functions) { + strptimeDateFunctions = + functions.stream() + .filter(f -> STRPTIME_DATE_FUNCTION_NAME.equals(f.name())) + .collect(Collectors.toUnmodifiableList()); + } + + @Override + public Optional toSubstrait(RexCall call) { + if (!SqlLibraryOperators.PARSE_DATE.equals(call.op)) { + return Optional.empty(); + } + + List operands = new ArrayList<>(call.getOperands()); + Collections.swap(operands, 0, 1); + + return Optional.of( + new SubstraitFunctionMapping(STRPTIME_DATE_FUNCTION_NAME, operands, strptimeDateFunctions)); + } + + @Override + public Optional> getExpressionArguments(ScalarFunctionInvocation expression) { + if (!STRPTIME_DATE_FUNCTION_NAME.equals(expression.declaration().name())) { + return Optional.empty(); + } + + List arguments = new ArrayList<>(expression.arguments()); + Collections.swap(arguments, 0, 1); + + return Optional.of(arguments); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeTimeFunctionMapper.java b/isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeTimeFunctionMapper.java new file mode 100644 index 000000000..a1f360d1f --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeTimeFunctionMapper.java @@ -0,0 +1,60 @@ +package io.substrait.isthmus.expression; + +import io.substrait.expression.Expression.ScalarFunctionInvocation; +import io.substrait.expression.FunctionArg; +import io.substrait.extension.SimpleExtension.ScalarFunctionVariant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.fun.SqlLibraryOperators; + +/** + * Custom mapping for the Calcite {@code PARSE_TIME} function to the Substrait {@code strptime_time} + * function. + * + *

Calcite {@code PARSE_TIME} has format followed by time_string parameters, + * while Substrait {@code strptime_time} has time_string followed by format. When + * mapping between Calcite and Substrait, the parameters need to be reversed. + * + *

{@code PARSE_TIME(format, time_string)} maps to {@code strptime_time(time_string, format)}. + */ +public final class StrptimeTimeFunctionMapper implements ScalarFunctionMapper { + private static final String STRPTIME_TIME_FUNCTION_NAME = "strptime_time"; + private final List strptimeTimeFunctions; + + public StrptimeTimeFunctionMapper(List functions) { + strptimeTimeFunctions = + functions.stream() + .filter(f -> STRPTIME_TIME_FUNCTION_NAME.equals(f.name())) + .collect(Collectors.toUnmodifiableList()); + } + + @Override + public Optional toSubstrait(RexCall call) { + if (!SqlLibraryOperators.PARSE_TIME.equals(call.op)) { + return Optional.empty(); + } + + List operands = new ArrayList<>(call.getOperands()); + Collections.swap(operands, 0, 1); + + return Optional.of( + new SubstraitFunctionMapping(STRPTIME_TIME_FUNCTION_NAME, operands, strptimeTimeFunctions)); + } + + @Override + public Optional> getExpressionArguments(ScalarFunctionInvocation expression) { + if (!STRPTIME_TIME_FUNCTION_NAME.equals(expression.declaration().name())) { + return Optional.empty(); + } + + List arguments = new ArrayList<>(expression.arguments()); + Collections.swap(arguments, 0, 1); + + return Optional.of(arguments); + } +} diff --git a/isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeTimestampFunctionMapper.java b/isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeTimestampFunctionMapper.java new file mode 100644 index 000000000..f0c1ca5f9 --- /dev/null +++ b/isthmus/src/main/java/io/substrait/isthmus/expression/StrptimeTimestampFunctionMapper.java @@ -0,0 +1,62 @@ +package io.substrait.isthmus.expression; + +import io.substrait.expression.Expression.ScalarFunctionInvocation; +import io.substrait.expression.FunctionArg; +import io.substrait.extension.SimpleExtension.ScalarFunctionVariant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.fun.SqlLibraryOperators; + +/** + * Custom mapping for the Calcite {@code PARSE_TIMESTAMP} function to the Substrait {@code + * strptime_timestamp} function. + * + *

Calcite {@code PARSE_TIMESTAMP} has format followed by timestamp_string + * parameters, while Substrait {@code strptime_timestamp} has timestamp_string followed by + * format. When mapping between Calcite and Substrait, the parameters need to be reversed. + * + *

{@code PARSE_TIMESTAMP(format, timestamp_string)} maps to {@code + * strptime_timestamp(timestamp_string, format)}. + */ +public final class StrptimeTimestampFunctionMapper implements ScalarFunctionMapper { + private static final String STRPTIME_TIMESTAMP_FUNCTION_NAME = "strptime_timestamp"; + private final List strptimeTimestampFunctions; + + public StrptimeTimestampFunctionMapper(List functions) { + strptimeTimestampFunctions = + functions.stream() + .filter(f -> STRPTIME_TIMESTAMP_FUNCTION_NAME.equals(f.name())) + .collect(Collectors.toUnmodifiableList()); + } + + @Override + public Optional toSubstrait(RexCall call) { + if (!SqlLibraryOperators.PARSE_TIMESTAMP.equals(call.op)) { + return Optional.empty(); + } + + List operands = new ArrayList<>(call.getOperands()); + Collections.swap(operands, 0, 1); + + return Optional.of( + new SubstraitFunctionMapping( + STRPTIME_TIMESTAMP_FUNCTION_NAME, operands, strptimeTimestampFunctions)); + } + + @Override + public Optional> getExpressionArguments(ScalarFunctionInvocation expression) { + if (!STRPTIME_TIMESTAMP_FUNCTION_NAME.equals(expression.declaration().name())) { + return Optional.empty(); + } + + List arguments = new ArrayList<>(expression.arguments()); + Collections.swap(arguments, 0, 1); + + return Optional.of(arguments); + } +} diff --git a/isthmus/src/test/java/io/substrait/isthmus/FunctionConversionTest.java b/isthmus/src/test/java/io/substrait/isthmus/FunctionConversionTest.java index e08728400..241d526f3 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/FunctionConversionTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/FunctionConversionTest.java @@ -16,6 +16,7 @@ import io.substrait.isthmus.expression.ScalarFunctionConverter; import io.substrait.isthmus.expression.WindowFunctionConverter; import io.substrait.type.TypeCreator; +import java.util.stream.Stream; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlKind; @@ -25,21 +26,25 @@ * Verify that "problematic" Substrait functions can be converted to Calcite and back successfully */ class FunctionConversionTest extends PlanTestBase { + final ScalarFunctionConverter scalarFnConverter = + new ScalarFunctionConverter(extensions.scalarFunctions(), typeFactory); + + final WindowFunctionConverter windowFnConverter = + new WindowFunctionConverter(extensions.windowFunctions(), typeFactory); final ExpressionRexConverter expressionRexConverter = new ExpressionRexConverter( - typeFactory, - new ScalarFunctionConverter(extensions.scalarFunctions(), typeFactory), - new WindowFunctionConverter(extensions.windowFunctions(), typeFactory), - TypeConverter.DEFAULT); + typeFactory, scalarFnConverter, windowFnConverter, TypeConverter.DEFAULT); final RexExpressionConverter rexExpressionConverter = new RexExpressionConverter( // a SubstraitRelVisitor is not needed for these tests null, - CallConverters.defaults(TypeConverter.DEFAULT), - // TODO: set WindowFunctionConverter if/when tests for window functions are added - null, + Stream.concat( + CallConverters.defaults(TypeConverter.DEFAULT).stream(), + Stream.of(scalarFnConverter)) + .toList(), + windowFnConverter, TypeConverter.DEFAULT); @Test @@ -61,8 +66,8 @@ void subtractDateIDay() { TypeConverter.DEFAULT.toCalcite(typeFactory, TypeCreator.REQUIRED.DATE), calciteExpr.getType()); - // TODO: remove once this can be converted back to Substrait - assertThrows(IllegalArgumentException.class, () -> calciteExpr.accept(rexExpressionConverter)); + Expression reverse = calciteExpr.accept(rexExpressionConverter); + assertEquals(expr, reverse); } @Test @@ -278,4 +283,84 @@ void concatCharAndVarchar() throws Exception { void concatStringLiteralAndChar() throws Exception { assertProtoPlanRoundrip("select 'brand_'||P_BRAND from PART"); } + + @Test + void testStrptimeTime() { + Expression.StrLiteral timeString = Expression.StrLiteral.builder().value("12:34:56").build(); + Expression.StrLiteral formatString = Expression.StrLiteral.builder().value("%H:%M:%S").build(); + ScalarFunctionInvocation strptimeTimeFn = + sb.scalarFn( + DefaultExtensionCatalog.FUNCTIONS_DATETIME, + "strptime_time:str_str", + TypeCreator.REQUIRED.TIME, + timeString, + formatString); + + // tests Substrait -> Calcite + RexNode calciteExpr = strptimeTimeFn.accept(expressionRexConverter, Context.newContext()); + assertEquals(SqlKind.OTHER_FUNCTION, calciteExpr.getKind()); + assertInstanceOf(RexCall.class, calciteExpr); + + RexCall extract = (RexCall) calciteExpr; + assertEquals("PARSE_TIME('%H:%M:%S':VARCHAR, '12:34:56':VARCHAR)", extract.toString()); + + // tests the reverse Calcite -> Substrait + Expression reverse = calciteExpr.accept(rexExpressionConverter); + assertEquals(strptimeTimeFn, reverse); + } + + @Test + void testStrptimeTimestamp() { + Expression.StrLiteral timestampString = + Expression.StrLiteral.builder().value("2026-01-29T12:34:56").build(); + Expression.StrLiteral formatString = + Expression.StrLiteral.builder().value("%Y:%m:%dT%H:%M:%S").build(); + ScalarFunctionInvocation strptimeTimestampFn = + sb.scalarFn( + DefaultExtensionCatalog.FUNCTIONS_DATETIME, + "strptime_timestamp:str_str", + // using precision 6 here to be compatible with Calcite + TypeCreator.REQUIRED.precisionTimestamp(6), + timestampString, + formatString); + + // tests Substrait -> Calcite + RexNode calciteExpr = strptimeTimestampFn.accept(expressionRexConverter, Context.newContext()); + assertEquals(SqlKind.OTHER_FUNCTION, calciteExpr.getKind()); + assertInstanceOf(RexCall.class, calciteExpr); + + RexCall extract = (RexCall) calciteExpr; + assertEquals( + "PARSE_TIMESTAMP('%Y:%m:%dT%H:%M:%S':VARCHAR, '2026-01-29T12:34:56':VARCHAR)", + extract.toString()); + + // tests the reverse Calcite -> Substrait + Expression reverse = calciteExpr.accept(rexExpressionConverter); + assertEquals(strptimeTimestampFn, reverse); + } + + @Test + void testStrptimeDate() { + Expression.StrLiteral dateString = Expression.StrLiteral.builder().value("2026-01-29").build(); + Expression.StrLiteral formatString = Expression.StrLiteral.builder().value("%Y:%m:%d").build(); + ScalarFunctionInvocation strptimeDateFn = + sb.scalarFn( + DefaultExtensionCatalog.FUNCTIONS_DATETIME, + "strptime_date:str_str", + TypeCreator.REQUIRED.DATE, + dateString, + formatString); + + // tests Substrait -> Calcite + RexNode calciteExpr = strptimeDateFn.accept(expressionRexConverter, Context.newContext()); + assertEquals(SqlKind.OTHER_FUNCTION, calciteExpr.getKind()); + assertInstanceOf(RexCall.class, calciteExpr); + + RexCall extract = (RexCall) calciteExpr; + assertEquals("PARSE_DATE('%Y:%m:%d':VARCHAR, '2026-01-29':VARCHAR)", extract.toString()); + + // tests the reverse Calcite -> Substrait + Expression reverse = calciteExpr.accept(rexExpressionConverter); + assertEquals(strptimeDateFn, reverse); + } } From 735c7f6f4a8db5fc0febf66dcf163954bfa79969 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Fri, 30 Jan 2026 15:47:51 +0100 Subject: [PATCH 2/2] feat: apply suggestions from code review Co-authored-by: Mark S. Lewis --- .../io/substrait/isthmus/FunctionConversionTest.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/isthmus/src/test/java/io/substrait/isthmus/FunctionConversionTest.java b/isthmus/src/test/java/io/substrait/isthmus/FunctionConversionTest.java index 241d526f3..fda3aca43 100644 --- a/isthmus/src/test/java/io/substrait/isthmus/FunctionConversionTest.java +++ b/isthmus/src/test/java/io/substrait/isthmus/FunctionConversionTest.java @@ -301,8 +301,7 @@ void testStrptimeTime() { assertEquals(SqlKind.OTHER_FUNCTION, calciteExpr.getKind()); assertInstanceOf(RexCall.class, calciteExpr); - RexCall extract = (RexCall) calciteExpr; - assertEquals("PARSE_TIME('%H:%M:%S':VARCHAR, '12:34:56':VARCHAR)", extract.toString()); + assertEquals("PARSE_TIME('%H:%M:%S':VARCHAR, '12:34:56':VARCHAR)", calciteExpr.toString()); // tests the reverse Calcite -> Substrait Expression reverse = calciteExpr.accept(rexExpressionConverter); @@ -329,10 +328,9 @@ void testStrptimeTimestamp() { assertEquals(SqlKind.OTHER_FUNCTION, calciteExpr.getKind()); assertInstanceOf(RexCall.class, calciteExpr); - RexCall extract = (RexCall) calciteExpr; assertEquals( "PARSE_TIMESTAMP('%Y:%m:%dT%H:%M:%S':VARCHAR, '2026-01-29T12:34:56':VARCHAR)", - extract.toString()); + calciteExpr.toString()); // tests the reverse Calcite -> Substrait Expression reverse = calciteExpr.accept(rexExpressionConverter); @@ -356,8 +354,7 @@ void testStrptimeDate() { assertEquals(SqlKind.OTHER_FUNCTION, calciteExpr.getKind()); assertInstanceOf(RexCall.class, calciteExpr); - RexCall extract = (RexCall) calciteExpr; - assertEquals("PARSE_DATE('%Y:%m:%d':VARCHAR, '2026-01-29':VARCHAR)", extract.toString()); + assertEquals("PARSE_DATE('%Y:%m:%d':VARCHAR, '2026-01-29':VARCHAR)", calciteExpr.toString()); // tests the reverse Calcite -> Substrait Expression reverse = calciteExpr.accept(rexExpressionConverter);