Skip to content

Commit f124c70

Browse files
committed
[Feature](func) Support GET_FORMAT function (apache#56691)
```text
mysql> SELECT * FROM get_format_test;
+------+----------+
| id   | lc       |
+------+----------+
|    1 | USA      |
|    2 | JIS      |
|    3 | ISO      |
|    4 | EUR      |
|    5 | INTERNAL |
|    6 | Doris    |
+------+----------+

mysql> SELECT lc, GET_FORMAT(DATE, lc) FROM get_format_test;
+----------+----------------------+
| lc       | GET_FORMAT(DATE, lc) |
+----------+----------------------+
| USA      | %m.%d.%Y             |
| JIS      | %Y-%m-%d             |
| ISO      | %Y-%m-%d             |
| EUR      | %d.%m.%Y             |
| INTERNAL | %Y%m%d               |
| Doris    | NULL                 |
+----------+----------------------+

mysql> SELECT lc, GET_FORMAT(DATETIME, lc) FROM get_format_test;
+----------+--------------------------+
| lc       | GET_FORMAT(DATETIME, lc) |
+----------+--------------------------+
| USA      | %Y-%m-%d %H.%i.%s        |
| JIS      | %Y-%m-%d %H:%i:%s        |
| ISO      | %Y-%m-%d %H:%i:%s        |
| EUR      | %Y-%m-%d %H.%i.%s        |
| INTERNAL | %Y%m%d%H%i%s             |
| Doris    | NULL                     |
+----------+--------------------------+

mysql> SELECT lc, GET_FORMAT(TIME, lc) FROM get_format_test;
+----------+----------------------+
| lc       | GET_FORMAT(TIME, lc) |
+----------+----------------------+
| USA      | %h:%i:%s %p          |
| JIS      | %H:%i:%s             |
| ISO      | %H:%i:%s             |
| EUR      | %H.%i.%s             |
| INTERNAL | %H%i%s               |
| Doris    | NULL                 |
+----------+----------------------+

mysql> SELECT GET_FORMAT(DORIS, 'USA');
ERROR 1105 (HY000): errCode = 2, detailMessage = Format type only support DATE, DATETIME and TIME, but get: DORIS
```
1 parent 0143660 commit f124c70

File tree

11 files changed

+547
-21
lines changed

11 files changed

+547
-21
lines changed

be/src/vec/functions/function_date_or_datetime_computation.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ void register_function_date_time_computation(SimpleFunctionFactory& factory) {
9898
factory.register_function<FunctionSecToDateTime>();
9999
factory.register_function<FunctionMonthsBetween>();
100100
factory.register_function<FunctionTime>();
101+
factory.register_function<FunctionGetFormat>();
101102

102103
// alias
103104
factory.register_alias("days_add", "date_add");

be/src/vec/functions/function_date_or_datetime_computation.h

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,5 +1418,116 @@ class FunctionTime : public IFunction {
14181418
return Status::OK();
14191419
}
14201420
};
1421+
1422+
class FunctionGetFormat : public IFunction {
1423+
public:
1424+
static constexpr auto name = "get_format";
1425+
static FunctionPtr create() { return std::make_shared<FunctionGetFormat>(); }
1426+
String get_name() const override { return name; }
1427+
size_t get_number_of_arguments() const override { return 2; }
1428+
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1429+
return make_nullable(std::make_shared<DataTypeString>());
1430+
}
1431+
1432+
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1433+
uint32_t result, size_t input_rows_count) const override {
1434+
const auto& [left_col_ptr, left_is_const] =
1435+
unpack_if_const(block.get_by_position(arguments[0]).column);
1436+
const auto& [right_col_ptr, right_is_const] =
1437+
unpack_if_const(block.get_by_position(arguments[1]).column);
1438+
1439+
const auto* left_col = assert_cast<const ColumnString*>(left_col_ptr.get());
1440+
const auto* right_col = assert_cast<const ColumnString*>(right_col_ptr.get());
1441+
1442+
auto type_ref = left_col->get_data_at(0);
1443+
std::string type_str(type_ref.data, type_ref.size);
1444+
1445+
auto res_col = ColumnString::create();
1446+
auto res_null_map = ColumnUInt8::create(input_rows_count, 0);
1447+
auto& res_data = res_col->get_chars();
1448+
auto& res_offsets = res_col->get_offsets();
1449+
1450+
if (type_str == DATE_NAME) {
1451+
execute_format_type<DateFormatImpl>(res_data, res_offsets, res_null_map->get_data(),
1452+
input_rows_count, right_col);
1453+
} else if (type_str == DATETIME_NAME) {
1454+
execute_format_type<DateTimeFormatImpl>(res_data, res_offsets, res_null_map->get_data(),
1455+
input_rows_count, right_col);
1456+
} else if (type_str == TIME_NAME) {
1457+
execute_format_type<TimeFormatImpl>(res_data, res_offsets, res_null_map->get_data(),
1458+
input_rows_count, right_col);
1459+
} else {
1460+
return Status::InvalidArgument(
1461+
"Function GET_FORMAT only support DATE, DATETIME or TIME");
1462+
}
1463+
1464+
block.replace_by_position(
1465+
result, ColumnNullable::create(std::move(res_col), std::move(res_null_map)));
1466+
return Status::OK();
1467+
}
1468+
1469+
private:
1470+
template <typename Impl>
1471+
static void execute_format_type(ColumnString::Chars& res_data,
1472+
ColumnString::Offsets& res_offsets,
1473+
PaddedPODArray<UInt8>& res_null_map, size_t input_rows_count,
1474+
const ColumnString* right_col) {
1475+
res_data.reserve(input_rows_count * Impl::ESTIMATE_SIZE);
1476+
res_offsets.reserve(input_rows_count);
1477+
1478+
for (int i = 0; i < input_rows_count; ++i) {
1479+
StringRef format_ref = right_col->get_data_at(i);
1480+
std::string format_str(format_ref.data, format_ref.size);
1481+
std::transform(format_str.begin(), format_str.end(), format_str.begin(), ::toupper);
1482+
1483+
std::string_view format_res;
1484+
if (format_str == "USA") {
1485+
format_res = Impl::USA;
1486+
} else if (format_str == "JIS" || format_str == "ISO") {
1487+
format_res = Impl::JIS_ISO;
1488+
} else if (format_str == "EUR") {
1489+
format_res = Impl::EUR;
1490+
} else if (format_str == "INTERNAL") {
1491+
format_res = Impl::INTERNAL;
1492+
} else {
1493+
res_null_map[i] = 1;
1494+
res_offsets.push_back(res_data.size());
1495+
continue;
1496+
}
1497+
1498+
res_data.insert(format_res.data(), format_res.data() + format_res.size());
1499+
res_offsets.push_back(res_data.size());
1500+
}
1501+
}
1502+
1503+
struct DateFormatImpl {
1504+
static constexpr auto USA = "%m.%d.%Y";
1505+
static constexpr auto JIS_ISO = "%Y-%m-%d";
1506+
static constexpr auto EUR = "%d.%m.%Y";
1507+
static constexpr auto INTERNAL = "%Y%m%d";
1508+
static constexpr size_t ESTIMATE_SIZE = 8;
1509+
};
1510+
1511+
struct DateTimeFormatImpl {
1512+
static constexpr auto USA = "%Y-%m-%d %H.%i.%s";
1513+
static constexpr auto JIS_ISO = "%Y-%m-%d %H:%i:%s";
1514+
static constexpr auto EUR = "%Y-%m-%d %H.%i.%s";
1515+
static constexpr auto INTERNAL = "%Y%m%d%H%i%s";
1516+
static constexpr size_t ESTIMATE_SIZE = 17;
1517+
};
1518+
1519+
struct TimeFormatImpl {
1520+
static constexpr auto USA = "%h:%i:%s %p";
1521+
static constexpr auto JIS_ISO = "%H:%i:%s";
1522+
static constexpr auto EUR = "%H.%i.%s";
1523+
static constexpr auto INTERNAL = "%H%i%s";
1524+
static constexpr size_t ESTIMATE_SIZE = 11;
1525+
};
1526+
1527+
static constexpr auto DATE_NAME = "DATE";
1528+
static constexpr auto DATETIME_NAME = "DATETIME";
1529+
static constexpr auto TIME_NAME = "TIME";
1530+
};
1531+
14211532
#include "common/compile_check_avoid_end.h"
14221533
} // namespace doris::vectorized

fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,6 +1605,8 @@ primaryExpression
16051605
(ORDER BY sortItem (COMMA sortItem)*)?
16061606
(SEPARATOR sep=expression)? RIGHT_PAREN
16071607
(OVER windowSpec)? #groupConcat
1608+
| GET_FORMAT LEFT_PAREN
1609+
expression COMMA expression RIGHT_PAREN #getFormatFunction
16081610
| TRIM LEFT_PAREN
16091611
((BOTH | LEADING | TRAILING) expression? | expression) FROM expression RIGHT_PAREN #trim
16101612
| (SUBSTR | SUBSTRING | MID) LEFT_PAREN
@@ -1998,6 +2000,7 @@ nonReserved
19982000
| FRONTENDS
19992001
| FUNCTION
20002002
| GENERATED
2003+
| GET_FORMAT
20012004
| GENERIC
20022005
| GLOBAL
20032006
| GRAPH

fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@
220220
import org.apache.doris.nereids.trees.expressions.functions.scalar.FromUnixtime;
221221
import org.apache.doris.nereids.trees.expressions.functions.scalar.G;
222222
import org.apache.doris.nereids.trees.expressions.functions.scalar.Gcd;
223+
import org.apache.doris.nereids.trees.expressions.functions.scalar.GetFormat;
223224
import org.apache.doris.nereids.trees.expressions.functions.scalar.GetVariantType;
224225
import org.apache.doris.nereids.trees.expressions.functions.scalar.Greatest;
225226
import org.apache.doris.nereids.trees.expressions.functions.scalar.Grouping;
@@ -551,6 +552,16 @@ public class BuiltinScalarFunctions implements FunctionHelper {
551552
scalar(Abs.class, "abs"),
552553
scalar(Acos.class, "acos"),
553554
scalar(Acosh.class, "acosh"),
555+
scalar(AITranslate.class, "ai_translate"),
556+
scalar(AISentiment.class, "ai_sentiment"),
557+
scalar(AIFilter.class, "ai_filter"),
558+
scalar(AIFixGrammar.class, "ai_fixgrammar"),
559+
scalar(AIExtract.class, "ai_extract"),
560+
scalar(AIGenerate.class, "ai_generate"),
561+
scalar(AIClassify.class, "ai_classify"),
562+
scalar(AIMask.class, "ai_mask"),
563+
scalar(AISummarize.class, "ai_summarize"),
564+
scalar(AISimilarity.class, "ai_similarity"),
554565
scalar(AesDecrypt.class, "aes_decrypt"),
555566
scalar(AesEncrypt.class, "aes_encrypt"),
556567
scalar(AppendTrailingCharIfAbsent.class, "append_trailing_char_if_absent"),
@@ -651,6 +662,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
651662
scalar(Char.class, "char"),
652663
scalar(CharacterLength.class, "char_length", "character_length"),
653664
scalar(Coalesce.class, "coalesce"),
665+
scalar(Compress.class, "compress"),
654666
scalar(Concat.class, "concat"),
655667
scalar(ConcatWs.class, "concat_ws"),
656668
scalar(ConnectionId.class, "connection_id"),
@@ -705,6 +717,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
705717
scalar(E.class, "e"),
706718
scalar(ElementAt.class, "element_at"),
707719
scalar(Elt.class, "elt"),
720+
scalar(Embed.class, "embed"),
708721
scalar(EncodeAsSmallInt.class, "encode_as_smallint"),
709722
scalar(EncodeAsInt.class, "encode_as_int"),
710723
scalar(EncodeAsBigInt.class, "encode_as_bigint"),
@@ -713,6 +726,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
713726
scalar(EsQuery.class, "esquery"),
714727
scalar(Even.class, "even"),
715728
scalar(Exp.class, "exp"),
729+
scalar(ExportSet.class, "export_set"),
716730
scalar(ExtractUrlParameter.class, "extract_url_parameter"),
717731
scalar(Field.class, "field"),
718732
scalar(FindInSet.class, "find_in_set"),
@@ -730,6 +744,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
730744
scalar(FromUnixtime.class, "from_unixtime"),
731745
scalar(G.class, "g"),
732746
scalar(Gcd.class, "gcd"),
747+
scalar(GetFormat.class, "get_format"),
733748
scalar(GetVariantType.class, "variant_type"),
734749
scalar(Greatest.class, "greatest"),
735750
scalar(Grouping.class, "grouping"),
@@ -810,6 +825,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
810825
scalar(L2DistanceApproximate.class, "l2_distance_approximate"),
811826
scalar(L2Distance.class, "l2_distance"),
812827
scalar(LastDay.class, "last_day"),
828+
scalar(LastQueryId.class, "last_query_id"),
813829
scalar(Lcm.class, "lcm"),
814830
scalar(Least.class, "least"),
815831
scalar(Left.class, "left", "strleft"),
@@ -827,6 +843,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
827843
scalar(Ltrim.class, "ltrim"),
828844
scalar(LtrimIn.class, "ltrim_in"),
829845
scalar(MakeDate.class, "makedate"),
846+
scalar(MakeSet.class, "make_set"),
830847
scalar(MapContainsEntry.class, "map_contains_entry"),
831848
scalar(MapContainsKey.class, "map_contains_key"),
832849
scalar(MapContainsValue.class, "map_contains_value"),
@@ -861,6 +878,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
861878
scalar(MonthsBetween.class, "months_between"),
862879
scalar(MonthsDiff.class, "months_diff"),
863880
scalar(MonthsSub.class, "months_sub"),
881+
scalar(MultiMatch.class, "multi_match"),
864882
scalar(MultiMatchAny.class, "multi_match_any"),
865883
scalar(MultiSearchAllPositions.class, "multi_search_all_positions"),
866884
scalar(MurmurHash332.class, "murmur_hash3_32"),
@@ -897,6 +915,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
897915
scalar(QuartersDiff.class, "quarters_diff"),
898916
scalar(QuarterFloor.class, "quarter_floor"),
899917
scalar(QuartersSub.class, "quarters_sub"),
918+
scalar(Quote.class, "quote"),
900919
scalar(Search.class, "search"),
901920
scalar(Radians.class, "radians"),
902921
scalar(Random.class, "rand", "random"),
@@ -934,6 +953,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
934953
scalar(MilliSecondTimestamp.class, "millisecond_timestamp"),
935954
scalar(MicroSecondTimestamp.class, "microsecond_timestamp"),
936955
scalar(RandomBytes.class, "random_bytes"),
956+
scalar(SessionUser.class, "session_user"),
937957
scalar(Sha1.class, "sha1", "sha"),
938958
scalar(Sha2.class, "sha2"),
939959
scalar(Sign.class, "sign"),
@@ -1024,7 +1044,8 @@ public class BuiltinScalarFunctions implements FunctionHelper {
10241044
scalar(UnhexNull.class, "unhex_null"),
10251045
scalar(UnixTimestamp.class, "unix_timestamp"),
10261046
scalar(Upper.class, "ucase", "upper"),
1027-
scalar(Quote.class, "quote"),
1047+
scalar(Uncompress.class, "uncompress"),
1048+
scalar(Uniform.class, "uniform"),
10281049
scalar(UrlDecode.class, "url_decode"),
10291050
scalar(UrlEncode.class, "url_encode"),
10301051
scalar(User.class, "user"),
@@ -1053,26 +1074,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
10531074
scalar(YearWeek.class, "yearweek"),
10541075
scalar(YearsAdd.class, "years_add"),
10551076
scalar(YearsDiff.class, "years_diff"),
1056-
scalar(YearsSub.class, "years_sub"),
1057-
scalar(MultiMatch.class, "multi_match"),
1058-
scalar(SessionUser.class, "session_user"),
1059-
scalar(LastQueryId.class, "last_query_id"),
1060-
scalar(Compress.class, "compress"),
1061-
scalar(Uncompress.class, "uncompress"),
1062-
scalar(AITranslate.class, "ai_translate"),
1063-
scalar(AISentiment.class, "ai_sentiment"),
1064-
scalar(AIFilter.class, "ai_filter"),
1065-
scalar(AIFixGrammar.class, "ai_fixgrammar"),
1066-
scalar(AIExtract.class, "ai_extract"),
1067-
scalar(AIGenerate.class, "ai_generate"),
1068-
scalar(AIClassify.class, "ai_classify"),
1069-
scalar(AIMask.class, "ai_mask"),
1070-
scalar(AISummarize.class, "ai_summarize"),
1071-
scalar(AISimilarity.class, "ai_similarity"),
1072-
scalar(Embed.class, "embed"),
1073-
scalar(Uniform.class, "uniform"),
1074-
scalar(MakeSet.class, "make_set"),
1075-
scalar(ExportSet.class, "export_set"));
1077+
scalar(YearsSub.class, "years_sub"));
10761078

10771079
public static final BuiltinScalarFunctions INSTANCE = new BuiltinScalarFunctions();
10781080

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
import org.apache.doris.nereids.types.BigIntType;
9393
import org.apache.doris.nereids.types.BooleanType;
9494
import org.apache.doris.nereids.types.DataType;
95+
import org.apache.doris.nereids.types.StringType;
9596
import org.apache.doris.nereids.types.TinyIntType;
9697
import org.apache.doris.nereids.util.ExpressionUtils;
9798
import org.apache.doris.nereids.util.TypeCoercionUtils;
@@ -397,6 +398,19 @@ UnboundFunction preProcessUnboundFunction(UnboundFunction unboundFunction, Expre
397398
newChildrenBuilder.add(unboundFunction.child(i));
398399
}
399400
unboundFunction = unboundFunction.withChildren(newChildrenBuilder.build());
401+
} else if (StringUtils.isEmpty(unboundFunction.getDbName())
402+
&& "get_format".equalsIgnoreCase(unboundFunction.getName())
403+
&& unboundFunction.arity() == 2
404+
&& unboundFunction.child(0) instanceof UnboundSlot) {
405+
SlotReference slotReference = new SlotReference(new ExprId(-1),
406+
((UnboundSlot) unboundFunction.child(0)).getName(),
407+
StringType.INSTANCE, false, ImmutableList.of());
408+
ImmutableList.Builder<Expression> newChildrenBuilder = ImmutableList.builder();
409+
newChildrenBuilder.add(slotReference);
410+
for (int i = 1; i < unboundFunction.arity(); i++) {
411+
newChildrenBuilder.add(unboundFunction.child(i));
412+
}
413+
unboundFunction = unboundFunction.withChildren(newChildrenBuilder.build());
400414
}
401415
unboundFunction = (UnboundFunction) super.visit(unboundFunction, context);
402416
}
@@ -449,6 +463,14 @@ public Expression visitUnboundFunction(UnboundFunction unboundFunction, Expressi
449463
return ret;
450464
}
451465
}
466+
if (GetFormatFunctionBinder.isGetFormatFunction(unboundFunction.getName())) {
467+
Expression ret = GetFormatFunctionBinder.INSTANCE.bind(unboundFunction);
468+
if (ret instanceof BoundFunction) {
469+
return TypeCoercionUtils.processBoundFunction((BoundFunction) ret);
470+
} else {
471+
return ret;
472+
}
473+
}
452474
if (DatetimeFunctionBinder.isDatetimeFunction(unboundFunction.getName())) {
453475
Expression ret = DatetimeFunctionBinder.INSTANCE.bind(unboundFunction);
454476
if (ret instanceof BoundFunction) {
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.nereids.rules.analysis;
19+
20+
import org.apache.doris.nereids.analyzer.UnboundFunction;
21+
import org.apache.doris.nereids.exceptions.AnalysisException;
22+
import org.apache.doris.nereids.trees.expressions.Expression;
23+
import org.apache.doris.nereids.trees.expressions.SlotReference;
24+
import org.apache.doris.nereids.trees.expressions.functions.scalar.GetFormat;
25+
import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
26+
27+
import com.google.common.collect.ImmutableSet;
28+
29+
/**
30+
* Binder for GET_FORMAT function.
31+
*/
32+
public class GetFormatFunctionBinder {
33+
34+
public static final GetFormatFunctionBinder INSTANCE = new GetFormatFunctionBinder();
35+
36+
private static final ImmutableSet<String> SUPPORTED_FORMATS = ImmutableSet.of("DATE", "DATETIME", "TIME");
37+
38+
public static boolean isGetFormatFunction(String functionName) {
39+
return "GET_FORMAT".equalsIgnoreCase(functionName);
40+
}
41+
42+
/**
43+
* bind get_format function that have non-expression arguments.
44+
*
45+
* @param unboundFunction unbound get_format function
46+
*
47+
* @return bound function
48+
*/
49+
public Expression bind(UnboundFunction unboundFunction) {
50+
if (unboundFunction.arity() != 2) {
51+
throw new AnalysisException("Can not find function 'GET_FORMAT' with "
52+
+ unboundFunction.arity() + " arguments");
53+
}
54+
StringLiteral formatLiteral = parseFormatType(unboundFunction.child(0));
55+
Expression pattern = unboundFunction.child(1);
56+
return new GetFormat(formatLiteral, pattern);
57+
}
58+
59+
private StringLiteral parseFormatType(Expression formatTypeExpr) {
60+
if (formatTypeExpr instanceof StringLiteral) {
61+
String formatType = ((StringLiteral) formatTypeExpr).getStringValue().toUpperCase();
62+
validateFormatType(formatType);
63+
return new StringLiteral(formatType);
64+
}
65+
if (formatTypeExpr instanceof SlotReference) {
66+
String formatType = ((SlotReference) formatTypeExpr).getName().toUpperCase();
67+
validateFormatType(formatType);
68+
return new StringLiteral(formatType);
69+
}
70+
throw new AnalysisException("Illegal first argument for GET_FORMAT: " + formatTypeExpr.toSql());
71+
}
72+
73+
private void validateFormatType(String formatType) {
74+
if (!SUPPORTED_FORMATS.contains(formatType)) {
75+
throw new AnalysisException("Format type only support DATE, DATETIME and TIME, but get: " + formatType);
76+
}
77+
}
78+
}

0 commit comments

Comments
 (0)