2022-09-15 mysql列存储引擎-语法树转换

摘要:

列存储引擎有一套自己的执行处理规则。在真正执行查询之前,它会先拿到 MySQL 经过词法分析和语法分析后生成的语法树,再按照自身的逻辑对这棵语法树做一次转换处理。

本文记录mysql的语法树在列存储引擎中的转换过程。

逻辑建模:

时序图:

调用堆栈:

(gdb) bt
#0 Tianmu::core::Query::Preexecute (this=0x7ff0ada4b800, qu=..., sender=0x7feed49fa4d0, display_now=true)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/storage/tianmu/core/query.cpp:599
#1 0x0000000002c71256 in Tianmu::core::Engine::Execute (this=0x5787fd0, thd=0x7feed4014040, lex=0x7feed4016368, result_output=0x7feed49f2ac0, unit_for_union=0x0)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/storage/tianmu/core/engine_execute.cpp:421
#2 0x0000000002c704e4 in Tianmu::core::Engine::HandleSelect (this=0x5787fd0, thd=0x7feed4014040, lex=0x7feed4016368, result=@0x7ff0ada4bd18: 0x7feed49f2ac0, setup_tables_done_option=0,
res=@0x7ff0ada4bd14: 0, optimize_after_tianmu=@0x7ff0ada4bd0c: 1, tianmu_free_join=@0x7ff0ada4bd10: 1, with_insert=0)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/storage/tianmu/core/engine_execute.cpp:232
#3 0x0000000002d58f41 in Tianmu::dbhandler::TIANMU_HandleSelect (thd=0x7feed4014040, lex=0x7feed4016368, result=@0x7ff0ada4bd18: 0x7feed49f2ac0, setup_tables_done_option=0,
res=@0x7ff0ada4bd14: 0, optimize_after_tianmu=@0x7ff0ada4bd0c: 1, tianmu_free_join=@0x7ff0ada4bd10: 1, with_insert=0)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/storage/tianmu/handler/ha_rcengine.cpp:82
#4 0x000000000239b274 in execute_sqlcom_select (thd=0x7feed4014040, all_tables=0x7feed49f21b0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:5182
#5 0x00000000023945f8 in mysql_execute_command (thd=0x7feed4014040, first_level=true) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:2831
#6 0x000000000239c23d in mysql_parse (thd=0x7feed4014040, parser_state=0x7ff0ada4ceb0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:5621
#7 0x00000000023914d5 in dispatch_command (thd=0x7feed4014040, com_data=0x7ff0ada4d650, command=COM_QUERY) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:1495
#8 0x0000000002390401 in do_command (thd=0x7feed4014040) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:1034
#9 0x00000000024c3001 in handle_connection (arg=0x8555370) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/conn_handler/connection_handler_per_thread.cc:313
#10 0x0000000002ba6c5c in pfs_spawn_thread (arg=0x85acd30) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/storage/perfschema/pfs.cc:2197
#11 0x00007ff0fd781ea5 in start_thread () from /lib64/libpthread.so.0
#12 0x00007ff0faca5b0d in clone () from /lib64/libc.so.6

转换结果:

mysql> desc t1;
+-------+-------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+-------+-------------+------+-----+---------+-------+
| c1 | varchar(20) | YES | | NULL | |
| c2 | varchar(56) | YES | | NULL | |
+-------+-------------+------+-----+---------+-------+
2 rows in set (0.00 sec)
select * from t1 where c1='world' and  c2='world';
T:-1 = TABLE_ALIAS(T:0,"t1")
T:-2 = TMP_TABLE(T:4294967295)

VC:-2.0 = CREATE_VC(T:-2,PHYS_COL(T:-1,A:0))
A:-1 = T:-2.ADD_COLUMN(VC:-2.0,LIST,"c1","ALL")

VC:-2.1 = CREATE_VC(T:-2,PHYS_COL(T:-1,A:1))
A:-2 = T:-2.ADD_COLUMN(VC:-2.1,LIST,"c2","ALL")

VC:-2.2 = CREATE_VC(T:-2,EXPR("world"))

C:0 = CREATE_CONDS(T:-2,VC:-2.0,=,VC:-2.2,<null>)
C:0.AND(VC:-2.1,=,VC:-2.2,<null>)

T:-2.ADD_CONDS(C:0,WHERE)

T:-2.APPLY_CONDS()

RESULT(T:-2)

核心函数:

Engine::optimize_select

/*
  Prepare and optimize one SELECT_LEX on behalf of the Tianmu engine.

  thd                    session the statement belongs to
  select_options         option bits stored into the JOIN when one already exists
  result                 sink that is prepared against the select's field list
  select_lex             the select to prepare / optimize
  optimize_after_tianmu  out: set to TRUE just before JOIN::optimize(1) is called
  free_join              out: 1 by default, 0 when select_lex already carried a JOIN

  Returns FALSE (0) on success, TRUE when preparing `result` or allocating the
  JOIN fails, otherwise the non-zero code reported by SELECT_LEX::prepare() or
  JOIN::optimize().
*/
int optimize_select(THD *thd, ulong select_options, Query_result *result,
                    SELECT_LEX *select_lex, int &optimize_after_tianmu, int &free_join)
{
  // Mirrors the opening part of mysql_select(...) in sql_select.cpp.
  free_join = 1;
  select_lex->context.resolve_in_select_list = TRUE;

  JOIN *join = select_lex->join;

  if (join == 0) {
    // No JOIN attached yet: prepare the select and the result sink, then build one.
    thd_proc_info(thd, "init");

    const int prepare_rc = select_lex->prepare(thd);
    if (prepare_rc)
      return prepare_rc;

    if (result->prepare(select_lex->fields_list, select_lex->master_unit()))
      return TRUE;
    if (result->prepare2())
      return TRUE;

    join = new JOIN(thd, select_lex);
    if (!join)
      return TRUE; /* purecov: inspected */
    select_lex->set_join(join);
  } else {
    // A JOIN already exists: this is the EXPLAIN of a subselect or derived table.
    // NOTE(review): bit 2 of select_options looks like the legacy SELECT_DESCRIBE
    // flag -- confirm against the server's option definitions.
    const bool describe_requested = (select_options & (1ULL << 2)) != 0;
    if (select_lex->linkage != DERIVED_TABLE_TYPE || describe_requested) {
      if (select_lex->linkage == GLOBAL_OPTIONS_TYPE) {
        const int prepare_rc = select_lex->prepare(thd);
        if (prepare_rc)
          return prepare_rc;
      } else {
        if (result->prepare(join->fields_list, select_lex->master_unit()))
          return TRUE;
        if (result->prepare2())
          return TRUE;
      }
    }
    free_join = 0;
    join->select_options = select_options;
  }

  join->best_rowcount = 2;
  optimize_after_tianmu = TRUE;

  const int optimize_rc = join->optimize(1);
  if (optimize_rc)
    return optimize_rc;
  return FALSE;
}

Query::Compile

/**
 * Compiles a chain of SELECT_LEX nodes (a single SELECT, or the members of a UNION) into steps
 * of `compiled_query`.
 *
 * For each select in the chain the method makes sure a JOIN exists, registers the referenced
 * Tianmu tables, and then adds joins, output fields, GROUP BY / ORDER BY columns and the
 * WHERE / HAVING conditions for a temporary table. The per-select temporary tables are chained
 * with UNION steps; a global ORDER BY / LIMIT (present only for real unions) is applied last.
 *
 * While the method runs, the member `cq` points at `compiled_query`; the previous value is put
 * back on every exit path, which matters because the method is re-entered for subqueries.
 *
 * @param compiled_query          receives the generated steps
 * @param selects_list            first select of the chain
 * @param last_distinct           unions emitted after this select are UNION ALL; NULL makes all
 *                                of them UNION ALL
 * @param res_tab                 when non-NULL, receives the id of the resulting table and no
 *                                RESULT step is emitted
 * @param ignore_limit            when true, no TM_TOP (offset/limit) mode is emitted
 * @param left_expr_for_subselect when non-NULL, ClearSubselectTransformation() is run first
 * @param oper_for_subselect      operator handed to ClearSubselectTransformation()
 * @param ignore_minmax           forwarded to AddFields()
 * @param for_subq_in_where       forwarded to AddJoins()
 * @return RCBASE_QUERY_ROUTE on success, RETURN_QUERY_TO_MYSQL_ROUTE when the query cannot be
 *         compiled for this engine, TRUE when a JOIN could not be allocated.
 */
int Query::Compile(CompiledQuery *compiled_query, SELECT_LEX *selects_list, SELECT_LEX *last_distinct, TabID *res_tab,
                   bool ignore_limit, Item *left_expr_for_subselect, common::Operator *oper_for_subselect,
                   bool ignore_minmax, bool for_subq_in_where) {
  MEASURE_FET("Query::Compile(...)");
  // at this point all tables are in RCBase engine, so we can proceed with the
  // query

  /*
   Item_func
    |
    --Item_int_func       <- arguments are kept in an array accessible through arguments()
    |
    --Item_bool_func
    |   |
    |   ---Item_cond      <- arguments are kept in a list accessible through argument_list()
    |   |     |
    |   |     ---Item_cond_and   <- when negated OR of negated items is created
    |   |     |
    |   |     ---Item_cond_or    <- when negated AND of negated items is created
    |   |     |
    |   |     ---Item_cond_xor
    |   |
    |   ---Item_equal     <- arguments are kept in a list accessible through argument_list()
    |   |                    + const_item (accessible through get_const())
    |   |                    (multiple equality)
    |   |
    |   ---Item_func_not  (???)
    |   |
    |   ---Item func_isnull      <- when negated IS NOT NULL is created
    |
    --Item_func_opt_neg   <- arguments are kept in an array accessible through arguments();
    |   |                    if negated, this information is kept additionally
    |   |                    (in a field named 'negated')
    |   |
    |   ---Item_func_in
    |   |
    |   ---Item_func_between
    |
    --Item_bool_func2
        |
        ---Item_bool_rowready_func2  <- arguments are kept in an array accessible through
             |                          arguments(); if negated, an object of a corresponding
             |                          class is created (e.g. ~Item_func_lt => Item_func_ge)
             |
             ----Item_func_eq
             |
             ----Item_func_ne
             |
             ----Item_func_ge
             |
             ----Item_func_le
             |
             ----Item_func_gt
             |
             ----Item_func_lt
             |
             ----Item_func_equal   <- This is mystery so far

   There are 3 equality functions:
     Item_equal      -> multiple equality (many fields and optional additional constant value)
     Item_func_equal -> ??? (NOTE(review): in the MySQL sources this class is the NULL-safe
                        `<=>` operator -- confirm)
     Item_func_eq    -> pairwise equality
  */

  bool union_all = (last_distinct == NULL);
  TabID prev_result;

  SQL_I_List<ORDER> *global_order = NULL;
  int col_count = 0;
  int64_t global_limit_value = -1;
  int64_t global_offset_value = -1;

  // local copy of current cq, to be restored on exit
  CompiledQuery *saved_cq = cq;
  cq = compiled_query;

  if ((selects_list->join) &&
      (selects_list != selects_list->join->unit->global_parameters())) {  // only in case of unions this is set
    SetLimit(selects_list->join->unit->global_parameters(), 0, global_offset_value, (int64_t &)global_limit_value);
    global_order = &(selects_list->join->unit->global_parameters()->order_list);
  }

  for (SELECT_LEX *sl = selects_list; sl; sl = sl->next_select()) {
    int64_t limit_value = -1;
    int64_t offset_value = -1;

    // Make sure the select owns a JOIN; later code dereferences sl->join unconditionally.
    if (!sl->join) {
      sl->add_active_options(SELECT_NO_UNLOCK);
      JOIN *join = new JOIN(sl->master_unit()->thd, sl);

      if (!join) {
        sl->cleanup(0);
        cq = saved_cq;  // fix: do not leave the member pointing at the callee's compiled query
        // NOTE(review): TRUE is returned here rather than one of the *_ROUTE constants --
        // confirm which route callers take for this value.
        return TRUE;
      }
      sl->set_join(join);
    }

    if (!JudgeErrors(sl)) {
      cq = saved_cq;  // fix: restore on this early exit as the catch handler below does
      return RETURN_QUERY_TO_MYSQL_ROUTE;
    }
    SetLimit(sl, sl == selects_list ? 0 : sl->join->unit->global_parameters(), offset_value, limit_value);

    List<Item> *fields = &sl->fields_list;
    Item *conds = sl->where_cond();
    ORDER *order = sl->order_list.first;

    // if (order) global_order = 0;   //we want to zero global order (which
    // seems to be always present) if we find a local order by clause
    //  The above is not necessary since global_order is set only in case of
    //  real UNIONs

    ORDER *group = sl->group_list.first;
    Item *having = sl->having_cond();
    List<TABLE_LIST> *join_list = sl->join_list;
    bool zero_result = sl->join->zero_result_cause != NULL;

    // Filled in by ClearSubselectTransformation(); cond_to_reinsert / list_to_reinsert describe
    // a condition that has to be pushed back onto its list once this select has been handled.
    Item *field_for_subselect;
    Item *cond_to_reinsert = NULL;
    List<Item> *list_to_reinsert = NULL;

    TabID tmp_table;
    try {
      // partial optimization of LOJ conditions, JOIN::optimize(part=3)
      // necessary due to already done basic transformation of conditions
      // see comments in sql_select.cc:JOIN::optimize()
      if (IsLOJ(join_list)) sl->join->optimize(3);

      if (left_expr_for_subselect) {
        if (!ClearSubselectTransformation(*oper_for_subselect, field_for_subselect, conds, having, cond_to_reinsert,
                                          list_to_reinsert, left_expr_for_subselect))
          throw CompilationError();
      }

      if (having && !group)  // we cannot handle the case of a having without a group by
        throw CompilationError();

      // Register every base table of the select; anything that is not a Tianmu table aborts
      // the compilation.
      TABLE_LIST *tables = sl->leaf_tables ? sl->leaf_tables : (TABLE_LIST *)sl->table_list.first;
      for (TABLE_LIST *table_ptr = tables; table_ptr; table_ptr = table_ptr->next_leaf) {
        if (!table_ptr->is_view_or_derived()) {
          if (!Engine::IsTIANMUTable(table_ptr->table)) throw CompilationError();
          std::string path = TablePath(table_ptr);
          if (path2num.find(path) == path2num.end()) {
            path2num[path] = NumOfTabs();
            AddTable(m_conn->GetTableByPath(path));
            TIANMU_LOG(LogCtl_Level::DEBUG, "add query table: %s", path.c_str());
          }
        }
      }
      std::vector<TabID> left_tables, right_tables;
      bool first_table = true;
      if (!AddJoins(*join_list, tmp_table, left_tables, right_tables, (res_tab != NULL && res_tab->n != 0),
                    first_table, for_subq_in_where))
        throw CompilationError();

      // For a subquery the select list is replaced by the single field extracted above.
      List<Item> field_list_for_subselect;
      if (left_expr_for_subselect && field_for_subselect) {
        field_list_for_subselect.push_back(field_for_subselect);
        fields = &field_list_for_subselect;
      }
      bool aggr_used = false;
      if (!AddFields(*fields, tmp_table, group != NULL, col_count, ignore_minmax, aggr_used))
        throw CompilationError();

      if (!AddGroupByFields(group, tmp_table)) throw CompilationError();

      if (!AddOrderByFields(order, tmp_table, group != NULL || sl->join->select_distinct || aggr_used))
        throw CompilationError();
      CondID cond_id;
      if (!BuildConditions(conds, cond_id, cq, tmp_table, CondType::WHERE_COND, zero_result))
        throw CompilationError();

      cq->AddConds(tmp_table, cond_id, CondType::WHERE_COND);

      cond_id = CondID();
      if (!BuildConditions(having, cond_id, cq, tmp_table, CondType::HAVING_COND)) throw CompilationError();
      cq->AddConds(tmp_table, cond_id, CondType::HAVING_COND);

      cq->ApplyConds(tmp_table);
    } catch (...) {
      // restore original values of class fields (necessary if this method is
      // called recursively)
      cq = saved_cq;
      if (cond_to_reinsert && list_to_reinsert) list_to_reinsert->push_back(cond_to_reinsert);
      sl->cleanup(0);
      return RETURN_QUERY_TO_MYSQL_ROUTE;
    }

    if (sl->join->select_distinct) cq->Mode(tmp_table, TMParameter::TM_DISTINCT);
    if (!ignore_limit && limit_value >= 0) cq->Mode(tmp_table, TMParameter::TM_TOP, offset_value, limit_value);

    if (sl == selects_list) {
      prev_result = tmp_table;
      if (global_order && !selects_list->next_select()) {  // trivial union with one select and
                                                           // ext. order by
        tmp_table = TabID();
        cq->Union(prev_result, prev_result, tmp_table, true);
      }
    } else {
      cq->Union(prev_result, prev_result, tmp_table, union_all);
    }
    if (sl == last_distinct) union_all = true;
    if (cond_to_reinsert && list_to_reinsert) list_to_reinsert->push_back(cond_to_reinsert);
    sl->cleanup(0);
  }

  cq->BuildTableIDStepsMap();

  if (!AddGlobalOrderByFields(global_order, prev_result, col_count)) {
    cq = saved_cq;  // fix: restore on this early exit as well
    return RETURN_QUERY_TO_MYSQL_ROUTE;
  }

  if (!ignore_limit && global_limit_value >= 0)
    cq->Mode(prev_result, TMParameter::TM_TOP, global_offset_value, global_limit_value);

  // Either hand the result table id back to the caller or mark it as the query result.
  if (res_tab != NULL)
    *res_tab = prev_result;
  else
    cq->Result(prev_result);
  cq = saved_cq;
  return RCBASE_QUERY_ROUTE;
}

Query::Item2CQTerm

// Converts a MySQL Item into a CQTerm that refers to a virtual column of `tmp_table`.
//
// Three cases are handled, in this order:
//   1. the item is a subselect: it is compiled recursively with Compile() and wrapped in a
//      virtual column; `*oper_for_subselect` (when given) is adjusted with ALL/ANY marks;
//   2. filter_type is HAVING_COND: fields/aggregations, COUNT(*), binary literals and general
//      expressions are mapped onto virtual columns of the temporary table;
//   3. otherwise (WHERE filter): fields, binary literals and general expressions are mapped.
//
// Parameters:
//   an_arg                   item to convert (dereferenced with UnRef() first)
//   term                     out: set to CQTerm(<virtual column id>) on success
//   tmp_table                temporary table the virtual column belongs to
//   filter_type              selects the HAVING or the WHERE handling
//   negative                 used only in the subselect case (ALL vs ANY marking, ignore_minmax)
//   left_expr_for_subselect  forwarded to Compile() in the subselect case
//   oper_for_subselect       in/out operator, touched only in the subselect case; may be NULL
//
// Returns RCBASE_QUERY_ROUTE on success and RETURN_QUERY_TO_MYSQL_ROUTE when the item cannot be
// handled here; in the subselect case the result of Compile() is returned unchanged.
int Query::Item2CQTerm(Item *an_arg, CQTerm &term, const TabID &tmp_table, CondType filter_type, bool negative,
Item *left_expr_for_subselect, common::Operator *oper_for_subselect) {
an_arg = UnRef(an_arg);
if (an_arg->type() == Item::SUBSELECT_ITEM) {
Item_subselect *item_subs = dynamic_cast<Item_subselect *>(an_arg);
DEBUG_ASSERT(item_subs && "The cast to (Item_subselect*) was unsuccessful");

// LIMIT of the subquery is ignored for max/min and IN subselects.
bool ignore_limit = false;
if (dynamic_cast<Item_maxmin_subselect *>(item_subs) != NULL ||
dynamic_cast<Item_in_subselect *>(item_subs) != NULL)
ignore_limit = true;
st_select_lex_unit *select_unit = item_subs->unit;

// needs to check if we can relay on subquery transformation to min/max
bool ignore_minmax = (dynamic_cast<Item_maxmin_subselect *>(item_subs) == NULL &&
dynamic_cast<Item_in_subselect *>(item_subs) == NULL && negative &&
item_subs->substype() == Item_subselect::SINGLEROW_SUBS);
// Pushed for the duration of the recursive Compile() call and popped right after it.
subqueries_in_where.emplace_back(tmp_table,
item_subs->place() != CTX_HAVING && filter_type != CondType::HAVING_COND);

// we need to make a copy of global map with table aliases so that subquery
// contains aliases of outer queries and itself but not "parallel"
// subqueries. Once subquery is compiled we can get rid of its aliases since
// they are not needed any longer and stay with aliases of outer query only
auto outer_map_copy = table_alias2index_ptr;
TabID subselect;
int res = Compile(cq, select_unit->first_select(), select_unit->union_distinct, &subselect, ignore_limit,
left_expr_for_subselect, oper_for_subselect, ignore_minmax, true);
// restore outer query aliases
table_alias2index_ptr = outer_map_copy;

subqueries_in_where.pop_back();
if (res == RCBASE_QUERY_ROUTE) {
// Reuse the virtual column for this subselect if one was already created for tmp_table.
AttrID vc;
vc.n = VirtualColumnAlreadyExists(tmp_table, subselect);
if (vc.n == common::NULL_VALUE_32) {
cq->CreateVirtualColumn(vc, tmp_table, subselect, filter_type == CondType::HAVING_COND ? true : false);
tab_id2subselect.insert(std::make_pair(tmp_table, std::make_pair(vc.n, subselect)));
}
if (oper_for_subselect) {
if (dynamic_cast<Item_maxmin_subselect *>(item_subs) != NULL ||
dynamic_cast<Item_in_subselect *>(item_subs) != NULL) {
if (negative) {
MarkWithAll(*oper_for_subselect);
if (dynamic_cast<Item_in_subselect *>(item_subs) != NULL && *oper_for_subselect == common::Operator::O_IN)
*oper_for_subselect = common::Operator::O_EQ_ALL;
} else {
MarkWithAny(*oper_for_subselect);
if (dynamic_cast<Item_allany_subselect *>(item_subs) != NULL &&
dynamic_cast<Item_allany_subselect *>(item_subs)->all == 1)
*oper_for_subselect = common::Operator::O_EQ_ALL;
}
} else {
if (negative) {
// if(item_subs->substype() != Item_subselect::SINGLEROW_SUBS)
// return RETURN_QUERY_TO_MYSQL_ROUTE;
MarkWithAll(*oper_for_subselect);
} else
UnmarkAllAny(*oper_for_subselect);
}
}
term = CQTerm(vc.n);
}
// `term` is left untouched when the subquery could not be compiled.
return res;
}

if (filter_type == CondType::HAVING_COND) {
common::ColOperation oper;
bool distinct;
if (!OperationUnmysterify(an_arg, oper, distinct,
true)) // is_having_clause may be true only in
// case group by clause was present
return RETURN_QUERY_TO_MYSQL_ROUTE;

AttrID col, vc;
TabID tab;
if ((IsFieldItem(an_arg) || IsAggregationOverFieldItem(an_arg)) && !FieldUnmysterify(an_arg, tab, col))
return RETURN_QUERY_TO_MYSQL_ROUTE;
// An aggregation whose first argument itself contains an aggregation is not handled.
if (IsAggregationItem(an_arg) && HasAggregation(((Item_sum *)an_arg)->get_arg(0)))
return RETURN_QUERY_TO_MYSQL_ROUTE;
if ((IsFieldItem(an_arg) || IsAggregationOverFieldItem(an_arg)) && cq->ExistsInTempTable(tab, tmp_table)) {
// Field / aggregation over a field: add (or find) the column in tmp_table and wrap it.
// The lookup and phys2virt key use -col_num - 1, while the column is created with col_num.
int col_num = AddColumnForPhysColumn(an_arg, tmp_table, oper, distinct, true);
auto phys_vc = VirtualColumnAlreadyExists(tmp_table, tmp_table, AttrID(-col_num - 1));
if (phys_vc.first == common::NULL_VALUE_32) {
phys_vc.first = tmp_table.n;
cq->CreateVirtualColumn(phys_vc.second, tmp_table, tmp_table, AttrID(col_num));
phys2virt.insert(std::make_pair(std::pair<int, int>(tmp_table.n, -col_num - 1), phys_vc));
}
vc.n = phys_vc.second;
} else if (IsCountStar(an_arg)) {
// COUNT(*): make sure a COUNT column exists in tmp_table, then wrap it.
AttrID at;
at.n = GetAddColumnId(AttrID(common::NULL_VALUE_32), tmp_table, common::ColOperation::COUNT, false);
if (at.n == common::NULL_VALUE_32) // doesn't exist yet
cq->AddColumn(at, tmp_table, CQTerm(), common::ColOperation::COUNT, NULL, false);
auto phys_vc = VirtualColumnAlreadyExists(tmp_table, tmp_table, at);
if (phys_vc.first == common::NULL_VALUE_32) {
phys_vc.first = tmp_table.n;
cq->CreateVirtualColumn(phys_vc.second, tmp_table, tmp_table, at);
phys2virt.insert(std::make_pair(std::pair<int, int>(tmp_table.n, at.n), phys_vc));
}
vc.n = phys_vc.second;
} else if (an_arg->type() == Item::VARBIN_ITEM) {
// Binary literal: a non-NULL value of at most 8 bytes is turned into an integer constant
// expression; anything else is handed back to MySQL.
String str;
an_arg->val_str(&str); // sets null_value
if (!an_arg->null_value) {
if (an_arg->max_length <= 8) {
Item *int_item = new Item_int((ulonglong)an_arg->val_int());
MysqlExpression *mysql_expression = NULL;
MysqlExpression::Item2VarID item2varid;
// The new expression is recorded in gc_expressions before the duplicate check.
gc_expressions.push_back(mysql_expression = new MysqlExpression(int_item, item2varid));
vc.n = VirtualColumnAlreadyExists(tmp_table, mysql_expression);
if (vc.n == common::NULL_VALUE_32) {
cq->CreateVirtualColumn(vc, tmp_table, mysql_expression);
tab_id2expression.insert(std::make_pair(tmp_table, std::make_pair(vc.n, mysql_expression)));
}
} else
return RETURN_QUERY_TO_MYSQL_ROUTE; // too large binary to be treated
// as BIGINT
} else {
return RETURN_QUERY_TO_MYSQL_ROUTE;
}
} else {
// General expression in HAVING.
MysqlExpression *expr;
// NOTE(review): `vars` is declared but never read in this function.
MysqlExpression::SetOfVars vars;
if (WrapMysqlExpression(an_arg, tmp_table, expr, false, true) == WrapStatus::FAILURE)
return RETURN_QUERY_TO_MYSQL_ROUTE;
if (IsConstExpr(expr->GetVars(), tmp_table)) {
vc.n = VirtualColumnAlreadyExists(tmp_table, expr);
if (vc.n == common::NULL_VALUE_32) {
cq->CreateVirtualColumn(vc, tmp_table, expr, tmp_table);
tab_id2expression.insert(std::make_pair(tmp_table, std::make_pair(vc.n, expr)));
}
} else if (IsAggregationItem(an_arg)) {
// The wrapped item is expected to be an Item_tianmufield; the column number is taken
// from the last entry of its varID list.
DEBUG_ASSERT(expr->GetItem()->type() == Item_tianmufield::get_tianmuitem_type());
int col_num =
((Item_tianmufield *)expr->GetItem())->varID[((Item_tianmufield *)expr->GetItem())->varID.size() - 1].col;
auto phys_vc = VirtualColumnAlreadyExists(tmp_table, tmp_table, AttrID(-col_num - 1));
if (phys_vc.first == common::NULL_VALUE_32) {
phys_vc.first = tmp_table.n;
cq->CreateVirtualColumn(phys_vc.second, tmp_table, tmp_table, AttrID(col_num));
phys2virt.insert(std::make_pair(std::pair<int, int>(tmp_table.n, -col_num - 1), phys_vc));
}
vc.n = phys_vc.second;
} else {
// int col_num =
// AddColumnForMysqlExpression(expr,
// tmp_table,
// NULL, DELAYED, distinct, true);
vc.n = VirtualColumnAlreadyExists(tmp_table, expr);
if (vc.n == common::NULL_VALUE_32) {
cq->CreateVirtualColumn(vc, tmp_table, expr, tmp_table);
tab_id2expression.insert(std::make_pair(tmp_table, std::make_pair(vc.n, expr)));
}
}
}
term = CQTerm(vc.n);
return RCBASE_QUERY_ROUTE;
} else {
// WHERE FILTER

AttrID vc;
AttrID col;
TabID tab;
if (IsFieldItem(an_arg) && !FieldUnmysterify(an_arg, tab, col)) return RETURN_QUERY_TO_MYSQL_ROUTE;
if (IsFieldItem(an_arg) && cq->ExistsInTempTable(tab, tmp_table)) {
// Plain field of a table that is part of tmp_table: reuse or create its virtual column.
auto phys_vc = VirtualColumnAlreadyExists(tmp_table, tab, col);
if (phys_vc.first == common::NULL_VALUE_32) {
phys_vc.first = tmp_table.n;
cq->CreateVirtualColumn(phys_vc.second, tmp_table, tab, col);
phys2virt.insert(std::make_pair(std::pair<int, int>(tab.n, col.n), phys_vc));
}
vc.n = phys_vc.second;
} else if (an_arg->type() == Item::VARBIN_ITEM) {
// Same binary-literal handling as in the HAVING branch above.
String str;
an_arg->val_str(&str); // sets null_value
if (!an_arg->null_value) {
if (an_arg->max_length <= 8) {
Item *int_item = new Item_int((ulonglong)an_arg->val_int());
MysqlExpression *mysql_expression = NULL;
MysqlExpression::Item2VarID item2varid;
gc_expressions.push_back(mysql_expression = new MysqlExpression(int_item, item2varid));
vc.n = VirtualColumnAlreadyExists(tmp_table, mysql_expression);
if (vc.n == common::NULL_VALUE_32) {
cq->CreateVirtualColumn(vc, tmp_table, mysql_expression);
tab_id2expression.insert(std::make_pair(tmp_table, std::make_pair(vc.n, mysql_expression)));
}
} else
return RETURN_QUERY_TO_MYSQL_ROUTE; // too large binary to be treated
// as BIGINT
} else {
return RETURN_QUERY_TO_MYSQL_ROUTE;
}
} else {
// Any other expression: only WrapStatus::SUCCESS is accepted here, whereas the HAVING
// branch rejects only WrapStatus::FAILURE.
MysqlExpression *expr;
WrapStatus ws = WrapMysqlExpression(an_arg, tmp_table, expr, true, false);
if (ws != WrapStatus::SUCCESS) return RETURN_QUERY_TO_MYSQL_ROUTE;
vc.n = VirtualColumnAlreadyExists(tmp_table, expr);
if (vc.n == common::NULL_VALUE_32) {
cq->CreateVirtualColumn(vc, tmp_table, expr);
tab_id2expression.insert(std::make_pair(tmp_table, std::make_pair(vc.n, expr)));
}
}
term = CQTerm(vc.n);
return RCBASE_QUERY_ROUTE;
}
// Not reached: both branches of the if/else above return.
return RETURN_QUERY_TO_MYSQL_ROUTE;
}

Query::Preexecute

// Runs the steps of a compiled query in order and returns the table selected by its RESULT step.
//
// Tables are addressed by id: a non-negative id n is looked up with Table(n), a negative id n
// refers to the slot ta[-n - 1]. Conditions built by the steps live in the local `conds` vector
// (indexed by condition id) as raw pointers; they are deleted before returning and also when a
// step throws (the exception is then rethrown).
//
// Parameters:
//   qu           compiled query to execute; also installed as the member `cq`
//   sender       passed to the UNION handling when the union target is the result table
//   display_now  unused in this function
//
// Returns the TempTable designated by the RESULT step, or NULL if no such step was executed.
// The pointer is taken from `ta` with get(); ownership stays with `ta`.
TempTable *Query::Preexecute(CompiledQuery &qu, ResultSender *sender, [[maybe_unused]] bool display_now) {
if (TIANMU_LOGCHECK(LogCtl_Level::DEBUG)) {
qu.Print(this);
}
std::vector<Condition *> conds(qu.NumOfConds());

TempTable *output_table = NULL; // NOTE: this pointer will be returned by the function

ta.resize(qu.NumOfTabs());
auto global_limits = qu.GetGlobalLimit();

cq = &qu;
// Execution itself
for (int i = 0; i < qu.NumOfSteps(); i++) {
// The step is copied, so the writes to step.e1/e2/e3.vc and step.a1.n below stay local.
CompiledQuery::CQStep step = qu.Step(i);
std::shared_ptr<JustATable> t1_ptr, t2_ptr, t3_ptr;

// Resolve the up to three table operands of the step.
if (step.t1.n != common::NULL_VALUE_32) {
if (step.t1.n >= 0)
t1_ptr = Table(step.t1.n); // normal table
else {
t1_ptr = ta[-step.t1.n - 1]; // TempTable
}
}
if (step.t2.n != common::NULL_VALUE_32) {
if (step.t2.n >= 0)
t2_ptr = Table(step.t2.n); // normal table
else {
t2_ptr = ta[-step.t2.n - 1]; // TempTable
}
}
if (step.t3.n != common::NULL_VALUE_32) {
if (step.t3.n >= 0)
t3_ptr = Table(step.t3.n); // normal table
else {
t3_ptr = ta[-step.t3.n - 1]; // TempTable
}
}
// Some technical information
if (step.alias && std::strcmp(step.alias, "roughstats") == 0) {
// magical word (passed as table alias) to display statistics
((TempTable *)ta[-step.t1.n - 1].get())->DisplayRSI();
}

if (step.alias && std::strcmp(step.alias, "roughattrstats") == 0) {
// magical word (passed as table alias) to display attr. statistics
m_conn->SetDisplayAttrStats();
}

// Implementation of steps
// NOTE(review): only the switch is inside the try; if anything above it in the loop body can
// throw, `conds` would not be freed on that path -- confirm.
try {
switch (step.type) {
case CompiledQuery::StepType::TABLE_ALIAS:
// Slot t1 becomes an alias of table t2.
ta[-step.t1.n - 1] = t2_ptr;
break;
case CompiledQuery::StepType::TMP_TABLE:
// Create a TempTable from the first table in step.tables1; step.n1 selects the overload
// that takes an extra `true` argument.
DEBUG_ASSERT(step.t1.n < 0);
ta[-step.t1.n - 1] = step.n1
? TempTable::Create(ta[-step.tables1[0].n - 1].get(), step.tables1[0].n, this, true)
: TempTable::Create(ta[-step.tables1[0].n - 1].get(), step.tables1[0].n, this);
((TempTable *)ta[-step.t1.n - 1].get())->ReserveVirtColumns(qu.NumOfVirtualColumns(step.t1));
break;
case CompiledQuery::StepType::CREATE_CONDS:
DEBUG_ASSERT(step.t1.n < 0);
// Bind the virtual-column ids of the three terms to the columns of temp table t1.
step.e1.vc = (step.e1.vc_id != common::NULL_VALUE_32)
? ((TempTable *)ta[-step.t1.n - 1].get())->GetVirtualColumn(step.e1.vc_id)
: NULL;
step.e2.vc = (step.e2.vc_id != common::NULL_VALUE_32)
? ((TempTable *)ta[-step.t1.n - 1].get())->GetVirtualColumn(step.e2.vc_id)
: NULL;
step.e3.vc = (step.e3.vc_id != common::NULL_VALUE_32)
? ((TempTable *)ta[-step.t1.n - 1].get())->GetVirtualColumn(step.e3.vc_id)
: NULL;
// The escape character char(step.n2) is used for LIKE / NOT LIKE, '\\' otherwise.
if (step.n1 != static_cast<int64_t>(CondType::OR_SUBTREE)) { // on result = false
conds[step.c1.n] = new Condition();
if (step.c2.IsNull()) {
conds[step.c1.n]->AddDescriptor(
step.e1, step.op, step.e2, step.e3, (TempTable *)ta[-step.t1.n - 1].get(), qu.GetNumOfDimens(step.t1),
(step.op == common::Operator::O_LIKE || step.op == common::Operator::O_NOT_LIKE) ? char(step.n2)
: '\\');
} else {
DEBUG_ASSERT(conds[step.c2.n]->IsType_Tree());
conds[step.c1.n]->AddDescriptor(static_cast<SingleTreeCondition *>(conds[step.c2.n])->GetTree(),
(TempTable *)ta[-step.t1.n - 1].get(), qu.GetNumOfDimens(step.t1));
}
} else { // on result = true
if (step.c2.IsNull())
conds[step.c1.n] =
new SingleTreeCondition(step.e1, step.op, step.e2, step.e3, (TempTable *)ta[-step.t1.n - 1].get(),
qu.GetNumOfDimens(step.t1), char(step.n2));
else {
DEBUG_ASSERT(conds[step.c2.n]->IsType_Tree());
conds[step.c1.n] = new Condition();
conds[step.c1.n]->AddDescriptor(((SingleTreeCondition *)conds[step.c2.n])->GetTree(),
(TempTable *)ta[-step.t1.n - 1].get(), qu.GetNumOfDimens(step.t1));
}
}
break;
case CompiledQuery::StepType::AND_F:
case CompiledQuery::StepType::OR_F:
// Merge condition c2 into condition c1; a non-tree c2 is only accepted for AND_F.
if (!conds[step.c2.n]->IsType_Tree()) {
ASSERT(step.type == CompiledQuery::StepType::AND_F);
auto cond2 = conds[step.c2.n];
// This `i` shadows the step index of the enclosing loop.
for (size_t i = 0; i < cond2->Size(); i++) {
auto &desc = (*cond2)[i];
if (conds[step.c1.n]->IsType_Tree()) {
TempTable *temptb = (TempTable *)ta[-qu.GetTableOfCond(step.c2).n - 1].get();
int no_dims = qu.GetNumOfDimens(qu.GetTableOfCond(step.c2));
if (desc.op == common::Operator::O_OR_TREE) {
static_cast<SingleTreeCondition *>(conds[step.c1.n])
->AddTree(common::LogicalOperator::O_AND, desc.tree, no_dims);
} else {
static_cast<SingleTreeCondition *>(conds[step.c1.n])
->AddDescriptor(common::LogicalOperator::O_AND, desc.attr, desc.op, desc.val1, desc.val2, temptb,
no_dims, desc.like_esc);
}
} else {
conds[step.c1.n]->AddDescriptor(desc);
}
}
} else if (conds[step.c1.n]->IsType_Tree()) { // on result = false
DEBUG_ASSERT(conds[step.c2.n]->IsType_Tree());
common::LogicalOperator lop = (step.type == CompiledQuery::StepType::AND_F ? common::LogicalOperator::O_AND
: common::LogicalOperator::O_OR);
static_cast<SingleTreeCondition *>(conds[step.c1.n])
->AddTree(lop, static_cast<SingleTreeCondition *>(conds[step.c2.n])->GetTree(), qu.GetNumOfDimens(step.t1));
} else {
DEBUG_ASSERT(conds[step.c2.n]->IsType_Tree());
conds[step.c1.n]->AddDescriptor(static_cast<SingleTreeCondition *>(conds[step.c2.n])->GetTree(),
(TempTable *)ta[-qu.GetTableOfCond(step.c1).n - 1].get(),
qu.GetNumOfDimens(qu.GetTableOfCond(step.c1)));
}
break;
case CompiledQuery::StepType::OR_DESC:
case CompiledQuery::StepType::AND_DESC: {
// Append one more descriptor (e1 op e2 [e3]) to the existing condition c1.
common::LogicalOperator lop =
(step.type == CompiledQuery::StepType::AND_DESC ? common::LogicalOperator::O_AND
: common::LogicalOperator::O_OR);
step.e1.vc = (step.e1.vc_id != common::NULL_VALUE_32)
? ((TempTable *)ta[-step.t1.n - 1].get())->GetVirtualColumn(step.e1.vc_id)
: NULL;
step.e2.vc = (step.e2.vc_id != common::NULL_VALUE_32)
? ((TempTable *)ta[-step.t1.n - 1].get())->GetVirtualColumn(step.e2.vc_id)
: NULL;
step.e3.vc = (step.e3.vc_id != common::NULL_VALUE_32)
? ((TempTable *)ta[-step.t1.n - 1].get())->GetVirtualColumn(step.e3.vc_id)
: NULL;
// NOTE(review): conds[step.c1.n] is dereferenced in the `if` before the DEBUG_ASSERT
// below checks it for NULL.
if (!conds[step.c1.n]->IsType_Tree()) {
DEBUG_ASSERT(conds[step.c1.n]);
conds[step.c1.n]->AddDescriptor(
step.e1, step.op, step.e2, step.e3, (TempTable *)ta[-step.t1.n - 1].get(), qu.GetNumOfDimens(step.t1),
(step.op == common::Operator::O_LIKE || step.op == common::Operator::O_NOT_LIKE) ? char(step.n2)
: '\\');
} else
static_cast<SingleTreeCondition *>(conds[step.c1.n])
->AddDescriptor(lop, step.e1, step.op, step.e2, step.e3, (TempTable *)ta[-step.t1.n - 1].get(),
qu.GetNumOfDimens(step.t1),
(step.op == common::Operator::O_LIKE || step.op == common::Operator::O_NOT_LIKE)
? char(step.n2)
: '\\');
break;
}
case CompiledQuery::StepType::T_MODE:
DEBUG_ASSERT(step.t1.n < 0 && ta[-step.t1.n - 1]->TableType() == TType::TEMP_TABLE);
((TempTable *)ta[-step.t1.n - 1].get())->SetMode(step.tmpar, step.n1, step.n2);
break;
case CompiledQuery::StepType::JOIN_T:
DEBUG_ASSERT(step.t1.n < 0 && ta[-step.t1.n - 1]->TableType() == TType::TEMP_TABLE);
((TempTable *)ta[-step.t1.n - 1].get())->JoinT(t2_ptr.get(), step.t2.n, step.jt);
break;
case CompiledQuery::StepType::ADD_CONDS: {
// Attach condition c1 to temp table t1; Simplify() is called first unless it is a HAVING
// condition.
DEBUG_ASSERT(step.t1.n < 0 && ta[-step.t1.n - 1]->TableType() == TType::TEMP_TABLE);
if (step.c1.n == common::NULL_VALUE_32) break;
if (step.n1 != static_cast<int64_t>(CondType::HAVING_COND)) conds[step.c1.n]->Simplify();
((TempTable *)ta[-step.t1.n - 1].get())->AddConds(conds[step.c1.n], (CondType)step.n1);
break;
}
case CompiledQuery::StepType::LEFT_JOIN_ON: {
DEBUG_ASSERT(step.t1.n < 0 && ta[-step.t1.n - 1]->TableType() == TType::TEMP_TABLE);
if (step.c1.n == common::NULL_VALUE_32) break;
((TempTable *)ta[-step.t1.n - 1].get())->AddLeftConds(conds[step.c1.n], step.tables1, step.tables2);
break;
}
case CompiledQuery::StepType::INNER_JOIN_ON: {
DEBUG_ASSERT(step.t1.n < 0 && ta[-step.t1.n - 1]->TableType() == TType::TEMP_TABLE);
if (step.c1.n == common::NULL_VALUE_32) break;
((TempTable *)ta[-step.t1.n - 1].get())->AddInnerConds(conds[step.c1.n], step.tables1);
break;
}
case CompiledQuery::StepType::APPLY_CONDS: {
// Evaluate the filter of temp table t1. A limit is passed on only when
// NoAggregationOrderingAndDistinct() holds and is dropped again when
// NoParameterizedDescs() returns true.
int64_t cur_limit = -1;
if (qu.FindDistinct(step.t1.n))
((TempTable *)ta[-step.t1.n - 1].get())->SetMode(TMParameter::TM_DISTINCT, 0, 0);
if (qu.NoAggregationOrderingAndDistinct(step.t1.n)) cur_limit = qu.FindLimit(step.t1.n);

if (cur_limit != -1 && ((TempTable *)ta[-step.t1.n - 1].get())->GetFilterP()->NoParameterizedDescs())
cur_limit = -1;

ParameterizedFilter *filter = ((TempTable *)ta[-step.t1.n - 1].get())->GetFilterP();
std::set<int> used_dims = qu.GetUsedDims(step.t1, ta);

// no need any more to check WHERE for not used dims
bool is_simple_filter = true; // qu.IsSimpleFilter(step.c1);
if (used_dims.size() == 1 && used_dims.find(common::NULL_VALUE_32) != used_dims.end())
is_simple_filter = false;
// Mark each dimension as used in the output unless it is absent from used_dims and the
// filter is simple.
for (int i = 0; i < filter->mind->NumOfDimensions(); i++) {
if (used_dims.find(i) == used_dims.end() && is_simple_filter)
filter->mind->ResetUsedInOutput(i);
else
filter->mind->SetUsedInOutput(i);
}

if (IsRoughQuery()) {
((TempTable *)ta[-step.t1.n - 1].get())->GetFilterP()->RoughUpdateParamFilter();
} else
((TempTable *)ta[-step.t1.n - 1].get())
->GetFilterP()
->UpdateMultiIndex(qu.CountColumnOnly(step.t1), cur_limit);
break;
}
case CompiledQuery::StepType::ADD_COLUMN: {
DEBUG_ASSERT(step.t1.n < 0 && ta[-step.t1.n - 1]->TableType() == TType::TEMP_TABLE);
CQTerm e(step.e1);
if (e.vc_id != common::NULL_VALUE_32)
e.vc =
((TempTable *)ta[-step.t1.n - 1].get())->GetVirtualColumn(step.e1.vc_id); // vc must have been created
step.a1.n = ((TempTable *)ta[-step.t1.n - 1].get())
->AddColumn(e, step.cop, step.alias, step.n1 ? true : false, step.si);
break;
}
case CompiledQuery::StepType::CREATE_VC: {
// Create virtual column a1 of temp table t1. The kind is chosen from the step contents:
// expression, IN set, physical column, or subquery.
DEBUG_ASSERT(step.t1.n < 0 && ta[-step.t1.n - 1]->TableType() == TType::TEMP_TABLE);
TempTable *t = (TempTable *)ta[-step.t1.n - 1].get();

DEBUG_ASSERT(t);
if (step.mysql_expr.size() > 0) {
// vcolumn::VirtualColumn for Expression
DEBUG_ASSERT(step.mysql_expr.size() == 1);
MultiIndex *mind = (step.t2.n == step.t1.n) ? t->GetOutputMultiIndexP() : t->GetMultiIndexP();
int c = ((TempTable *)ta[-step.t1.n - 1].get())
->AddVirtColumn(CreateColumnFromExpression(step.mysql_expr, t, step.t1.n, mind), step.a1.n);
ASSERT(c == step.a1.n, "AddVirtColumn failed");
} else if (step.virt_cols.size() > 0) {
// vcolumn::VirtualColumn for IN
// `ct` keeps its default-constructed value when a2 is not set.
ColumnType ct;
if (step.a2.n != common::NULL_VALUE_32)
ct = ((TempTable *)ta[-step.t1.n - 1].get())->GetVirtualColumn(step.a2.n)->Type();
std::vector<vcolumn::VirtualColumn *> vcs;
for (uint i = 0; i < step.virt_cols.size(); i++)
vcs.push_back(((TempTable *)ta[-step.t1.n - 1].get())->GetVirtualColumn(step.virt_cols[i]));
int c = ((TempTable *)ta[-step.t1.n - 1].get())
->AddVirtColumn(new vcolumn::InSetColumn(ct, t->GetMultiIndexP(), vcs), step.a1.n);
ASSERT(c == step.a1.n, "AddVirtColumn failed");
} else if (step.a2.n != common::NULL_VALUE_32) {
// vcolumn::VirtualColumn for PhysicalColumn
JustATable *t_src = ta[-step.t2.n - 1].get();
PhysicalColumn *phc;
MultiIndex *mind = (step.t2.n == step.t1.n) ? t->GetOutputMultiIndexP() : t->GetMultiIndexP();
int dim = (step.t2.n == step.t1.n) ? 0 : t->GetDimension(step.t2);
// A negative attribute id a2 is mapped to column index -a2 - 1.
phc = (PhysicalColumn *)t_src->GetColumn(step.a2.n >= 0 ? step.a2.n : -step.a2.n - 1);
int c = ((TempTable *)ta[-step.t1.n - 1].get())
->AddVirtColumn(
new vcolumn::SingleColumn(phc, mind, step.t2.n, step.a2.n, ta[-step.t2.n - 1].get(), dim),
step.a1.n);
ASSERT(c == step.a1.n, "AddVirtColumn failed");
} else {
// vcolumn::VirtualColumn for Subquery
DEBUG_ASSERT(ta[-step.t2.n - 1]->TableType() == TType::TEMP_TABLE);
int c =
((TempTable *)ta[-step.t1.n - 1].get())
->AddVirtColumn(new vcolumn::SubSelectColumn(
dynamic_cast<TempTable *>(ta[-step.t2.n - 1].get()),
step.n1 == 1 ? t->GetOutputMultiIndexP() : t->GetMultiIndexP(), t, step.t1.n),
step.a1.n);
ASSERT(c == step.a1.n, "AddVirtColumn failed");
}
break;
}
case CompiledQuery::StepType::ADD_ORDER: {
DEBUG_ASSERT(step.t1.n < 0 && ta[-step.t1.n - 1]->TableType() == TType::TEMP_TABLE && step.n1 >= 0 &&
step.n1 < 2);
DEBUG_ASSERT(step.a1.n >= 0 && step.a1.n < qu.NumOfVirtualColumns(step.t1));
TempTable *loc_t = (TempTable *)ta[-step.t1.n - 1].get();
loc_t->AddOrder(loc_t->GetVirtualColumn(step.a1.n),
(int)step.n1); // step.n1 = 0 for asc, 1 for desc
break;
}
case CompiledQuery::StepType::UNION:
// t1 = t2 UNION t3. When t1 differs from t2, t1 is first created as a copy of t2.
DEBUG_ASSERT(step.t1.n < 0 && step.t2.n < 0 && step.t3.n < 0);
DEBUG_ASSERT(ta[-step.t2.n - 1]->TableType() == TType::TEMP_TABLE &&
(step.t3.n == common::NULL_VALUE_32 || ta[-step.t3.n - 1]->TableType() == TType::TEMP_TABLE));
if (step.t1.n != step.t2.n)
ta[-step.t1.n - 1] = TempTable::Create(*(TempTable *)ta[-step.t2.n - 1].get(), false);
if (IsRoughQuery()) {
if (step.t3.n == common::NULL_VALUE_32)
((TempTable *)ta[-step.t1.n - 1].get())->RoughUnion(NULL, qu.IsResultTable(step.t1) ? sender : NULL);
else
((TempTable *)ta[-step.t1.n - 1].get())
->RoughUnion((TempTable *)ta[-step.t3.n - 1].get(), qu.IsResultTable(step.t1) ? sender : NULL);
} else if (qu.IsResultTable(step.t1) && !qu.IsOrderedBy(step.t1) && step.n1)
// NOTE(review): unlike the neighbouring branches, this one indexes ta with t3 without
// checking for NULL_VALUE_32 -- confirm t3 is always set on this path.
((TempTable *)ta[-step.t1.n - 1].get())
->Union((TempTable *)ta[-step.t3.n - 1].get(), (int)step.n1, sender, global_limits.first,
global_limits.second);
else {
if (step.t3.n == common::NULL_VALUE_32)
((TempTable *)ta[-step.t1.n - 1].get())->Union(NULL, (int)step.n1);
else {
((TempTable *)ta[-step.t1.n - 1].get())->Union((TempTable *)ta[-step.t3.n - 1].get(), (int)step.n1);
// The t3 slot is released once it has been merged.
ta[-step.t3.n - 1].reset();
}
}
break;
case CompiledQuery::StepType::RESULT:
DEBUG_ASSERT(step.t1.n < 0 && static_cast<size_t>(-step.t1.n - 1) < ta.size() &&
ta[-step.t1.n - 1]->TableType() == TType::TEMP_TABLE);
output_table = (TempTable *)ta[-step.t1.n - 1].get();
break;
case CompiledQuery::StepType::STEP_ERROR:
// Logged only; execution continues with the next step.
rc_control_.lock(m_conn->GetThreadID()) << "ERROR in step " << step.alias << system::unlock;
break;
default:
rc_control_.lock(m_conn->GetThreadID())
<< "ERROR: unsupported type of CQStep (" << static_cast<int>(step.type) << ")" << system::unlock;
}
} catch (...) {
// Free every condition built so far, then let the exception propagate.
for (auto &c : conds) delete c;
throw;
}
}

for (auto &c : conds) delete c;

// NOTE: output_table is sent out of this function and should be managed
// elsewhere. before deleting all TempTables but output_table those have to be
// detected there are used by output_table

return output_table;
}