Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(core): rename duplicate same columns #405

Merged
merged 9 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ protected Operator generateRoot(Statement statement) {
.forEach(
cte -> {
Operator root = generateRoot(cte.getStatement());
root = new Rename(new OperatorSource(root), cte.getAliasMap());
root = new Rename(new OperatorSource(root), cte.getAliasList());
cte.setRoot(root);
});
return generateRoot(selectStatement);
Expand Down Expand Up @@ -382,7 +382,7 @@ private Operator initFromPart(UnarySelectStatement selectStatement) {
throw new RuntimeException("Unknown FromPart type: " + fromPart.getType());
}
if (fromPart.hasAlias()) {
root = new Rename(new OperatorSource(root), fromPart.getAliasMap());
root = new Rename(new OperatorSource(root), fromPart.getAliasList());
}
return root;
}
Expand Down Expand Up @@ -437,7 +437,7 @@ private Operator initFilterAndMergeFragmentsWithJoin(UnarySelectStatement select
throw new RuntimeException("Unknown FromPart type: " + fromPart.getType());
}
if (fromPart.hasAlias()) {
root = new Rename(new OperatorSource(root), fromPart.getAliasMap());
root = new Rename(new OperatorSource(root), fromPart.getAliasList());
}
joinList.add(root);
});
Expand Down Expand Up @@ -797,16 +797,16 @@ private static Operator buildReorder(UnarySelectStatement selectStatement, Opera
}

/**
* 如果SelectStatement有AliasMap, 在root之上构建一个Rename操作符
* 如果SelectStatement有AliasList, 在root之上构建一个Rename操作符
*
* @param selectStatement Select上下文
* @param root 当前根节点
* @return 添加了Rename操作符的根节点;如果没有AliasMap,返回原根节点
* @return 添加了Rename操作符的根节点;如果没有AliasList,返回原根节点
*/
private static Operator buildRename(UnarySelectStatement selectStatement, Operator root) {
Map<String, String> aliasMap = selectStatement.getSelectAliasMap();
if (!aliasMap.isEmpty()) {
root = new Rename(new OperatorSource(root), aliasMap);
List<Pair<String, String>> aliasList = selectStatement.getSelectAliasList();
if (!aliasList.isEmpty()) {
root = new Rename(new OperatorSource(root), aliasList);
}
return root;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@
import cn.edu.tsinghua.iginx.engine.shared.source.OperatorSource;
import cn.edu.tsinghua.iginx.engine.shared.source.Source;
import cn.edu.tsinghua.iginx.engine.shared.source.SourceType;
import cn.edu.tsinghua.iginx.utils.Pair;
import cn.edu.tsinghua.iginx.utils.StringUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class OperatorUtils {
Expand Down Expand Up @@ -334,7 +334,7 @@ private static Operator pushDownApply(Operator root, List<String> correlatedVari
root =
new Rename(
new OperatorSource(pushDownApply(apply, correlatedVariables)),
rename.getAliasMap(),
rename.getAliasList(),
ignorePatterns);
break;
case CrossJoin:
Expand Down Expand Up @@ -605,8 +605,8 @@ public static List<String> getPatternFromOperatorChildren(
Operator visitedOperator = visitedOperators.get(i);
if (visitedOperator.getType() == OperatorType.Rename) {
Rename rename = (Rename) visitedOperator;
Map<String, String> aliasMap = rename.getAliasMap();
patterns = renamePattern(aliasMap, patterns);
List<Pair<String, String>> aliasList = rename.getAliasList();
patterns = renamePattern(aliasList, patterns);
}
}
return patterns;
Expand Down Expand Up @@ -648,17 +648,18 @@ private static boolean isPatternMatched(String patternA, String patternB) {
/**
* 正向重命名模式列表中的pattern,将key中的pattern替换为value中的pattern
*
* @param aliasMap 重命名规则, key为旧模式,value为新模式
* @param aliasList 重命名规则, key为旧模式,value为新模式
* @param patterns 要重命名的模式列表
* @return
*/
private static List<String> renamePattern(Map<String, String> aliasMap, List<String> patterns) {
private static List<String> renamePattern(
List<Pair<String, String>> aliasList, List<String> patterns) {
List<String> renamedPatterns = new ArrayList<>();
for (String pattern : patterns) {
boolean matched = false;
for (Map.Entry<String, String> entry : aliasMap.entrySet()) {
String oldPattern = entry.getKey().replace("*", "(.*)");
String newPattern = entry.getValue().replace("*", "$1");
for (Pair<String, String> pair : aliasList) {
String oldPattern = pair.k.replace("*", "(.*)");
String newPattern = pair.v.replace("*", "$1");
if (pattern.matches(oldPattern)) {
if (newPattern.contains("$1") && !oldPattern.contains("*")) {
newPattern = newPattern.replace("$1", "*");
Expand All @@ -668,12 +669,12 @@ private static List<String> renamePattern(Map<String, String> aliasMap, List<Str
matched = true;
break;
} else if (pattern.equals(oldPattern)) {
renamedPatterns.add(entry.getValue());
renamedPatterns.add(pair.v);
matched = true;
break;
} else if (pattern.contains(".*")
&& oldPattern.matches(StringUtils.reformatPath(pattern))) {
renamedPatterns.add(entry.getKey());
renamedPatterns.add(pair.k);
matched = true;
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
package cn.edu.tsinghua.iginx.engine.logical.utils;

import cn.edu.tsinghua.iginx.metadata.entity.ColumnsInterval;
import cn.edu.tsinghua.iginx.utils.Pair;
import cn.edu.tsinghua.iginx.utils.StringUtils;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class PathUtils {
Expand Down Expand Up @@ -63,31 +63,31 @@ public static ColumnsInterval addSuffix(ColumnsInterval columnsInterval) {
/**
* 反向重命名模式列表中的模式
*
* @param aliasMap 重命名规则, key为旧模式,value为新模式,在这里我们要将新模式恢复为旧模式
* @param aliasList 重命名规则, key为旧模式,value为新模式,在这里我们要将新模式恢复为旧模式
* @param patterns 要重命名的模式列表
* @return 重命名后的模式列表
*/
public static List<String> recoverRenamedPatterns(
Map<String, String> aliasMap, List<String> patterns) {
List<Pair<String, String>> aliasList, List<String> patterns) {
return patterns.stream()
.map(pattern -> recoverRenamedPattern(aliasMap, pattern))
.map(pattern -> recoverRenamedPattern(aliasList, pattern))
.collect(Collectors.toList());
}

public static String recoverRenamedPattern(Map<String, String> aliasMap, String pattern) {
for (Map.Entry<String, String> entry : aliasMap.entrySet()) {
String oldPattern = entry.getKey().replace("*", "$1"); // 通配符转换为正则的捕获组
String newPattern = entry.getValue().replace("*", "(.*)"); // 使用反向引用保留原始匹配的部分
public static String recoverRenamedPattern(List<Pair<String, String>> aliasList, String pattern) {
for (Pair<String, String> pair : aliasList) {
String oldPattern = pair.k.replace("*", "$1"); // 通配符转换为正则的捕获组
String newPattern = pair.v.replace("*", "(.*)"); // 使用反向引用保留原始匹配的部分
if (pattern.matches(newPattern)) {
// 如果旧模式中有通配符,但是新模式中没有,我们需要将新模式中的捕获组替换为通配符
if (oldPattern.contains("$1") && !newPattern.contains("*")) {
oldPattern = oldPattern.replace("$1", "*");
}
return pattern.replaceAll(newPattern, oldPattern);
} else if (newPattern.equals(pattern)) {
return entry.getKey();
return pair.k;
} else if (pattern.contains(".*") && newPattern.matches(StringUtils.reformatPath(pattern))) {
return entry.getKey();
return pair.k;
}
}
return pattern;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -474,10 +474,8 @@ private RowStream executeMappingTransform(MappingTransform mappingTransform, Tab

private RowStream executeRename(Rename rename, Table table) {
Header header = table.getHeader();
Map<String, String> aliasMap = rename.getAliasMap();

List<String> ignorePatterns = rename.getIgnorePatterns();
Header newHeader = header.renamedHeader(aliasMap, ignorePatterns);
List<Pair<String, String>> aliasList = rename.getAliasList();
Header newHeader = header.renamedHeader(aliasList, rename.getIgnorePatterns());

List<Row> rows = new ArrayList<>();
table
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import cn.edu.tsinghua.iginx.engine.shared.data.read.Row;
import cn.edu.tsinghua.iginx.engine.shared.data.read.RowStream;
import cn.edu.tsinghua.iginx.engine.shared.operator.Rename;
import java.util.Map;

public class RenameLazyStream extends UnaryLazyStream {

Expand All @@ -40,9 +39,7 @@ public RenameLazyStream(Rename rename, RowStream stream) {
public Header getHeader() throws PhysicalException {
if (header == null) {
Header header = stream.getHeader();
Map<String, String> aliasMap = rename.getAliasMap();

this.header = header.renamedHeader(aliasMap, rename.getIgnorePatterns());
this.header = header.renamedHeader(rename.getAliasList(), rename.getIgnorePatterns());
}
return header;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,52 +127,72 @@ public boolean equals(Object o) {
&& Objects.equals(indexMap, header.indexMap);
}

public Header renamedHeader(Map<String, String> aliasMap, List<String> ignorePatterns) {
public Header renamedHeader(List<Pair<String, String>> aliasList, List<String> ignorePatterns) {
List<Field> newFields = new ArrayList<>();
fields.forEach(
field -> {
// 如果列名在ignorePatterns中,对该列不执行rename
for (String ignorePattern : ignorePatterns) {
if (StringUtils.match(field.getName(), ignorePattern)) {
newFields.add(field);
return;
}
int size = getFieldSize();
for (int i = 0; i < size; i++) {
Field field = fields.get(i);
// 如果列名在ignorePatterns中,对该列不执行rename
boolean ignore = false;
for (String ignorePattern : ignorePatterns) {
if (StringUtils.match(field.getName(), ignorePattern)) {
newFields.add(field);
ignore = true;
break;
}
}
if (ignore) {
continue;
}
String alias = "";
for (Pair<String, String> pair : aliasList) {
String oldPattern = pair.k;
String newPattern = pair.v;
if (oldPattern.equals("*") && newPattern.endsWith(".*")) {
String newPrefix = newPattern.substring(0, newPattern.length() - 1);
alias = newPrefix + field.getName();
} else if (oldPattern.endsWith(".*") && newPattern.endsWith(".*")) {
String oldPrefix = oldPattern.substring(0, oldPattern.length() - 1);
String newPrefix = newPattern.substring(0, newPattern.length() - 1);
if (field.getName().startsWith(oldPrefix)) {
alias = field.getName().replaceFirst(oldPrefix, newPrefix);
}
String alias = "";
for (String oldPattern : aliasMap.keySet()) {
String newPattern = aliasMap.get(oldPattern);
if (oldPattern.equals("*") && newPattern.endsWith(".*")) {
String newPrefix = newPattern.substring(0, newPattern.length() - 1);
alias = newPrefix + field.getName();
} else if (oldPattern.endsWith(".*") && newPattern.endsWith(".*")) {
String oldPrefix = oldPattern.substring(0, oldPattern.length() - 1);
String newPrefix = newPattern.substring(0, newPattern.length() - 1);
if (field.getName().startsWith(oldPrefix)) {
alias = field.getName().replaceFirst(oldPrefix, newPrefix);
}
break;
} else if (oldPattern.equals(field.getFullName())) {
alias = newPattern;
break;
break;
} else if (oldPattern.equals(field.getName())) {
alias = newPattern;
Set<Map<String, String>> tagSet = new HashSet<>();
Field nextField = i < size - 1 ? fields.get(i + 1) : null;
tagSet.add(field.getTags());
// 处理同一列但不同tag的情况
while (nextField != null
&& oldPattern.equals(nextField.getName())
&& !tagSet.contains(nextField.getTags())) {
newFields.add(new Field(alias, field.getType(), field.getTags()));
field = nextField;
i++;
nextField = i < size - 1 ? fields.get(i + 1) : null;
tagSet.add(field.getTags());
}
aliasList.remove(pair);
break;
} else {
if (StringUtils.match(field.getName(), oldPattern)) {
if (newPattern.endsWith("." + oldPattern)) {
String prefix = newPattern.substring(0, newPattern.length() - oldPattern.length());
alias = prefix + field.getName();
} else {
if (StringUtils.match(field.getName(), oldPattern)) {
if (newPattern.endsWith("." + oldPattern)) {
String prefix =
newPattern.substring(0, newPattern.length() - oldPattern.length());
alias = prefix + field.getName();
} else {
alias = newPattern;
}
break;
}
alias = newPattern;
}
break;
}
if (alias.isEmpty()) {
newFields.add(field);
} else {
newFields.add(new Field(alias, field.getType(), field.getTags()));
}
});
}
}
if (alias.isEmpty()) {
newFields.add(field);
} else {
newFields.add(new Field(alias, field.getType(), field.getTags()));
}
}
return new Header(getKey(), newFields);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public ExpressionType getType() {

@Override
public boolean hasAlias() {
return alias != null && !alias.equals("");
return alias != null && !alias.isEmpty();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public ExpressionType getType() {

@Override
public boolean hasAlias() {
return alias != null && !alias.equals("");
return alias != null && !alias.isEmpty();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public ExpressionType getType() {

@Override
public boolean hasAlias() {
return alias != null && !alias.equals("");
return alias != null && !alias.isEmpty();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public ExpressionType getType() {

@Override
public boolean hasAlias() {
return alias != null && !alias.equals("");
return alias != null && !alias.isEmpty();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public ExpressionType getType() {

@Override
public boolean hasAlias() {
return alias != null && !alias.equals("");
return alias != null && !alias.isEmpty();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public ExpressionType getType() {

@Override
public boolean hasAlias() {
return alias != null && !alias.equals("");
return alias != null && !alias.isEmpty();
}

@Override
Expand Down
Loading
Loading