Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Improve][Connector-V2] Change read excel util from POI to EasyExcel #8064

Open
wants to merge 19 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ public class DateTimeUtils {
FORMATTER_MAP.put(
Formatter.YYYY_MM_DD_HH_MM_SS_SLASH,
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SLASH.value));
FORMATTER_MAP.put(
Formatter.YYYY_M_D_HH_MM_SS_SLASH,
DateTimeFormatter.ofPattern(Formatter.YYYY_M_D_HH_MM_SS_SLASH.value));
FORMATTER_MAP.put(
Formatter.YYYY_M_D_HH_MM_SS_ISO8601,
DateTimeFormatter.ofPattern(Formatter.YYYY_M_D_HH_MM_SS_ISO8601.value));
FORMATTER_MAP.put(
Formatter.YYYY_M_D_HH_MM_SLASH,
DateTimeFormatter.ofPattern(Formatter.YYYY_M_D_HH_MM_SLASH.value));
FORMATTER_MAP.put(
Formatter.YYYY_M_D_HH_MM_ISO8601,
DateTimeFormatter.ofPattern(Formatter.YYYY_M_D_HH_MM_ISO8601.value));
FORMATTER_MAP.put(
Formatter.YYYY_MM_DD_HH_MM_SS_NO_SPLIT,
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_NO_SPLIT.value));
Expand All @@ -73,9 +85,26 @@ public class DateTimeUtils {
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSSSSS_ISO8601.value));
}

// Patterns whose month and day may be one or two digits, with seconds.
// matchDateTimeFormatter consults these (via the entry set below) when the
// date-time string length is 17 or 18.
public static final Map<Pattern, DateTimeFormatter> YYYY_M_D_HH_MM_SS_17_FORMATTER_MAP =
new LinkedHashMap<>();

// Patterns whose month and day may be one or two digits, without seconds.
// matchDateTimeFormatter consults these (via the entry set below) when the
// date-time string length is 15 or 16.
public static final Map<Pattern, DateTimeFormatter> YYYY_M_D_HH_MM_15_FORMATTER_MAP =
new LinkedHashMap<>();

// Entries of YYYY_M_D_HH_MM_SS_17_FORMATTER_MAP, copied in by the static initializer
// and iterated by matchDateTimeFormatter.
public static Set<Map.Entry<Pattern, DateTimeFormatter>>
YYYY_M_D_HH_MM_SS_17_FORMATTER_MAP_ENTRY_SET = new LinkedHashSet<>();

// Entries of YYYY_M_D_HH_MM_15_FORMATTER_MAP, copied in by the static initializer
// and iterated by matchDateTimeFormatter.
public static Set<Map.Entry<Pattern, DateTimeFormatter>>
YYYY_M_D_HH_MM_15_FORMATTER_MAP_ENTRY_SET = new LinkedHashSet<>();

// if the datetime string length is 19, find the DateTimeFormatter from this map
public static final Map<Pattern, DateTimeFormatter> YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP =
new LinkedHashMap<>();

// NOTE(review): presumably filled from YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.entrySet()
// in the static initializer, like the entry sets above - confirm.
public static Set<Map.Entry<Pattern, DateTimeFormatter>>
YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP_ENTRY_SET = new LinkedHashSet<>();

Expand Down Expand Up @@ -115,6 +144,22 @@ public class DateTimeUtils {
Pattern.compile("\\d{4}/\\d{2}/\\d{2}\\s\\d{2}:\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_MM_DD_HH_MM_SS_SLASH.value));

YYYY_M_D_HH_MM_15_FORMATTER_MAP.put(
Pattern.compile("\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_M_D_HH_MM_SLASH.value));

YYYY_M_D_HH_MM_15_FORMATTER_MAP.put(
Pattern.compile("\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_M_D_HH_MM_ISO8601.value));

YYYY_M_D_HH_MM_SS_17_FORMATTER_MAP.put(
Pattern.compile("\\d{4}/\\d{1,2}/\\d{1,2}\\s\\d{2}:\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_M_D_HH_MM_SS_SLASH.value));

YYYY_M_D_HH_MM_SS_17_FORMATTER_MAP.put(
Pattern.compile("\\d{4}-\\d{1,2}-\\d{1,2}\\s\\d{2}:\\d{2}:\\d{2}"),
DateTimeFormatter.ofPattern(Formatter.YYYY_M_D_HH_MM_SS_ISO8601.value));

YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.put(
Pattern.compile("\\d{4}/\\d{2}/\\d{2}\\s\\d{2}:\\d{2}.*"),
new DateTimeFormatterBuilder()
Expand Down Expand Up @@ -159,6 +204,12 @@ public class DateTimeUtils {
YYYY_MM_DD_HH_MM_SS_19_FORMATTER_MAP.entrySet());
YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP_ENTRY_SET.addAll(
YYYY_MM_DD_HH_MM_SS_M19_FORMATTER_MAP.entrySet());

YYYY_M_D_HH_MM_SS_17_FORMATTER_MAP_ENTRY_SET.addAll(
YYYY_M_D_HH_MM_SS_17_FORMATTER_MAP.entrySet());

YYYY_M_D_HH_MM_15_FORMATTER_MAP_ENTRY_SET.addAll(
YYYY_M_D_HH_MM_15_FORMATTER_MAP.entrySet());
}

/**
Expand All @@ -183,6 +234,20 @@ public static DateTimeFormatter matchDateTimeFormatter(String dateTime) {
return entry.getValue();
}
}
} else if (dateTime.length() == 17 || dateTime.length() == 18) {
for (Map.Entry<Pattern, DateTimeFormatter> entry :
YYYY_M_D_HH_MM_SS_17_FORMATTER_MAP_ENTRY_SET) {
if (entry.getKey().matcher(dateTime).matches()) {
return entry.getValue();
}
}
} else if (dateTime.length() == 15 || dateTime.length() == 16) {
for (Map.Entry<Pattern, DateTimeFormatter> entry :
YYYY_M_D_HH_MM_15_FORMATTER_MAP_ENTRY_SET) {
if (entry.getKey().matcher(dateTime).matches()) {
return entry.getValue();
}
}
} else if (dateTime.length() == 14) {
return YYYY_MM_DD_HH_MM_SS_14_FORMATTER;
}
Expand Down Expand Up @@ -247,6 +312,10 @@ public enum Formatter {
YYYY_MM_DD_HH_MM_SS_SSSSSS("yyyy-MM-dd HH:mm:ss.SSSSSS"),
YYYY_MM_DD_HH_MM_SS_SPOT("yyyy.MM.dd HH:mm:ss"),
YYYY_MM_DD_HH_MM_SS_SLASH("yyyy/MM/dd HH:mm:ss"),
YYYY_M_D_HH_MM_SLASH("yyyy/M/d HH:mm"),
YYYY_M_D_HH_MM_ISO8601("yyyy-M-d HH:mm"),
YYYY_M_D_HH_MM_SS_SLASH("yyyy/M/d HH:mm:ss"),
YYYY_M_D_HH_MM_SS_ISO8601("yyyy-M-d HH:mm:ss"),
YYYY_MM_DD_HH_MM_SS_NO_SPLIT("yyyyMMddHHmmss"),
YYYY_MM_DD_HH_MM_SS_ISO8601("yyyy-MM-dd'T'HH:mm:ss"),
YYYY_MM_DD_HH_MM_SS_SSS_ISO8601("yyyy-MM-dd'T'HH:mm:ss.SSS"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ public class DateUtils {
Pattern.compile("\\d{4}年\\d{2}月\\d{2}日"),
Pattern.compile("\\d{4}/\\d{2}/\\d{2}"),
Pattern.compile("\\d{4}\\.\\d{2}\\.\\d{2}"),
Pattern.compile("\\d{8}")
Pattern.compile("\\d{8}"),
Pattern.compile("\\d{4}/\\d{1,2}/\\d{1,2}")
};

// Maps a date regex from PATTERN_ARRAY to the formatter used for strings matching it.
// Uses the diamond operator rather than a raw HashMap to avoid an unchecked conversion.
public static final Map<Pattern, DateTimeFormatter> DATE_FORMATTER_MAP = new HashMap<>();
Expand Down Expand Up @@ -116,6 +117,12 @@ public class DateUtils {
.appendValue(DAY_OF_MONTH, 2)
.toFormatter())
.toFormatter());
DATE_FORMATTER_MAP.put(
PATTERN_ARRAY[5],
new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.append(DateTimeFormatter.ofPattern("yyyy/M/d"))
.toFormatter());
}

/**
Expand Down Expand Up @@ -153,8 +160,10 @@ public static String toString(LocalDate date, Formatter formatter) {

public enum Formatter {
YYYY_MM_DD("yyyy-MM-dd"),
YYYY_M_D("yyyy/M/d"),
YYYY_MM_DD_SPOT("yyyy.MM.dd"),
YYYY_MM_DD_SLASH("yyyy/MM/dd");

private final String value;

Formatter(String value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.time.format.DateTimeFormatter;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

public class TimeUtils {
private static final Map<Formatter, DateTimeFormatter> FORMATTER_MAP =
Expand All @@ -37,6 +38,31 @@ public static LocalTime parse(String time, Formatter formatter) {
return LocalTime.parse(time, FORMATTER_MAP.get(formatter));
}

/**
 * Regular expressions for the supported time string layouts, tried in array order.
 *
 * <p>Index 0 matches {@code dd:dd:dd}; index 1 matches {@code dd:dd:dd.ddd}. The dot before
 * the three-digit fraction is escaped so that only a literal '.' is accepted as the
 * separator; an unescaped dot would match any character (for example "12:34:56x789").
 */
public static final Pattern[] PATTERN_ARRAY =
        new Pattern[] {
            Pattern.compile("\\d{2}:\\d{2}:\\d{2}"),
            Pattern.compile("\\d{2}:\\d{2}:\\d{2}\\.\\d{3}"),
        };

/**
 * Looks up the formatter registered for the first pattern that fully matches the input.
 *
 * @param dateTime the time string tested against each entry of {@code PATTERN_ARRAY} in order
 * @return the value {@code Time_FORMATTER_MAP} holds for the first matching pattern, or
 *     {@code null} when no pattern matches
 */
public static DateTimeFormatter matchTimeFormatter(String dateTime) {
    for (Pattern candidate : PATTERN_ARRAY) {
        if (!candidate.matcher(dateTime).matches()) {
            continue;
        }
        // First full match wins; later patterns are not consulted.
        return Time_FORMATTER_MAP.get(candidate);
    }
    return null;
}

// Maps each entry of PATTERN_ARRAY to the formatter used for strings matching it.
// Uses the diamond operator rather than a raw HashMap to avoid an unchecked conversion.
public static final Map<Pattern, DateTimeFormatter> Time_FORMATTER_MAP = new HashMap<>();

static {
    // PATTERN_ARRAY[0] is paired with the HH_MM_SS formatter.
    Time_FORMATTER_MAP.put(
            PATTERN_ARRAY[0], DateTimeFormatter.ofPattern(Formatter.HH_MM_SS.value));
    // PATTERN_ARRAY[1] is paired with the HH_MM_SS_SSS formatter.
    Time_FORMATTER_MAP.put(
            PATTERN_ARRAY[1], DateTimeFormatter.ofPattern(Formatter.HH_MM_SS_SSS.value));
}

public static String toString(LocalTime time, Formatter formatter) {
return time.format(FORMATTER_MAP.get(formatter));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
<hadoop-minikdc.version>3.1.4</hadoop-minikdc.version>
<dom4j.version>2.1.4</dom4j.version>
<jaxen.version>2.0.0</jaxen.version>
<easyexcel.version>4.0.3</easyexcel.version>
</properties>

<dependencyManagement>
Expand Down Expand Up @@ -158,6 +159,13 @@
<artifactId>jaxen</artifactId>
<version>${jaxen.version}</version>
</dependency>

<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>${easyexcel.version}</version>
</dependency>
Comment on lines +162 to +167
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we all know, easyexcel is no longer maintained. It doesn't seem good to introduce it at this time. We can try other alternatives, such as fastexcel. There are also reports online that it is faster than easyexcel. What do you think? cc @hailin0

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or easyexcel-plus?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will give it a try

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or easyexcel-plus?

easyexcel-plus was only on GitHub last night, and I haven't seen it in the maven repository yet

Copy link
Author

@dwave dwave Nov 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we all know, easyexcel is no longer maintained. It doesn't seem good to introduce it at this time. We can try other alternatives, such as fastexcel. There are also reports online that it is faster than easyexcel. What do you think? cc @hailin0

I tried using fastexcel, but there is a problem with its xls support for excel97-2003

dhatim/fastexcel#287
image

image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, let's add an option to configure the excel parse engine, default POI, support POI and easyexcel at now. So we can implement other engine in the future.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, let's add an option to configure the excel parse engine, default POI, support POI and easyexcel at now. So we can implement other engine in the future.

Will there be any conflict between poi versions?


</dependencies>

<build>
Expand Down
Loading
Loading