From b283a2fa5807cab73a013f48205037ae7dadbbe9 Mon Sep 17 00:00:00 2001 From: liuwenjie <1518386192@qq.com> Date: Mon, 29 Aug 2022 17:42:24 +0800 Subject: [PATCH 1/3] =?UTF-8?q?[hotfix-#1195]change=20the=20code=20positio?= =?UTF-8?q?n=20to=20support=20oracle=20nchar=E3=80=81nvarchar2=20chinese?= =?UTF-8?q?=20value?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../oraclelogminer/listener/LogParser.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java b/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java index 4a41608d26..9a61dff9d6 100644 --- a/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java +++ b/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java @@ -206,13 +206,6 @@ public static String parseTime(String value) { return value.substring(17, value.length() - 2); } - // support nchar、nvarchar2 chinese value - if (value.startsWith("UNISTR('") && value.endsWith("')")) { - String substring = value.substring(8, value.length() - 2); - String replace = substring.replace("\\", "\\u"); - return unicodeToString(replace); - } - return value; } @@ -252,6 +245,13 @@ public static String parseString(String value) { return value.substring(15, value.length() - 2); } + // support nchar、nvarchar2 chinese value + if (value.startsWith("UNISTR('") && value.endsWith("')")) { + String substring = value.substring(8, value.length() - 2); + String replace = substring.replace("\\", "\\u"); + return unicodeToString(replace); + } + return value; } From 5b00d6079f8027409adfcb8d2124b905a34cc126 Mon Sep 17 00:00:00 2001 From: liuwenjie <1518386192@qq.com> Date: Tue, 30 Aug 2022 10:01:27 +0800 Subject: [PATCH 2/3] =?UTF-8?q?[hotfix-#1195]change=20the=20code=20positio?= =?UTF-8?q?n=20to=20support=20oracle=20nchar=E3=80=81nvarchar2=20chinese?= =?UTF-8?q?=20value?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../oraclelogminer/listener/LogParser.java | 17 ++--------------- .../chunjun/util/UnicodeToStringTest.java | 18 ++---------------- 2 files changed, 4 insertions(+), 31 deletions(-) diff --git a/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java b/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java index 9a61dff9d6..5e725c1c0e 100644 --- a/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java +++ b/chunjun-connectors/chunjun-connector-oraclelogminer/src/main/java/com/dtstack/chunjun/connector/oraclelogminer/listener/LogParser.java @@ -40,6 +40,7 @@ import net.sf.jsqlparser.statement.insert.Insert; import net.sf.jsqlparser.statement.update.Update; import org.apache.commons.codec.binary.Hex; +import org.apache.commons.lang3.StringEscapeUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,8 +53,6 @@ import java.util.LinkedList; import java.util.List; import java.util.Objects; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * @author jiangbo @@ -65,8 +64,6 @@ public class LogParser { public static SnowflakeIdWorker idWorker = new SnowflakeIdWorker(1, 1); - public static final Pattern pattern = Pattern.compile("(\\\\u(\\w{4}))"); - private final LogMinerConf config; public LogParser(LogMinerConf config) { @@ -209,16 +206,6 @@ public static String parseTime(String value) { return value; } - public static String unicodeToString(String str) { - Matcher matcher = pattern.matcher(str); - char ch; - while (matcher.find()) { - ch = (char) Integer.parseInt(matcher.group(2), 16); - str = str.replace(matcher.group(1), String.valueOf(ch)); - } - return str; - } - public static String parseString(String value) { if (!value.endsWith("')")) { return value; @@ -249,7 +236,7 @@ public static String parseString(String value) { if (value.startsWith("UNISTR('") && value.endsWith("')")) { String substring = value.substring(8, value.length() - 2); String replace = substring.replace("\\", "\\u"); - return unicodeToString(replace); + return StringEscapeUtils.unescapeJava(replace); } return value; diff --git a/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java b/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java index 13eb75b08c..8c4ac91915 100644 --- a/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java +++ b/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java @@ -18,20 +18,16 @@ package com.dtstack.chunjun.util; +import org.apache.commons.lang3.StringEscapeUtils; import org.junit.Assert; import org.junit.Test; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - /** * @author liuche * @date 2022/8/26 14:39 */ public class UnicodeToStringTest { - public static final Pattern pattern = Pattern.compile("(\\\\u(\\w{4}))"); - @Test public void testUnicodeToString() { String str = "UNISTR('\\5927\\6D77')"; @@ -39,18 +35,8 @@ public void testUnicodeToString() { if (str.startsWith("UNISTR('") && str.endsWith("')")) { String substring = str.substring(8, str.length() - 2); String replace = substring.replace("\\", "\\u"); - str = unicodeToString(replace); + str= StringEscapeUtils.unescapeJava(replace); } Assert.assertEquals(str, "大海"); } - - public static String unicodeToString(String str) { - Matcher matcher = pattern.matcher(str); - char ch; - while (matcher.find()) { - ch = (char) Integer.parseInt(matcher.group(2), 16); - str = str.replace(matcher.group(1), String.valueOf(ch)); - } - return str; - } } From dcceb998cf55d5d9af52c67196a228cf8ae12989 Mon Sep 17 00:00:00 2001 From: liuwenjie <1518386192@qq.com> Date: Tue, 30 Aug 2022 11:03:46 +0800 Subject: [PATCH 3/3] [hotfix-#1195]mvn spotless:apply --- .../test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java b/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java index 8c4ac91915..f1923657cc 100644 --- a/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java +++ b/chunjun-core/src/test/java/com/dtstack/chunjun/util/UnicodeToStringTest.java @@ -35,7 +35,7 @@ public void testUnicodeToString() { if (str.startsWith("UNISTR('") && str.endsWith("')")) { String substring = str.substring(8, str.length() - 2); String replace = substring.replace("\\", "\\u"); - str= StringEscapeUtils.unescapeJava(replace); + str = StringEscapeUtils.unescapeJava(replace); } Assert.assertEquals(str, "大海"); }