diff --git a/chunjun-connectors/chunjun-connector-hdfs/pom.xml b/chunjun-connectors/chunjun-connector-hdfs/pom.xml
index 0c0ab3709d..a521e22b2d 100644
--- a/chunjun-connectors/chunjun-connector-hdfs/pom.xml
+++ b/chunjun-connectors/chunjun-connector-hdfs/pom.xml
@@ -23,6 +23,11 @@
1.11.1
compile
+
+ groovy-all
+ org.codehaus.groovy
+ 2.4.4
+
org.apache.hive
diff --git a/chunjun-connectors/chunjun-connector-s3/pom.xml b/chunjun-connectors/chunjun-connector-s3/pom.xml
new file mode 100644
index 0000000000..c539fa057d
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/pom.xml
@@ -0,0 +1,126 @@
+
+
+
+ chunjun-connectors
+ com.dtstack.chunjun
+ 1.12-SNAPSHOT
+
+ 4.0.0
+
+ chunjun-connector-s3
+ ChunJun : Connectors : S3
+
+
+ 8
+ 8
+
+
+
+ com.amazonaws
+ aws-java-sdk-s3
+ 1.11.1001
+
+
+ commons-codec
+ commons-codec
+
+
+ commons-logging
+ commons-logging
+
+
+ httpclient
+ org.apache.httpcomponents
+
+
+ jackson-databind
+ com.fasterxml.jackson.core
+
+
+ jackson-dataformat-cbor
+ com.fasterxml.jackson.dataformat
+
+
+
+
+ com.fasterxml.jackson.dataformat
+ jackson-dataformat-cbor
+ 2.9.10
+
+
+ dk.brics.automaton
+ automaton
+ 1.11-8
+ test
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 3.2.1
+
+
+ package
+
+ shade
+
+
+ false
+
+
+ org.slf4j:slf4j-api
+ log4j:log4j
+ ch.qos.logback:*
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+
+
+
+
+
+
+ maven-antrun-plugin
+
+
+ copy-resources
+
+ package
+
+ run
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/conf/S3Conf.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/conf/S3Conf.java
new file mode 100644
index 0000000000..62d1d5240b
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/conf/S3Conf.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.conf;
+
+import com.dtstack.chunjun.conf.ChunJunCommonConf;
+
+import com.amazonaws.regions.Regions;
+import org.codehaus.jackson.annotate.JsonIgnoreProperties;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * Configuration bean for the S3 connector: credentials, region/endpoint, the target bucket and
+ * object(s), and the text-format options (delimiter, encoding, header line) used when reading or
+ * writing.
+ *
+ * <p>NOTE(review): instances are presumably populated reflectively from the job configuration, so
+ * field and accessor names are likely part of the configuration contract — confirm before
+ * renaming anything.
+ *
+ * @author jier
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class S3Conf extends ChunJunCommonConf implements Serializable {
+
+ private static final long serialVersionUID = 9008329384464201903L;
+
+ /** Access key of the credential pair used to create the S3 client. */
+ private String accessKey;
+
+ /** Secret key of the credential pair used to create the S3 client. */
+ private String secretKey;
+
+ /** Region name; defaults to the name of {@code Regions.CN_NORTH_1}. */
+ private String region = Regions.CN_NORTH_1.getName();
+
+ /** Optional custom service endpoint; {@code null} unless configured. */
+ private String endpoint;
+
+ /** Name of the bucket that is read from or written to. */
+ private String bucket;
+
+ /** Object keys to process; presumably used by the reader — TODO confirm element type. */
+ private List objects;
+
+ /** Single object key; the output format in this connector writes to this object. */
+ private String object;
+
+ /** Character separating fields within a record; defaults to a comma. */
+ private char fieldDelimiter = ',';
+
+ /** Write mode; defaults to {@code "overwrite"}. */
+ private String writeMode = "overwrite";
+
+ /** Charset name used to encode/decode text; defaults to {@code "UTF-8"}. */
+ private String encoding = "UTF-8";
+
+ /** Whether the first line of a file is a header line; defaults to {@code false}. */
+ private boolean isFirstLineHeader = false;
+
+ /** Maximum file size; defaults to 1024 * 1024 (presumably bytes — TODO confirm unit). */
+ private long maxFileSize = 1024 * 1024L;
+
+ /**
+ * Protocol used to talk to the service; defaults to {@code "HTTP"}.
+ *
+ * <p>NOTE(review): the field name is capitalised, unlike every other field. If the bean is bound
+ * by field name, the expected configuration key would be {@code Protocol} — confirm.
+ */
+ private String Protocol = "HTTP";
+
+ /**
+ * Limit the number of files obtained per request. If the number of files is greater than
+ * fetchSize, then read in a loop
+ */
+ private int fetchSize = 512;
+
+ /** Use v2 or v1 api to get directory files */
+ private boolean useV2 = true;
+ /**
+ * Safety caution to prevent the parser from using large amounts of memory in the case where
+ * parsing settings like file encodings don't end up matching the actual format of a file. This
+ * switch can be turned off if the file format is known and tested. With the switch off, the max
+ * column lengths and max column count per record supported by the parser will greatly increase.
+ * Default is false.
+ */
+ private boolean safetySwitch = false;
+
+ /** @return the configured access key, or {@code null} if unset */
+ public String getAccessKey() {
+ return accessKey;
+ }
+
+ public void setAccessKey(String accessKey) {
+ this.accessKey = accessKey;
+ }
+
+ /** @return the configured secret key, or {@code null} if unset */
+ public String getSecretKey() {
+ return secretKey;
+ }
+
+ public void setSecretKey(String secretKey) {
+ this.secretKey = secretKey;
+ }
+
+ /** @return the region name */
+ public String getRegion() {
+ return region;
+ }
+
+ public void setRegion(String region) {
+ this.region = region;
+ }
+
+ /** @return the bucket name, or {@code null} if unset */
+ public String getBucket() {
+ return bucket;
+ }
+
+ public void setBucket(String bucket) {
+ this.bucket = bucket;
+ }
+
+ /** @return the configured object keys, or {@code null} if unset */
+ public List getObjects() {
+ return objects;
+ }
+
+ public void setObjects(List objects) {
+ this.objects = objects;
+ }
+
+ /** @return the field delimiter character */
+ public char getFieldDelimiter() {
+ return fieldDelimiter;
+ }
+
+ public void setFieldDelimiter(char fieldDelimiter) {
+ this.fieldDelimiter = fieldDelimiter;
+ }
+
+ /** @return the charset name used for text encoding */
+ public String getEncoding() {
+ return encoding;
+ }
+
+ public void setEncoding(String encoding) {
+ this.encoding = encoding;
+ }
+
+ /** @return the custom endpoint, or {@code null} if unset */
+ public String getEndpoint() {
+ return endpoint;
+ }
+
+ public void setEndpoint(String endpoint) {
+ this.endpoint = endpoint;
+ }
+
+ /** @return {@code true} if the first line is treated as a header */
+ public boolean isFirstLineHeader() {
+ return isFirstLineHeader;
+ }
+
+ public void setIsFirstLineHeader(boolean isFirstLineHeader) {
+ this.isFirstLineHeader = isFirstLineHeader;
+ }
+
+ /** @return the write mode */
+ public String getWriteMode() {
+ return writeMode;
+ }
+
+ public void setWriteMode(String writeMode) {
+ this.writeMode = writeMode;
+ }
+
+ /** @return the maximum file size */
+ public long getMaxFileSize() {
+ return maxFileSize;
+ }
+
+ public void setMaxFileSize(long maxFileSize) {
+ this.maxFileSize = maxFileSize;
+ }
+
+ /** @return the single object key, or {@code null} if unset */
+ public String getObject() {
+ return object;
+ }
+
+ public void setObject(String object) {
+ this.object = object;
+ }
+
+ /** @return the protocol name */
+ public String getProtocol() {
+ return Protocol;
+ }
+
+ public void setProtocol(String protocol) {
+ Protocol = protocol;
+ }
+
+ /** @return the maximum number of files fetched per listing request */
+ public int getFetchSize() {
+ return fetchSize;
+ }
+
+ public void setFetchSize(int fetchSize) {
+ this.fetchSize = fetchSize;
+ }
+
+ /** @return {@code true} if the v2 listing API should be used */
+ public boolean isUseV2() {
+ return useV2;
+ }
+
+ /** @return the value of the parser safety switch */
+ public boolean isSafetySwitch() {
+ return safetySwitch;
+ }
+
+ public void setSafetySwitch(boolean safetySwitch) {
+ this.safetySwitch = safetySwitch;
+ }
+
+ public void setUseV2(boolean useV2) {
+ this.useV2 = useV2;
+ }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/converter/S3ColumnConverter.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/converter/S3ColumnConverter.java
new file mode 100644
index 0000000000..f20ce4c88d
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/converter/S3ColumnConverter.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.converter;
+
+import com.dtstack.chunjun.conf.ChunJunCommonConf;
+import com.dtstack.chunjun.conf.FieldConf;
+import com.dtstack.chunjun.converter.AbstractRowConverter;
+import com.dtstack.chunjun.converter.IDeserializationConverter;
+import com.dtstack.chunjun.converter.ISerializationConverter;
+import com.dtstack.chunjun.element.ColumnRowData;
+import com.dtstack.chunjun.element.column.BigDecimalColumn;
+import com.dtstack.chunjun.element.column.SqlDateColumn;
+import com.dtstack.chunjun.element.column.StringColumn;
+import com.dtstack.chunjun.element.column.TimestampColumn;
+import com.dtstack.chunjun.throwable.UnsupportedTypeException;
+
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.RowType;
+
+import org.apache.commons.lang.StringUtils;
+
+import java.sql.Date;
+import java.sql.Time;
+import java.util.List;
+
+/**
+ * Row converter for the S3 connector that maps a split text record ({@code String[]}) to a
+ * {@link ColumnRowData} and writes a {@link RowData} back into a {@code String[]}.
+ */
+public class S3ColumnConverter
+ extends AbstractRowConverter {
+
+ /**
+ * Registers one internal and one external converter for every entry of {@code fieldTypes}.
+ *
+ * @param rowType row type handed to the superclass
+ * @param conf connector configuration; also stored in {@code commonConf}
+ */
+ public S3ColumnConverter(RowType rowType, ChunJunCommonConf conf) {
+ super(rowType, conf);
+ super.commonConf = conf;
+ for (int i = 0; i < fieldTypes.length; i++) {
+ LogicalType type = fieldTypes[i];
+ toInternalConverters.add(
+ wrapIntoNullableInternalConverter(createInternalConverter(type)));
+ toExternalConverters.add(
+ wrapIntoNullableExternalConverter(createExternalConverter(type), type));
+ }
+ }
+
+ /**
+ * Builds a {@link ColumnRowData} with one field per configured column.
+ *
+ * <p>For a column without a configured constant value, the text at the column's configured
+ * index in {@code input} is wrapped in a {@link StringColumn}; otherwise {@code null} is passed
+ * on and {@code assembleFieldProps} decides the final column.
+ *
+ * @param input the fields of one record
+ * @return the assembled row
+ */
+ @Override
+ public RowData toInternal(String[] input) {
+ List fieldConfList = commonConf.getColumn();
+ ColumnRowData rowData = new ColumnRowData(fieldConfList.size());
+ for (int i = 0; i < fieldConfList.size(); i++) {
+ StringColumn stringColumn = null;
+ // Only read from the record when no constant value is configured for this column.
+ if (StringUtils.isBlank(fieldConfList.get(i).getValue())) {
+ stringColumn = new StringColumn(input[fieldConfList.get(i).getIndex()]);
+ }
+ rowData.addField(assembleFieldProps(fieldConfList.get(i), stringColumn));
+ }
+ return rowData;
+ }
+
+ /**
+ * Fills {@code output} with the string form of each field of {@code rowData}.
+ *
+ * <p>NOTE(review): this reads every field through {@code getString} and does not use the
+ * converters registered in {@code toExternalConverters} — confirm that is intended.
+ *
+ * @param rowData the row to write
+ * @param output array to fill; its length determines how many fields are read
+ * @return the same {@code output} array
+ */
+ @Override
+ public String[] toExternal(RowData rowData, String[] output) {
+ for (int i = 0; i < output.length; i++) {
+ output[i] = String.valueOf(rowData.getString(i));
+ }
+ return output;
+ }
+
+ /**
+ * Returns a converter that wraps an already-typed value in the matching column class.
+ *
+ * @param type logical type of the field
+ * @throws UnsupportedTypeException for any type root not listed in the switch
+ */
+ @Override
+ protected IDeserializationConverter createInternalConverter(LogicalType type) {
+ switch (type.getTypeRoot()) {
+ case NULL:
+ return val -> null;
+ case INTEGER:
+ return val -> new BigDecimalColumn((Integer) val);
+ case BIGINT:
+ return val -> new BigDecimalColumn((Long) val);
+ case FLOAT:
+ return val -> new BigDecimalColumn((Float) val);
+ case DOUBLE:
+ return val -> new BigDecimalColumn((Double) val);
+ case VARCHAR:
+ return val -> new StringColumn((String) val);
+ case DATE:
+ return val -> new SqlDateColumn((Date) val);
+ case TIMESTAMP_WITHOUT_TIME_ZONE:
+ // NOTE(review): a timestamp value is cast to java.sql.Time here — confirm the
+ // expected runtime class of val.
+ return val -> new TimestampColumn((Time) val, 0);
+ default:
+ throw new UnsupportedTypeException(type);
+ }
+ }
+
+ /**
+ * Returns a converter that writes the field at {@code index} into {@code output[index]} as text.
+ *
+ * @param type logical type of the field
+ * @throws UnsupportedTypeException for any type root not listed in the switch
+ */
+ @Override
+ protected ISerializationConverter createExternalConverter(LogicalType type) {
+ switch (type.getTypeRoot()) {
+ case NULL:
+ return (val, index, output) -> output[index] = null;
+ case INTEGER:
+ return (val, index, output) -> output[index] = String.valueOf(val.getInt(index));
+ case BIGINT:
+ return (val, index, output) -> output[index] = String.valueOf(val.getLong(index));
+ case FLOAT:
+ return (val, index, output) -> output[index] = String.valueOf(val.getFloat(index));
+ case DOUBLE:
+ return (val, index, output) -> output[index] = String.valueOf(val.getDouble(index));
+ case VARCHAR:
+ return (val, index, output) -> output[index] = val.getString(index).toString();
+ case DATE:
+ // Requires the row to be a ColumnRowData; any other RowData fails the cast.
+ return (val, index, output) ->
+ output[index] =
+ ((ColumnRowData) val).getField(index).asSqlDate().toString();
+ case TIMESTAMP_WITHOUT_TIME_ZONE:
+ // Requires the row to be a ColumnRowData; the value is rendered via asTime().
+ return (val, index, output) ->
+ output[index] = ((ColumnRowData) val).getField(index).asTime().toString();
+ default:
+ throw new UnsupportedTypeException(type);
+ }
+ }
+
+ /**
+ * Returns a converter that simply delegates to the given one.
+ *
+ * <p>NOTE(review): despite the name, no null check is performed before delegating.
+ */
+ @Override
+ protected ISerializationConverter wrapIntoNullableExternalConverter(
+ ISerializationConverter ISerializationConverter, LogicalType type) {
+ return (val, index, output) -> ISerializationConverter.serialize(val, index, output);
+ }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/converter/S3RawConverter.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/converter/S3RawConverter.java
new file mode 100644
index 0000000000..4faffa3a6e
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/converter/S3RawConverter.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.converter;
+
+import com.dtstack.chunjun.throwable.UnsupportedTypeException;
+
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.types.DataType;
+
+import java.util.Locale;
+
+/** Maps the type names accepted in an S3 column definition to Flink {@link DataType}s. */
+public class S3RawConverter {
+
+    /**
+     * Resolves a column type name, ignoring case, to the corresponding Flink data type.
+     *
+     * @param type the type name from the column configuration; must not be {@code null}
+     * @return the matching Flink data type
+     * @throws UnsupportedTypeException if the name is not one of the supported types
+     */
+    public static DataType apply(String type) throws UnsupportedTypeException {
+        final String normalized = type.toUpperCase(Locale.ENGLISH);
+
+        if ("INT".equals(normalized)) {
+            return DataTypes.INT();
+        }
+        if ("LONG".equals(normalized) || "BIGINT".equals(normalized)) {
+            return DataTypes.BIGINT();
+        }
+        if ("FLOAT".equals(normalized)) {
+            return DataTypes.FLOAT();
+        }
+        if ("DOUBLE".equals(normalized)) {
+            return DataTypes.DOUBLE();
+        }
+        if ("CHAR".equals(normalized)
+                || "VARCHAR".equals(normalized)
+                || "STRING".equals(normalized)) {
+            return DataTypes.STRING();
+        }
+        if ("DATE".equals(normalized)) {
+            return DataTypes.DATE();
+        }
+        if ("DATETIME".equals(normalized) || "TIMESTAMP".equals(normalized)) {
+            return DataTypes.TIMESTAMP(0);
+        }
+        // The exception carries the name exactly as the caller supplied it.
+        throw new UnsupportedTypeException(type);
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/converter/S3RowConverter.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/converter/S3RowConverter.java
new file mode 100644
index 0000000000..14a976443f
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/converter/S3RowConverter.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.converter;
+
+import com.dtstack.chunjun.conf.ChunJunCommonConf;
+import com.dtstack.chunjun.converter.AbstractRowConverter;
+import com.dtstack.chunjun.converter.IDeserializationConverter;
+import com.dtstack.chunjun.converter.ISerializationConverter;
+import com.dtstack.chunjun.throwable.UnsupportedTypeException;
+
+import org.apache.flink.table.data.GenericRowData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.RowType;
+
+import java.sql.Date;
+import java.sql.Time;
+import java.time.LocalDate;
+import java.time.LocalTime;
+
+/**
+ * Row converter used by the S3 table connector: parses the text fields of one record into a
+ * {@link GenericRowData} and renders a {@link RowData} back into text fields.
+ */
+public class S3RowConverter extends AbstractRowConverter {
+
+    /**
+     * Registers one internal and one external converter per field of {@code rowType}.
+     *
+     * @param rowType the logical row type of the table
+     * @param conf connector configuration handed to the superclass
+     */
+    public S3RowConverter(RowType rowType, ChunJunCommonConf conf) {
+        super(rowType, conf);
+        for (int i = 0; i < rowType.getFieldCount(); i++) {
+            toInternalConverters.add(
+                    wrapIntoNullableInternalConverter(
+                            createInternalConverter(rowType.getTypeAt(i))));
+            toExternalConverters.add(
+                    wrapIntoNullableExternalConverter(
+                            createExternalConverter(fieldTypes[i]), fieldTypes[i]));
+        }
+    }
+
+    /**
+     * Parses every element of {@code input} with the converter registered at the same position.
+     *
+     * @param input the text fields of one record
+     * @return a row with {@code input.length} fields
+     * @throws Exception if a field cannot be parsed
+     */
+    @Override
+    public RowData toInternal(String[] input) throws Exception {
+        GenericRowData rowData = new GenericRowData(input.length);
+        for (int i = 0; i < rowData.getArity(); i++) {
+            rowData.setField(i, toInternalConverters.get(i).deserialize(input[i]));
+        }
+        return rowData;
+    }
+
+    /**
+     * Renders every field of {@code rowData} as text.
+     *
+     * <p>The values are written into the array supplied by the caller whenever it is large
+     * enough, so callers that ignore the return value still see the converted fields. A new array
+     * is allocated only when {@code output} is {@code null} or too short.
+     *
+     * @param rowData the row to render
+     * @param output array to fill; may be {@code null}
+     * @return the array holding the rendered fields
+     * @throws Exception if a field cannot be rendered
+     */
+    @Override
+    public String[] toExternal(RowData rowData, String[] output) throws Exception {
+        final int arity = rowData.getArity();
+        if (output == null || output.length < arity) {
+            output = new String[arity];
+        }
+        for (int i = 0; i < arity; i++) {
+            toExternalConverters.get(i).serialize(rowData, i, output);
+        }
+        return output;
+    }
+
+    /**
+     * Returns a converter that parses the text form of a value of the given type.
+     *
+     * @param type logical type of the field
+     * @throws UnsupportedOperationException for any type root not listed in the switch
+     */
+    @Override
+    protected IDeserializationConverter createInternalConverter(LogicalType type) {
+        switch (type.getTypeRoot()) {
+            case NULL:
+                return val -> null;
+            case INTEGER:
+                return val -> Integer.valueOf((String) val);
+            case BIGINT:
+                return val -> Long.valueOf((String) val);
+            case FLOAT:
+                return val -> Float.valueOf((String) val);
+            case DOUBLE:
+                return val -> Double.valueOf((String) val);
+            case CHAR:
+            case VARCHAR:
+                return val -> StringData.fromString((String) val);
+            case DATE:
+                // Stored as the number of days since the epoch.
+                return val ->
+                        (int) ((Date.valueOf(String.valueOf(val))).toLocalDate().toEpochDay());
+            case TIME_WITHOUT_TIME_ZONE:
+                // Stored as milliseconds of the day.
+                return val ->
+                        (int)
+                                ((Time.valueOf(String.valueOf(val))).toLocalTime().toNanoOfDay()
+                                        / 1_000_000L);
+            default:
+                throw new UnsupportedOperationException(type.toString());
+        }
+    }
+
+    /**
+     * Returns a converter that writes the field at {@code index} into {@code output[index]} as
+     * text.
+     *
+     * @param type logical type of the field
+     * @throws UnsupportedTypeException for any type root not listed in the switch
+     */
+    @Override
+    protected ISerializationConverter createExternalConverter(LogicalType type) {
+        switch (type.getTypeRoot()) {
+            case NULL:
+                return (val, index, output) -> output[index] = null;
+            case INTEGER:
+                return (val, index, output) -> output[index] = String.valueOf(val.getInt(index));
+            case BIGINT:
+                return (val, index, output) -> output[index] = String.valueOf(val.getLong(index));
+            case FLOAT:
+                return (val, index, output) -> output[index] = String.valueOf(val.getFloat(index));
+            case DOUBLE:
+                return (val, index, output) -> output[index] = String.valueOf(val.getDouble(index));
+            case CHAR:
+            case VARCHAR:
+                return (val, index, output) ->
+                        output[index] = String.valueOf(val.getString(index).toString());
+            case DATE:
+                return (val, index, output) ->
+                        output[index] =
+                                Date.valueOf(LocalDate.ofEpochDay(val.getInt(index))).toString();
+            case TIME_WITHOUT_TIME_ZONE:
+                return (val, index, output) ->
+                        output[index] =
+                                Time.valueOf(LocalTime.ofNanoOfDay(val.getInt(index) * 1_000_000L))
+                                        .toString();
+            default:
+                throw new UnsupportedTypeException(type.toString());
+        }
+    }
+
+    /**
+     * Wraps a converter so that a null row or a null field yields {@code output[index] = null}
+     * instead of failing inside the typed accessor.
+     *
+     * @param converter the type-specific converter to guard
+     * @param type logical type of the field (unused)
+     */
+    @Override
+    protected ISerializationConverter wrapIntoNullableExternalConverter(
+            ISerializationConverter converter, LogicalType type) {
+        return (val, index, output) -> {
+            if (val == null || val.isNullAt(index)) {
+                output[index] = null;
+            } else {
+                converter.serialize(val, index, output);
+            }
+        };
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/MyPartETag.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/MyPartETag.java
new file mode 100644
index 0000000000..ef9bd8ea61
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/MyPartETag.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.sink;
+
+import com.amazonaws.services.s3.model.PartETag;
+
+import java.io.Serializable;
+
+/**
+ * Serializable counterpart of the SDK's {@link PartETag}, holding the part number and ETag of one
+ * uploaded part so that it can be kept in checkpoint state.
+ *
+ * @author jier
+ */
+public class MyPartETag implements Serializable {
+    private static final long serialVersionUID = 1L;
+
+    private int partNumber;
+
+    private String eTag;
+
+    /** Creates an empty instance; populate it through the setters. */
+    public MyPartETag() {}
+
+    /**
+     * Copies part number and ETag from an SDK object.
+     *
+     * @param partETag the SDK value to copy; must not be {@code null}
+     */
+    public MyPartETag(PartETag partETag) {
+        this.eTag = partETag.getETag();
+        this.partNumber = partETag.getPartNumber();
+    }
+
+    /** @return a new SDK {@link PartETag} carrying the same part number and ETag */
+    public PartETag genPartETag() {
+        return new PartETag(partNumber, eTag);
+    }
+
+    /** @return the part number */
+    public int getPartNumber() {
+        return this.partNumber;
+    }
+
+    public void setPartNumber(int partNumber) {
+        this.partNumber = partNumber;
+    }
+
+    /** @return the ETag, or {@code null} if unset */
+    public String geteTag() {
+        return this.eTag;
+    }
+
+    public void seteTag(String eTag) {
+        this.eTag = eTag;
+    }
+
+    /** @return the part number and the ETag joined by {@code $} */
+    @Override
+    public String toString() {
+        return new StringBuilder().append(partNumber).append("$").append(eTag).toString();
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3DynamicTableSink.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3DynamicTableSink.java
new file mode 100644
index 0000000000..dfcd282bf7
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3DynamicTableSink.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.sink;
+
+import com.dtstack.chunjun.conf.FieldConf;
+import com.dtstack.chunjun.connector.s3.conf.S3Conf;
+import com.dtstack.chunjun.connector.s3.converter.S3RowConverter;
+import com.dtstack.chunjun.sink.DtOutputFormatSinkFunction;
+
+import org.apache.flink.table.api.TableSchema;
+import org.apache.flink.table.connector.ChangelogMode;
+import org.apache.flink.table.connector.sink.DynamicTableSink;
+import org.apache.flink.table.connector.sink.SinkFunctionProvider;
+import org.apache.flink.table.types.logical.RowType;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * {@link DynamicTableSink} that writes the rows of a table to S3 through {@link S3OutputFormat}.
+ */
+public class S3DynamicTableSink implements DynamicTableSink {
+    private final TableSchema schema;
+    private final S3Conf s3Conf;
+
+    /**
+     * @param schema schema of the sink table
+     * @param s3Conf connector configuration; its column list is filled in by {@link
+     *     #getSinkRuntimeProvider(Context)}
+     */
+    public S3DynamicTableSink(TableSchema schema, S3Conf s3Conf) {
+        this.schema = schema;
+        this.s3Conf = s3Conf;
+    }
+
+    /** Accepts whatever changelog mode the planner requests. */
+    @Override
+    public ChangelogMode getChangelogMode(ChangelogMode requestedMode) {
+        return requestedMode;
+    }
+
+    /**
+     * Derives the column configuration from the table schema, builds the output format and wraps
+     * it in a sink function with parallelism 1.
+     */
+    @Override
+    public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
+        final RowType rowType = (RowType) schema.toRowDataType().getLogicalType();
+
+        String[] fieldNames = schema.getFieldNames();
+        List<FieldConf> columnList = new ArrayList<>(fieldNames.length);
+        for (int i = 0; i < fieldNames.length; i++) {
+            FieldConf field = new FieldConf();
+            field.setName(fieldNames[i]);
+            field.setType(rowType.getTypeAt(i).asSummaryString());
+            field.setIndex(i);
+            columnList.add(field);
+        }
+        s3Conf.setColumn(columnList);
+
+        S3OutputFormatBuilder builder = new S3OutputFormatBuilder(new S3OutputFormat());
+        builder.setS3Conf(s3Conf);
+        builder.setRowConverter(new S3RowConverter(rowType, s3Conf));
+
+        return SinkFunctionProvider.of(new DtOutputFormatSinkFunction<>(builder.finish()), 1);
+    }
+
+    /** Returns a new sink sharing this sink's schema and configuration objects. */
+    @Override
+    public DynamicTableSink copy() {
+        return new S3DynamicTableSink(schema, s3Conf);
+    }
+
+    /** @return the name shown for this sink in plans and logs */
+    @Override
+    public String asSummaryString() {
+        // Previously reported the name of an unrelated sink class.
+        return "S3DynamicTableSink";
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3OutputFormat.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3OutputFormat.java
new file mode 100644
index 0000000000..02c1a1f9f9
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3OutputFormat.java
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.sink;
+
+import com.dtstack.chunjun.conf.FieldConf;
+import com.dtstack.chunjun.connector.s3.conf.S3Conf;
+import com.dtstack.chunjun.connector.s3.util.S3Util;
+import com.dtstack.chunjun.connector.s3.util.WriterUtil;
+import com.dtstack.chunjun.restore.FormatState;
+import com.dtstack.chunjun.sink.format.BaseRichOutputFormat;
+import com.dtstack.chunjun.throwable.ChunJunRuntimeException;
+import com.dtstack.chunjun.throwable.WriteRecordException;
+
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.table.data.RowData;
+
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.PartETag;
+import com.esotericsoftware.minlog.Log;
+import org.apache.commons.lang3.StringUtils;
+
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * The OutputFormat implementation which writes data to Amazon S3.
+ *
+ * <p>Records are rendered as delimited text into an in-memory buffer. Once the buffer exceeds
+ * {@link #MIN_SIZE} characters (or the format is closing) it is uploaded as one part of a
+ * multipart upload; the upload is completed in {@link #closeInternal()}. The upload id and the
+ * ETags of the uploaded parts are exposed as checkpoint state.
+ *
+ * <p>NOTE(review): after a restore the part number sequence restarts from the task's first part
+ * number while previously uploaded parts are kept — confirm that numbers cannot collide.
+ *
+ * @author jier
+ */
+public class S3OutputFormat extends BaseRichOutputFormat {
+
+    private static final String OVERWRITE_MODE = "overwrite";
+
+    /** Number of buffered characters above which a part is uploaded before the format closes. */
+    private static final long MIN_SIZE = 1024 * 1024 * 25L;
+
+    private transient AmazonS3 amazonS3;
+
+    private S3Conf s3Conf;
+
+    // The three fields below are per-instance runtime state. They must not be static: static
+    // state would be shared by every subtask in the JVM and would survive a task restart.
+
+    /** Must start at 1 and cannot be greater than 10,000 */
+    private transient int currentPartNumber;
+
+    private transient String currentUploadId;
+
+    /** Set once the format is closing, so the remaining buffer is uploaded whatever its size. */
+    private transient boolean willClose;
+
+    private transient StringWriter sw;
+    private transient List<MyPartETag> myPartETags;
+    private transient WriterUtil writerUtil;
+
+    /**
+     * Creates the client, restores checkpoint state, prepares the target object and the first
+     * buffer, and finally writes the optional header line.
+     */
+    @Override
+    protected void openInternal(int taskNumber, int numTasks) {
+        openSource();
+        restore();
+        checkOutputDir();
+        createActionFinishedTag();
+        nextBlock();
+        List<FieldConf> column = s3Conf.getColumn();
+        columnNameList = column.stream().map(FieldConf::getName).collect(Collectors.toList());
+        columnTypeList = column.stream().map(FieldConf::getType).collect(Collectors.toList());
+        // The header needs both the column names and an open writer, so it is written last.
+        beforeWriteRecords();
+    }
+
+    /** Creates the S3 client and resets the per-task upload bookkeeping. */
+    private void openSource() {
+        this.amazonS3 = S3Util.getS3Client(s3Conf);
+        this.myPartETags = new ArrayList<>();
+        this.willClose = false;
+        // nextBlock() adds numTasks, so the first part number used is taskNumber + 1.
+        this.currentPartNumber = taskNumber - numTasks + 1;
+    }
+
+    /** Takes over the upload id and the uploaded part tags from the restored state, if any. */
+    @SuppressWarnings("unchecked")
+    private void restore() {
+        if (formatState != null && formatState.getState() != null) {
+            Tuple2<String, List<MyPartETag>> state =
+                    (Tuple2<String, List<MyPartETag>>) formatState.getState();
+            this.currentUploadId = state.f0;
+            if (state.f1 != null) {
+                this.myPartETags = state.f1;
+            }
+        }
+    }
+
+    /** Deletes an already existing target object when the write mode is overwrite. */
+    private void checkOutputDir() {
+        if (S3Util.doesObjectExist(amazonS3, s3Conf.getBucket(), s3Conf.getObject())) {
+            if (OVERWRITE_MODE.equalsIgnoreCase(s3Conf.getWriteMode())) {
+                S3Util.deleteObject(amazonS3, s3Conf.getBucket(), s3Conf.getObject());
+            }
+        }
+    }
+
+    /** Starts a fresh buffer and advances to the next part number of this task. */
+    private void nextBlock() {
+        sw = new StringWriter();
+        this.writerUtil = new WriterUtil(sw, s3Conf.getFieldDelimiter());
+        this.currentPartNumber = this.currentPartNumber + numTasks;
+    }
+
+    /** Create file multipart upload ID */
+    private void createActionFinishedTag() {
+        if (StringUtils.isBlank(currentUploadId)) {
+            this.currentUploadId =
+                    S3Util.initiateMultipartUploadAndGetId(
+                            amazonS3, s3Conf.getBucket(), s3Conf.getObject());
+        }
+    }
+
+    /**
+     * Writes the column names as the first record when a header line is configured.
+     *
+     * <p>The names are written directly rather than through the row converter, which would try
+     * to parse them according to the column types. Nothing is written when parts were restored
+     * from state, because the header then already belongs to an earlier part. Failures are logged
+     * and otherwise ignored.
+     */
+    private void beforeWriteRecords() {
+        if (!s3Conf.isFirstLineHeader() || !myPartETags.isEmpty()) {
+            return;
+        }
+        try {
+            for (Object name : columnNameList) {
+                writerUtil.write(String.valueOf(name));
+            }
+            writerUtil.endRecord();
+        } catch (Exception e) {
+            LOG.warn("first line fail to write", e);
+        }
+    }
+
+    /**
+     * Uploads the buffered text as the current part when it exceeds {@link #MIN_SIZE} characters
+     * or when the format is closing. Does nothing when no data has been buffered since the last
+     * upload.
+     */
+    protected void flushDataInternal() {
+        if (writerUtil == null) {
+            // The previous buffer has already been uploaded and no new block was started;
+            // sw still references the uploaded data and must not be sent again.
+            return;
+        }
+        StringBuffer sb = sw.getBuffer();
+        if (sb.length() == 0) {
+            return;
+        }
+        if (sb.length() > MIN_SIZE || willClose) {
+            byte[] byteArray;
+            try {
+                byteArray = sb.toString().getBytes(s3Conf.getEncoding());
+            } catch (UnsupportedEncodingException e) {
+                throw new ChunJunRuntimeException(e);
+            }
+            LOG.info("Upload part size:" + byteArray.length);
+            PartETag partETag =
+                    S3Util.uploadPart(
+                            amazonS3,
+                            s3Conf.getBucket(),
+                            s3Conf.getObject(),
+                            this.currentUploadId,
+                            this.currentPartNumber,
+                            byteArray);
+
+            MyPartETag myPartETag = new MyPartETag(partETag);
+            myPartETags.add(myPartETag);
+
+            LOG.debug(
+                    "task-{} upload etag:[{}]",
+                    taskNumber,
+                    myPartETags.stream().map(Objects::toString).collect(Collectors.joining(",")));
+            writerUtil.close();
+            // A null writer tells writeSingleRecordInternal to start the next block.
+            writerUtil = null;
+        }
+    }
+
+    /**
+     * Completes the multipart upload from the collected part tags. When no part was uploaded the
+     * upload is aborted and an empty object is stored instead.
+     *
+     * @throws IllegalArgumentException if the current part number exceeds 10000
+     */
+    private void completeMultipartUploadFile() {
+        if (this.currentPartNumber > 10000) {
+            throw new IllegalArgumentException("part can not bigger than 10000");
+        }
+        List<PartETag> partETags =
+                myPartETags.stream().map(MyPartETag::genPartETag).collect(Collectors.toList());
+        if (!partETags.isEmpty()) {
+            LOG.info(
+                    "Start merging files partETags:{}",
+                    partETags.stream().map(PartETag::getETag).collect(Collectors.joining(",")));
+            S3Util.completeMultipartUpload(
+                    amazonS3,
+                    s3Conf.getBucket(),
+                    s3Conf.getObject(),
+                    this.currentUploadId,
+                    partETags);
+        } else {
+            S3Util.abortMultipartUpload(
+                    amazonS3, s3Conf.getBucket(), s3Conf.getObject(), this.currentUploadId);
+            S3Util.putStringObject(amazonS3, s3Conf.getBucket(), s3Conf.getObject(), "");
+        }
+    }
+
+    /** Uploads whatever is still buffered, completes the upload and closes the client. */
+    @Override
+    public void closeInternal() {
+        if (amazonS3 == null) {
+            // open never got as far as creating the client; nothing to finish.
+            return;
+        }
+        try {
+            // Before closing the client, upload the remaining data below the part threshold.
+            willClose = true;
+            if (StringUtils.isNotBlank(currentUploadId)) {
+                flushDataInternal();
+                completeMultipartUploadFile();
+            }
+        } finally {
+            S3Util.closeS3(amazonS3);
+            LOG.info("S3Client close!");
+        }
+    }
+
+    @Override
+    protected void writeMultipleRecordsInternal() {
+        throw new UnsupportedOperationException("S3 Writer does not support batch write");
+    }
+
+    /** Adds the current upload id and the uploaded part tags to the checkpoint state. */
+    @Override
+    public FormatState getFormatState() throws Exception {
+        super.getFormatState();
+        if (formatState != null) {
+            formatState.setNumOfSubTask(taskNumber);
+            formatState.setState(new Tuple2<>(currentUploadId, myPartETags));
+        }
+        return formatState;
+    }
+
+    /**
+     * Renders one row as a delimited record, appends it to the buffer and uploads the buffer if
+     * it has grown large enough.
+     *
+     * @param rowData the row to write
+     * @throws WriteRecordException if the row cannot be converted or written
+     */
+    @Override
+    protected void writeSingleRecordInternal(RowData rowData) throws WriteRecordException {
+        String[] stringRecord = new String[columnNameList.size()];
+        try {
+            if (this.writerUtil == null) {
+                nextBlock();
+            }
+            // Convert the row to strings. A converter may return a different array than the one
+            // passed in, so the returned array is the one that is written.
+            String[] converted = (String[]) rowConverter.toExternal(rowData, stringRecord);
+            if (converted != null) {
+                stringRecord = converted;
+            }
+        } catch (Exception ex) {
+            throw new WriteRecordException(ex.getMessage(), ex);
+        }
+
+        try {
+            for (String column : stringRecord) {
+                // NOTE(review): a null field is skipped entirely, which shifts the following
+                // columns of the record to the left — confirm this is intended.
+                if (column == null) {
+                    continue;
+                }
+                writerUtil.write(column);
+            }
+            writerUtil.endRecord();
+            flushDataInternal();
+        } catch (Exception ex) {
+            String msg = "RowData2string error RowData(" + rowData + ")";
+            throw new WriteRecordException(msg, ex, 0, rowData);
+        }
+    }
+
+    public void setS3Conf(S3Conf s3Conf) {
+        this.s3Conf = s3Conf;
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3OutputFormatBuilder.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3OutputFormatBuilder.java
new file mode 100644
index 0000000000..68ce055afd
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3OutputFormatBuilder.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.sink;
+
+import com.dtstack.chunjun.conf.SpeedConf;
+import com.dtstack.chunjun.connector.s3.conf.S3Conf;
+import com.dtstack.chunjun.sink.format.BaseRichOutputFormatBuilder;
+
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * The builder of {@link S3OutputFormat}; validates the {@link S3Conf} before the format is
+ * handed out.
+ *
+ * @author jier
+ */
+public class S3OutputFormatBuilder extends BaseRichOutputFormatBuilder {
+
+    /** Speed settings of the job; may be {@code null} when never set via the setter. */
+    private SpeedConf speedConf;
+
+    public S3OutputFormatBuilder(S3OutputFormat format) {
+        super(format);
+    }
+
+    /**
+     * Verifies that bucket, accessKey, secretKey and object are present and that at most one
+     * channel is configured.
+     *
+     * @throws IllegalArgumentException listing every violated rule
+     */
+    @Override
+    protected void checkFormat() {
+
+        StringBuilder sb = new StringBuilder(256);
+        S3Conf s3Config = (S3Conf) format.getConfig();
+        if (StringUtils.isBlank(s3Config.getBucket())) {
+            LOG.info("bucket was not supplied separately.");
+            sb.append("bucket was not supplied separately;\n");
+        }
+        if (StringUtils.isBlank(s3Config.getAccessKey())) {
+            LOG.info("accessKey was not supplied separately.");
+            sb.append("accessKey was not supplied separately;\n");
+        }
+        if (StringUtils.isBlank(s3Config.getSecretKey())) {
+            LOG.info("secretKey was not supplied separately.");
+            sb.append("secretKey was not supplied separately;\n");
+        }
+        if (StringUtils.isBlank(s3Config.getObject())) {
+            LOG.info("object was not supplied separately.");
+            sb.append("object was not supplied separately;\n");
+        }
+        // speedConf is only populated through setSpeedConf(); without it there is no channel
+        // setting to validate, so skip the check instead of failing with an NPE.
+        if (speedConf != null && speedConf.getChannel() > 1) {
+            sb.append(
+                    String.format(
+                            "S3Writer can not support channel bigger than 1, current channel is [%s]",
+                            speedConf.getChannel()));
+        }
+        if (sb.length() > 0) {
+            throw new IllegalArgumentException(sb.toString());
+        }
+    }
+
+    /** Registers the configuration on both the builder and the wrapped output format. */
+    public void setS3Conf(S3Conf conf) {
+        super.setConfig(conf);
+        ((S3OutputFormat) format).setS3Conf(conf);
+    }
+
+    public SpeedConf getSpeedConf() {
+        return speedConf;
+    }
+
+    public void setSpeedConf(SpeedConf speedConf) {
+        this.speedConf = speedConf;
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3SinkFactory.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3SinkFactory.java
new file mode 100644
index 0000000000..3759fcd86e
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/sink/S3SinkFactory.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.sink;
+
+import com.dtstack.chunjun.conf.SpeedConf;
+import com.dtstack.chunjun.conf.SyncConf;
+import com.dtstack.chunjun.connector.s3.conf.S3Conf;
+import com.dtstack.chunjun.connector.s3.converter.S3ColumnConverter;
+import com.dtstack.chunjun.connector.s3.converter.S3RawConverter;
+import com.dtstack.chunjun.connector.s3.converter.S3RowConverter;
+import com.dtstack.chunjun.converter.AbstractRowConverter;
+import com.dtstack.chunjun.converter.RawTypeConverter;
+import com.dtstack.chunjun.sink.SinkFactory;
+import com.dtstack.chunjun.util.GsonUtil;
+import com.dtstack.chunjun.util.TableUtil;
+
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSink;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.types.logical.RowType;
+
+/** Sink factory that builds an {@link S3OutputFormat} from the writer section of a sync job. */
+public class S3SinkFactory extends SinkFactory {
+
+    private final S3Conf s3Conf;
+    private final SpeedConf speedConf;
+
+    /**
+     * Maps the writer parameters onto an {@link S3Conf} (through a JSON round trip), attaches
+     * the writer's field list and keeps the job's speed settings.
+     *
+     * @param syncConf the sync job configuration
+     */
+    public S3SinkFactory(SyncConf syncConf) {
+        super(syncConf);
+        s3Conf =
+                GsonUtil.GSON.fromJson(
+                        GsonUtil.GSON.toJson(syncConf.getWriter().getParameter()), S3Conf.class);
+        s3Conf.setColumn(syncConf.getWriter().getFieldList());
+        speedConf = syncConf.getSpeed();
+        super.initCommonConf(s3Conf);
+    }
+
+    @Override
+    public RawTypeConverter getRawTypeConverter() {
+        return S3RawConverter::apply;
+    }
+
+    /**
+     * Builds the S3 output format with the converter matching {@code useAbstractBaseColumn}
+     * and attaches it to the given stream.
+     *
+     * @param dataSet the stream of rows to write
+     * @return the sink created for {@code dataSet}
+     */
+    @Override
+    public DataStreamSink<RowData> createSink(DataStream<RowData> dataSet) {
+        S3OutputFormatBuilder builder = new S3OutputFormatBuilder(new S3OutputFormat());
+        final RowType rowType = TableUtil.createRowType(s3Conf.getColumn(), getRawTypeConverter());
+        AbstractRowConverter rowConverter;
+        if (useAbstractBaseColumn) {
+            rowConverter = new S3ColumnConverter(rowType, s3Conf);
+        } else {
+            rowConverter = new S3RowConverter(rowType, s3Conf);
+        }
+        builder.setRowConverter(rowConverter, useAbstractBaseColumn);
+        builder.setSpeedConf(speedConf);
+        builder.setS3Conf(s3Conf);
+        return createOutput(dataSet, builder.finish());
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3DynamicTableSource.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3DynamicTableSource.java
new file mode 100644
index 0000000000..88d7127fd3
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3DynamicTableSource.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.source;
+
+import com.dtstack.chunjun.conf.FieldConf;
+import com.dtstack.chunjun.connector.s3.conf.S3Conf;
+import com.dtstack.chunjun.connector.s3.converter.S3RowConverter;
+import com.dtstack.chunjun.source.DtInputFormatSourceFunction;
+import com.dtstack.chunjun.table.connector.source.ParallelSourceFunctionProvider;
+
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.table.api.TableSchema;
+import org.apache.flink.table.connector.ChangelogMode;
+import org.apache.flink.table.connector.source.DynamicTableSource;
+import org.apache.flink.table.connector.source.ScanTableSource;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.runtime.typeutils.InternalTypeInfo;
+import org.apache.flink.table.types.logical.RowType;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** Insert-only scan table source that reads rows from S3 through {@link S3InputFormat}. */
+public class S3DynamicTableSource implements ScanTableSource {
+    private final TableSchema schema;
+    private final S3Conf s3Conf;
+
+    public S3DynamicTableSource(TableSchema schema, S3Conf s3Conf) {
+        this.schema = schema;
+        this.s3Conf = s3Conf;
+    }
+
+    @Override
+    public ChangelogMode getChangelogMode() {
+        return ChangelogMode.insertOnly();
+    }
+
+    /**
+     * Derives the column list from the table schema, stores it on the shared {@link S3Conf}
+     * and wraps a freshly built {@link S3InputFormat} in a source function.
+     */
+    @Override
+    public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
+        final RowType rowType = (RowType) schema.toRowDataType().getLogicalType();
+        TypeInformation<RowData> typeInformation = InternalTypeInfo.of(rowType);
+        String[] fieldNames = schema.getFieldNames();
+        List<FieldConf> columnList = new ArrayList<>(fieldNames.length);
+        for (int i = 0; i < fieldNames.length; i++) {
+            FieldConf field = new FieldConf();
+            field.setName(fieldNames[i]);
+            field.setType(rowType.getTypeAt(i).asSummaryString());
+            field.setIndex(i);
+            columnList.add(field);
+        }
+        s3Conf.setColumn(columnList);
+        S3InputFormatBuilder builder = new S3InputFormatBuilder(new S3InputFormat());
+        builder.setRowConverter(new S3RowConverter(rowType, s3Conf));
+        builder.setS3Conf(s3Conf);
+        return ParallelSourceFunctionProvider.of(
+                new DtInputFormatSourceFunction<>(builder.finish(), typeInformation), false, null);
+    }
+
+    /** Returns a new source over the same schema and the same {@link S3Conf} instance. */
+    @Override
+    public DynamicTableSource copy() {
+        return new S3DynamicTableSource(this.schema, this.s3Conf);
+    }
+
+    @Override
+    public String asSummaryString() {
+        return "S3DynamicTableSource";
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputFormat.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputFormat.java
new file mode 100644
index 0000000000..f9eca3a32c
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputFormat.java
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.source;
+
+import com.dtstack.chunjun.conf.RestoreConf;
+import com.dtstack.chunjun.connector.s3.conf.S3Conf;
+import com.dtstack.chunjun.connector.s3.util.ReaderUtil;
+import com.dtstack.chunjun.connector.s3.util.S3SimpleObject;
+import com.dtstack.chunjun.connector.s3.util.S3Util;
+import com.dtstack.chunjun.restore.FormatState;
+import com.dtstack.chunjun.source.format.BaseRichInputFormat;
+import com.dtstack.chunjun.throwable.ChunJunRuntimeException;
+import com.dtstack.chunjun.throwable.ReadRecordException;
+import com.dtstack.chunjun.util.GsonUtil;
+
+import org.apache.flink.core.io.InputSplit;
+import org.apache.flink.table.data.RowData;
+
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.GetObjectRequest;
+import com.amazonaws.services.s3.model.S3Object;
+import com.amazonaws.services.s3.model.S3ObjectInputStream;
+import org.apache.commons.lang.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
+
+/**
+ * Input format that reads the configured S3 objects one after another and, when restore is
+ * enabled, tracks a per-object read offset in the format state.
+ *
+ * @author jier
+ */
+public class S3InputFormat extends BaseRichInputFormat {
+
+    private static final Logger LOG = LoggerFactory.getLogger(S3InputFormat.class);
+
+    private static final long serialVersionUID = -3217513386563100062L;
+
+    private S3Conf s3Conf;
+    /** Keys of the objects this sub-task still has to read. */
+    private Iterator<String> splits;
+
+    private transient AmazonS3 amazonS3;
+
+    private transient String currentObject;
+    /** Object key to next read offset; {@code -1} marks an object that was read completely. */
+    private transient Map<String, Long> offsetMap;
+
+    private transient ReaderUtil readerUtil = null;
+
+    /** May be {@code null} when the format is built without restore settings. */
+    private RestoreConf restoreConf;
+
+    @Override
+    public void openInputFormat() throws IOException {
+        super.openInputFormat();
+    }
+
+    /**
+     * Creates the S3 client and selects the objects this sub-task reads.
+     *
+     * <p>When restoring from a saved offset map, partially read objects are queued first and
+     * objects marked as finished are left out.
+     */
+    @Override
+    protected void openInternal(InputSplit split) {
+        amazonS3 = S3Util.getS3Client(s3Conf);
+        S3InputSplit inputSplit = (S3InputSplit) split;
+        List<String> splitsList = inputSplit.getSplits();
+        LinkedList<String> result = new LinkedList<>();
+        if (isRestoreEnabled()
+                && formatState != null
+                && formatState.getState() instanceof Map) {
+            @SuppressWarnings("unchecked") // the state is written by getFormatState() below
+            Map<String, Long> restored = (Map<String, Long>) formatState.getState();
+            offsetMap = restored;
+            for (int i = 0; i < splitsList.size(); i++) {
+                String object = splitsList.get(i);
+                if (i % inputSplit.getTotalNumberOfSplits() == indexOfSubTask) {
+                    Long offset = offsetMap.get(object);
+                    if (offset != null && 0 < offset) {
+                        // partially read: continue with it first
+                        result.addFirst(object);
+                    } else if (offset == null || 0 == offset) {
+                        // not started yet
+                        result.add(object);
+                    }
+                    // negative offset: already read completely, skip
+                }
+            }
+        } else {
+            if (isRestoreEnabled()) {
+                offsetMap = new ConcurrentHashMap<>(inputSplit.getSplits().size());
+            }
+            for (int i = 0; i < splitsList.size(); i++) {
+                String object = splitsList.get(i);
+                if (i % inputSplit.getTotalNumberOfSplits() == inputSplit.getSplitNumber()) {
+                    result.add(object);
+                }
+            }
+        }
+        splits = result.iterator();
+    }
+
+    /**
+     * Resolves the configured objects and creates {@code minNumSplits} splits that each carry
+     * the full key list; the per-task selection happens in {@link #openInternal(InputSplit)}.
+     *
+     * @throws ChunJunRuntimeException if no object matches the configuration
+     */
+    @Override
+    protected InputSplit[] createInputSplitsInternal(int minNumSplits) {
+        List<S3SimpleObject> objects = resolveObjects();
+        if (objects.isEmpty()) {
+            throw new ChunJunRuntimeException(
+                    "No objects found in bucket: "
+                            + s3Conf.getBucket()
+                            + ",objects: "
+                            + s3Conf.getObjects());
+        }
+        LOG.info("read file {}", GsonUtil.GSON.toJson(objects));
+        List<String> keys = new ArrayList<>();
+        for (S3SimpleObject object : objects) {
+            keys.add(object.getKey());
+        }
+        S3InputSplit[] splits = new S3InputSplit[minNumSplits];
+        for (int i = 0; i < minNumSplits; i++) {
+            splits[i] = new S3InputSplit(i, minNumSplits, keys);
+        }
+        return splits;
+    }
+
+    /**
+     * Converts the record the reader is currently positioned on and, when restore is enabled,
+     * stores the reader's next offset for the current object.
+     */
+    @Override
+    protected RowData nextRecordInternal(RowData rowData) throws ReadRecordException {
+        String[] fields;
+        try {
+            fields = readerUtil.getValues();
+            rowData = rowConverter.toInternal(fields);
+        } catch (IOException e) {
+            throw new ChunJunRuntimeException(e);
+        } catch (Exception e) {
+            throw new ReadRecordException("", e, 0, rowData);
+        }
+        if (isRestoreEnabled()) {
+            offsetMap.replace(currentObject, readerUtil.getNextOffset());
+        }
+        return rowData;
+    }
+
+    /** Closes the current reader first and then shuts the S3 client down. */
+    @Override
+    protected void closeInternal() {
+        try {
+            if (readerUtil != null) {
+                readerUtil.close();
+                readerUtil = null;
+            }
+        } finally {
+            if (amazonS3 != null) {
+                amazonS3.shutdown();
+                amazonS3 = null;
+            }
+        }
+    }
+
+    @Override
+    public boolean reachedEnd() throws IOException {
+        return reachedEndWithoutCheckState();
+    }
+
+    /**
+     * Positions the reader on the next record, moving on to the next object whenever the
+     * current one is exhausted.
+     *
+     * @return {@code true} when every object has been read, {@code false} when a record is
+     *     ready
+     */
+    public boolean reachedEndWithoutCheckState() throws IOException {
+        // Loop instead of recursing so that a long run of empty objects cannot exhaust the stack.
+        while (true) {
+            if (readerUtil == null) {
+                if (!splits.hasNext()) {
+                    // All files have been read
+                    return true;
+                }
+                openNextObject();
+            }
+            if (readerUtil.readRecord()) {
+                // The file has not been read completely
+                return false;
+            }
+            // Current object is exhausted: close it, mark it as finished and try the next one
+            readerUtil.close();
+            readerUtil = null;
+            if (isRestoreEnabled()) {
+                offsetMap.replace(currentObject, -1L);
+            }
+        }
+    }
+
+    /** Opens a reader on the next object, resuming at its saved offset when one exists. */
+    private void openNextObject() throws IOException {
+        currentObject = splits.next();
+        GetObjectRequest rangeObjectRequest =
+                new GetObjectRequest(s3Conf.getBucket(), currentObject);
+        LOG.info("Current read file {}", currentObject);
+        boolean restore = isRestoreEnabled();
+        Long savedOffset = restore ? offsetMap.get(currentObject) : null;
+        if (savedOffset != null && 0 <= savedOffset) {
+            // The object was opened before the restore: continue from the saved offset
+            long offset = savedOffset;
+            rangeObjectRequest.setRange(offset);
+            readerUtil = newReader(rangeObjectRequest, offset);
+            offsetMap.put(currentObject, offset);
+        } else {
+            // Restore is disabled, or the object has not been read yet
+            readerUtil = newReader(rangeObjectRequest, 0L);
+            if (s3Conf.isFirstLineHeader()) {
+                readerUtil.readHeaders();
+            }
+            if (restore) {
+                offsetMap.put(currentObject, readerUtil.getNextOffset());
+            }
+        }
+    }
+
+    /** Fetches the object and wraps its content stream in a {@link ReaderUtil}. */
+    private ReaderUtil newReader(GetObjectRequest request, long offset) throws IOException {
+        S3Object o = amazonS3.getObject(request);
+        S3ObjectInputStream s3is = o.getObjectContent();
+        return new ReaderUtil(
+                new InputStreamReader(s3is, s3Conf.getEncoding()),
+                s3Conf.getFieldDelimiter(),
+                offset,
+                s3Conf.isSafetySwitch());
+    }
+
+    /** Null-safe restore check; the restore settings are optional for this format. */
+    private boolean isRestoreEnabled() {
+        return restoreConf != null && restoreConf.isRestore();
+    }
+
+    @Override
+    public FormatState getFormatState() {
+        super.getFormatState();
+        if (formatState != null && offsetMap != null && !offsetMap.isEmpty()) {
+            formatState.setState(offsetMap);
+        }
+        return formatState;
+    }
+
+    /**
+     * Expands the configured object names into the distinct set of existing objects.
+     *
+     * <p>A name ending in {@code .*} is treated as a key prefix; any other name is kept only
+     * if the object exists. The client created for the lookup is shut down before returning.
+     *
+     * @return the distinct matching objects, in no particular order
+     */
+    public List<S3SimpleObject> resolveObjects() {
+        String bucket = s3Conf.getBucket();
+        Set<S3SimpleObject> resolved = new HashSet<>();
+        AmazonS3 amazonS3 = S3Util.getS3Client(s3Conf);
+        try {
+            for (String key : s3Conf.getObjects()) {
+                if (StringUtils.isNotBlank(key)) {
+                    if (key.endsWith(".*")) {
+                        // End with .*, indicating that the object is prefixed
+                        String prefix = key.substring(0, key.indexOf(".*"));
+                        List<String> subObjects;
+                        if (s3Conf.isUseV2()) {
+                            subObjects =
+                                    S3Util.listObjectsKeyByPrefix(
+                                            amazonS3, bucket, prefix, s3Conf.getFetchSize());
+                        } else {
+                            subObjects =
+                                    S3Util.listObjectsByv1(
+                                            amazonS3, bucket, prefix, s3Conf.getFetchSize());
+                        }
+                        for (String subObject : subObjects) {
+                            S3SimpleObject s3SimpleObject = S3Util.getS3SimpleObject(subObject);
+                            resolved.add(s3SimpleObject);
+                        }
+                    } else if (S3Util.doesObjectExist(amazonS3, bucket, key)) {
+                        // Exact query and object exists
+                        S3SimpleObject s3SimpleObject = S3Util.getS3SimpleObject(key);
+                        resolved.add(s3SimpleObject);
+                    }
+                }
+            }
+        } finally {
+            // This client is only used for the lookup above; release its resources.
+            amazonS3.shutdown();
+        }
+        List<S3SimpleObject> distinct = new ArrayList<>(resolved);
+        if (LOG.isDebugEnabled()) {
+            LOG.debug(
+                    "match object is[{}]",
+                    distinct.stream().map(S3SimpleObject::getKey).collect(Collectors.joining(",")));
+        }
+        return distinct;
+    }
+
+    public S3Conf getS3Conf() {
+        return s3Conf;
+    }
+
+    public void setS3Conf(S3Conf s3Conf) {
+        this.s3Conf = s3Conf;
+    }
+
+    public RestoreConf getRestoreConf() {
+        return restoreConf;
+    }
+
+    public void setRestoreConf(RestoreConf restoreConf) {
+        this.restoreConf = restoreConf;
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputFormatBuilder.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputFormatBuilder.java
new file mode 100644
index 0000000000..6949b52825
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputFormatBuilder.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.source;
+
+import com.dtstack.chunjun.conf.RestoreConf;
+import com.dtstack.chunjun.conf.SpeedConf;
+import com.dtstack.chunjun.connector.s3.conf.S3Conf;
+import com.dtstack.chunjun.source.format.BaseRichInputFormatBuilder;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * Builder for {@link S3InputFormat} that also validates the supplied {@link S3Conf}.
+ *
+ * @author jier
+ */
+public class S3InputFormatBuilder extends BaseRichInputFormatBuilder {
+    private SpeedConf speedConf;
+
+    public S3InputFormatBuilder(S3InputFormat format) {
+        super(format);
+    }
+
+    /** Registers the configuration on both the builder and the wrapped input format. */
+    public void setS3Conf(S3Conf s3Conf) {
+        super.setConfig(s3Conf);
+        ((S3InputFormat) format).setS3Conf(s3Conf);
+    }
+
+    /**
+     * Checks that bucket, accessKey, secretKey and objects are present.
+     *
+     * @throws IllegalArgumentException listing every missing setting
+     */
+    @Override
+    protected void checkFormat() {
+        StringBuilder problems = new StringBuilder(256);
+        S3Conf conf = (S3Conf) ((S3InputFormat) format).getConfig();
+        if (StringUtils.isBlank(conf.getBucket())) {
+            reportMissing(
+                    problems,
+                    "bucket was not supplied separately.",
+                    "bucket was not supplied separately;\n");
+        }
+        if (StringUtils.isBlank(conf.getAccessKey())) {
+            reportMissing(
+                    problems,
+                    "accessKey was not supplied separately.",
+                    "accessKey was not supplied separately;\n");
+        }
+        if (StringUtils.isBlank(conf.getSecretKey())) {
+            reportMissing(
+                    problems,
+                    "secretKey was not supplied separately.",
+                    "secretKey was not supplied separately;\n");
+        }
+        if (CollectionUtils.isEmpty(conf.getObjects())) {
+            reportMissing(
+                    problems,
+                    "objects was not supplied separately.",
+                    "objects was not supplied separately;\n");
+        }
+        if (problems.length() != 0) {
+            throw new IllegalArgumentException(problems.toString());
+        }
+    }
+
+    /** Logs one validation failure and appends its line to the collected error text. */
+    private void reportMissing(StringBuilder problems, String logMessage, String errorLine) {
+        LOG.info(logMessage);
+        problems.append(errorLine);
+    }
+
+    public SpeedConf getSpeedConf() {
+        return speedConf;
+    }
+
+    public void setSpeedConf(SpeedConf speedConf) {
+        this.speedConf = speedConf;
+    }
+
+    public void setRestoreConf(RestoreConf restoreConf) {
+        ((S3InputFormat) format).setRestoreConf(restoreConf);
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputSplit.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputSplit.java
new file mode 100644
index 0000000000..3594d21528
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3InputSplit.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.source;
+
+import org.apache.flink.core.io.GenericInputSplit;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * splits {@link S3InputSplit#splits} contains all s3 objects instead of a part of all s3 objects
+ *
+ * @author jier
+ */
+public class S3InputSplit extends GenericInputSplit {
+
+    private static final long serialVersionUID = 8350870573057970895L;
+
+    /** Keys of the S3 objects carried by this split. */
+    private List<String> splits;
+
+    /**
+     * Creates a split with an empty, mutable object list.
+     *
+     * @param partitionNumber The number of the split's partition.
+     * @param totalNumberOfPartitions The total number of the splits (partitions).
+     */
+    public S3InputSplit(int partitionNumber, int totalNumberOfPartitions) {
+        super(partitionNumber, totalNumberOfPartitions);
+        this.splits = new ArrayList<>();
+    }
+
+    /**
+     * Creates a generic input split with the given split number.
+     *
+     * @param partitionNumber The number of the split's partition.
+     * @param totalNumberOfPartitions The total number of the splits (partitions).
+     * @param splits The object keys carried by this split; the list is stored as given, not
+     *     copied.
+     */
+    public S3InputSplit(int partitionNumber, int totalNumberOfPartitions, List<String> splits) {
+        super(partitionNumber, totalNumberOfPartitions);
+        this.splits = splits;
+    }
+
+    public List<String> getSplits() {
+        return splits;
+    }
+
+    public void setSplits(List<String> splits) {
+        this.splits = splits;
+    }
+
+    /** Appends one object key to the list carried by this split. */
+    public void addSplit(String split) {
+        splits.add(split);
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3SourceFactory.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3SourceFactory.java
new file mode 100644
index 0000000000..d099bd2323
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/source/S3SourceFactory.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.source;
+
+import com.dtstack.chunjun.conf.RestoreConf;
+import com.dtstack.chunjun.conf.SpeedConf;
+import com.dtstack.chunjun.conf.SyncConf;
+import com.dtstack.chunjun.connector.s3.conf.S3Conf;
+import com.dtstack.chunjun.connector.s3.converter.S3ColumnConverter;
+import com.dtstack.chunjun.connector.s3.converter.S3RawConverter;
+import com.dtstack.chunjun.connector.s3.converter.S3RowConverter;
+import com.dtstack.chunjun.converter.AbstractRowConverter;
+import com.dtstack.chunjun.converter.RawTypeConverter;
+import com.dtstack.chunjun.source.SourceFactory;
+import com.dtstack.chunjun.util.GsonUtil;
+import com.dtstack.chunjun.util.TableUtil;
+
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.types.logical.RowType;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Source factory that builds an {@link S3InputFormat} from the reader section of a sync job. */
+public class S3SourceFactory extends SourceFactory {
+    private static final Logger LOG = LoggerFactory.getLogger(S3SourceFactory.class);
+
+    private final S3Conf s3Conf;
+    private final RestoreConf restoreConf;
+    private final SpeedConf speedConf;
+
+    /**
+     * Maps the reader parameters onto an {@link S3Conf} (through a JSON round trip), attaches
+     * the reader's field list and keeps the job's restore and speed settings.
+     *
+     * @param config the sync job configuration
+     * @param env the execution environment passed on to the parent factory
+     */
+    public S3SourceFactory(SyncConf config, StreamExecutionEnvironment env) {
+        super(config, env);
+        s3Conf =
+                GsonUtil.GSON.fromJson(
+                        GsonUtil.GSON.toJson(config.getReader().getParameter()), S3Conf.class);
+        s3Conf.setColumn(config.getReader().getFieldList());
+        restoreConf = config.getRestore();
+        speedConf = config.getSpeed();
+        super.initCommonConf(s3Conf);
+    }
+
+    @Override
+    public RawTypeConverter getRawTypeConverter() {
+        return S3RawConverter::apply;
+    }
+
+    /**
+     * Builds the S3 input format with the converter matching {@code useAbstractBaseColumn}
+     * and turns it into a stream of rows.
+     */
+    @Override
+    public DataStream<RowData> createSource() {
+        S3InputFormatBuilder builder = new S3InputFormatBuilder(new S3InputFormat());
+        builder.setRestoreConf(restoreConf);
+        builder.setSpeedConf(speedConf);
+        builder.setS3Conf(s3Conf);
+
+        AbstractRowConverter rowConverter;
+        final RowType rowType = TableUtil.createRowType(s3Conf.getColumn(), getRawTypeConverter());
+        if (useAbstractBaseColumn) {
+            rowConverter = new S3ColumnConverter(rowType, s3Conf);
+        } else {
+            checkConstant(s3Conf);
+            rowConverter = new S3RowConverter(rowType, s3Conf);
+        }
+        builder.setRowConverter(rowConverter, useAbstractBaseColumn);
+        return createInput(builder.finish());
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/S3DynamicTableFactory.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/S3DynamicTableFactory.java
new file mode 100644
index 0000000000..48542a31a8
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/S3DynamicTableFactory.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.table;
+
+import com.dtstack.chunjun.connector.s3.conf.S3Conf;
+import com.dtstack.chunjun.connector.s3.sink.S3DynamicTableSink;
+import com.dtstack.chunjun.connector.s3.source.S3DynamicTableSource;
+import com.dtstack.chunjun.connector.s3.table.options.S3Options;
+import com.dtstack.chunjun.util.GsonUtil;
+
+import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.configuration.ReadableConfig;
+import org.apache.flink.table.api.TableSchema;
+import org.apache.flink.table.connector.sink.DynamicTableSink;
+import org.apache.flink.table.connector.source.DynamicTableSource;
+import org.apache.flink.table.factories.DynamicTableSinkFactory;
+import org.apache.flink.table.factories.DynamicTableSourceFactory;
+import org.apache.flink.table.factories.FactoryUtil;
+import org.apache.flink.table.utils.TableSchemaUtils;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+
+public class S3DynamicTableFactory implements DynamicTableSourceFactory, DynamicTableSinkFactory {
+ private static final String IDENTIFIER = "s3-x";
+
+ @Override
+ public DynamicTableSource createDynamicTableSource(Context context) {
+ FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
+
+ helper.validate();
+
+ ReadableConfig options = helper.getOptions();
+
+ S3Conf s3Conf = new S3Conf();
+ s3Conf.setAccessKey(options.get(S3Options.ACCESS_Key));
+ s3Conf.setSecretKey(options.get(S3Options.SECRET_Key));
+ s3Conf.setBucket(options.get(S3Options.BUCKET));
+ s3Conf.setObjects(GsonUtil.GSON.fromJson(options.get(S3Options.OBJECTS), ArrayList.class));
+ s3Conf.setFieldDelimiter(options.get(S3Options.FIELD_DELIMITER).trim().toCharArray()[0]);
+ s3Conf.setEncoding(options.get(S3Options.ENCODING));
+ s3Conf.setRegion(options.get(S3Options.REGION));
+ s3Conf.setIsFirstLineHeader(options.get(S3Options.IS_FIRST_LINE_HEADER));
+
+ TableSchema physicalSchema =
+ TableSchemaUtils.getPhysicalSchema(context.getCatalogTable().getSchema());
+
+ return new S3DynamicTableSource(physicalSchema, s3Conf);
+ }
+
+ @Override
+ public String factoryIdentifier() {
+ return IDENTIFIER;
+ }
+
+ @Override
+ public Set> requiredOptions() {
+ Set> options = new HashSet();
+ options.add(S3Options.ACCESS_Key);
+ options.add(S3Options.SECRET_Key);
+ options.add(S3Options.BUCKET);
+ options.add(S3Options.FIELD_DELIMITER);
+ return options;
+ }
+
+ @Override
+ public Set> optionalOptions() {
+ Set> options = new HashSet();
+ options.add(S3Options.ENCODING);
+ options.add(S3Options.REGION);
+ options.add(S3Options.IS_FIRST_LINE_HEADER);
+ options.add(S3Options.OBJECTS);
+ options.add(S3Options.OBJECT);
+ return options;
+ }
+
+ @Override
+ public DynamicTableSink createDynamicTableSink(Context context) {
+ FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
+
+ helper.validate();
+
+ ReadableConfig options = helper.getOptions();
+
+ S3Conf s3Conf = new S3Conf();
+ s3Conf.setAccessKey(options.get(S3Options.ACCESS_Key));
+ s3Conf.setSecretKey(options.get(S3Options.SECRET_Key));
+ s3Conf.setBucket(options.get(S3Options.BUCKET));
+ s3Conf.setObject(options.get(S3Options.OBJECT));
+ s3Conf.setFieldDelimiter(options.get(S3Options.FIELD_DELIMITER).trim().toCharArray()[0]);
+ s3Conf.setEncoding(options.get(S3Options.ENCODING));
+ s3Conf.setRegion(options.get(S3Options.REGION));
+ s3Conf.setIsFirstLineHeader(options.get(S3Options.IS_FIRST_LINE_HEADER));
+
+ TableSchema physicalSchema =
+ TableSchemaUtils.getPhysicalSchema(context.getCatalogTable().getSchema());
+
+ return new S3DynamicTableSink(physicalSchema, s3Conf);
+ }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/options/S3Options.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/options/S3Options.java
new file mode 100644
index 0000000000..8cd87c2f64
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/table/options/S3Options.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.table.options;
+
+import org.apache.flink.configuration.ConfigOption;
+
+import static org.apache.flink.configuration.ConfigOptions.key;
+
+/** Table options understood by the S3 connector. */
+public class S3Options {
+    // NOTE(review): the key is spelled "assessKey", which looks like a typo for "accessKey".
+    // It is kept unchanged because renaming it would break existing job definitions.
+    public static final ConfigOption<String> ACCESS_Key =
+            key("assessKey").stringType().noDefaultValue().withDescription("aws_access_key_id");
+
+    public static final ConfigOption<String> SECRET_Key =
+            key("secretKey").stringType().noDefaultValue().withDescription("aws_secret_access_key");
+
+    public static final ConfigOption<String> BUCKET =
+            key("bucket").stringType().noDefaultValue().withDescription("aws_bucket_name");
+
+    public static final ConfigOption<String> OBJECTS =
+            key("objects")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("object to be synchronized. supports regular expressions");
+
+    public static final ConfigOption<String> OBJECT =
+            key("object")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("writer file object, can only be one");
+
+    public static final ConfigOption<String> FIELD_DELIMITER =
+            key("fieldDelimiter")
+                    .stringType()
+                    .defaultValue(",")
+                    .withDescription("the field delimiter to read");
+
+    public static final ConfigOption<String> ENCODING =
+            key("encoding")
+                    .stringType()
+                    .defaultValue("UTF-8")
+                    .withDescription("read the encoding configuration of the file");
+
+    public static final ConfigOption<String> REGION =
+            key("region")
+                    .stringType()
+                    .defaultValue("us-west-2")
+                    .withDescription("an area where buckets are stored");
+
+    public static final ConfigOption<Boolean> IS_FIRST_LINE_HEADER =
+            key("isFirstLineHeader")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription(
+                            "whether the first line is a header line, if so, the first line is not read");
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/ReaderUtil.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/ReaderUtil.java
new file mode 100644
index 0000000000..2d138d3a55
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/ReaderUtil.java
@@ -0,0 +1,1676 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.nio.charset.Charset;
+import java.text.NumberFormat;
+import java.util.HashMap;
+
+/** A stream based parser for parsing delimited text data from a file or a stream. */
+public class ReaderUtil {
+
+ // running offset; readRecord() advances it by the length of each captured raw record,
+ // and only while raw record capture is enabled
+ private long nextOffset;
+
+ // source of characters; for file based instances it is opened lazily in checkDataLength()
+ private Reader inputStream = null;
+
+ private String fileName = null;
+
+ // this holds all the values for switches that the user is allowed to set
+ private UserSettings userSettings = new UserSettings();
+
+ private Charset charset = null;
+
+ // set to true by setRecordDelimiter(); readRecord() then matches RecordDelimiter
+ // instead of CR / LF
+ private boolean useCustomRecordDelimiter = false;
+
+ // this will be our working buffer to hold data chunks
+ // read in from the data file
+
+ private DataBuffer dataBuffer = new DataBuffer();
+
+ private ColumnBuffer columnBuffer = new ColumnBuffer();
+
+ // caches the characters of the current record: when one record spans two reads of the
+ // stream buffer, the part of the record that was already read is saved here
+ private RawRecordBuffer rawBuffer = new RawRecordBuffer();
+
+ private boolean[] isQualified = null;
+
+ private String rawRecord = "";
+
+ private HeadersHolder headersHolder = new HeadersHolder();
+
+ // these are all more or less global loop variables
+ // to keep from needing to pass them all into various
+ // methods during parsing
+
+ private boolean startedColumn = false;
+
+ private boolean startedWithQualifier = false;
+
+ private boolean hasMoreData = true;
+
+ private char lastLetter = '\0';
+
+ private boolean hasReadNextLine = false;
+
+ private int columnsCount = 0;
+
+ private long currentRecord = 0;
+
+ private String[] values = new String[StaticSettings.INITIAL_COLUMN_COUNT];
+
+ private boolean initialized = false;
+
+ private boolean closed = false;
+
+ /** Double up the text qualifier to represent an occurrence of the text qualifier. */
+ public static final int ESCAPE_MODE_DOUBLED = 1;
+
+ /**
+ * Use a backslash character before the text qualifier to represent an occurrence of the text
+ * qualifier.
+ */
+ public static final int ESCAPE_MODE_BACKSLASH = 2;
+
+    /**
+     * Creates a parser for the named file with an initial {@code nextOffset} of 0.
+     *
+     * @param fileName path of the file to parse.
+     * @param delimiter character used as the column delimiter.
+     * @param charset charset used to decode the file.
+     * @throws FileNotFoundException if the file does not exist.
+     */
+    public ReaderUtil(String fileName, char delimiter, Charset charset)
+            throws FileNotFoundException {
+        this(fileName, delimiter, charset, 0L);
+    }
+
+    /**
+     * Creates a parser for the named file. The file is only checked for existence here; it is
+     * opened later, the first time data is needed.
+     *
+     * @param fileName path of the file to parse; must not be null.
+     * @param delimiter character used as the column delimiter.
+     * @param charset charset used to decode the file; must not be null.
+     * @param nextOffset initial value of the running offset; {@code null} is treated as 0.
+     * @throws IllegalArgumentException if {@code fileName} or {@code charset} is null.
+     * @throws FileNotFoundException if the file does not exist.
+     */
+    public ReaderUtil(String fileName, char delimiter, Charset charset, Long nextOffset)
+            throws FileNotFoundException {
+        if (fileName == null) {
+            throw new IllegalArgumentException("Parameter fileName can not be null.");
+        }
+
+        if (charset == null) {
+            throw new IllegalArgumentException("Parameter charset can not be null.");
+        }
+
+        if (!new File(fileName).exists()) {
+            throw new FileNotFoundException("File " + fileName + " does not exist.");
+        }
+
+        this.fileName = fileName;
+        this.userSettings.Delimiter = delimiter;
+        this.charset = charset;
+        // unboxing a null Long would throw a NullPointerException, so default to offset 0
+        this.nextOffset = nextOffset == null ? 0L : nextOffset;
+
+        isQualified = new boolean[values.length];
+    }
+
+    /**
+     * Creates a parser for the named file, decoding it as ISO-8859-1.
+     *
+     * @param fileName path of the file to parse.
+     * @param delimiter character used as the column delimiter.
+     * @throws FileNotFoundException if the file does not exist.
+     */
+    public ReaderUtil(String fileName, char delimiter) throws FileNotFoundException {
+        this(fileName, delimiter, Charset.forName("ISO-8859-1"));
+    }
+
+    /**
+     * Creates a parser for the named file using {@code Letters.COMMA} as the column delimiter.
+     *
+     * @param fileName path of the file to parse.
+     * @throws FileNotFoundException if the file does not exist.
+     */
+    public ReaderUtil(String fileName) throws FileNotFoundException {
+        this(fileName, Letters.COMMA);
+    }
+
+    /**
+     * Creates a parser over an already opened reader, with an initial {@code nextOffset} of 0
+     * and the safety switch turned off.
+     *
+     * @param inputStream reader supplying the delimited text.
+     * @param delimiter character used as the column delimiter.
+     */
+    public ReaderUtil(Reader inputStream, char delimiter) {
+        this(inputStream, delimiter, 0L, false);
+    }
+
+    /**
+     * Creates a parser over an already opened reader and marks the instance as initialized, so
+     * no file is opened later.
+     *
+     * @param inputStream reader supplying the delimited text; must not be null.
+     * @param delimiter character used as the column delimiter.
+     * @param nextOffset initial value of the running offset; {@code null} is treated as 0.
+     * @param safetySwitch value stored as the safety switch setting.
+     * @throws IllegalArgumentException if {@code inputStream} is null.
+     */
+    public ReaderUtil(Reader inputStream, char delimiter, Long nextOffset, boolean safetySwitch) {
+        if (inputStream == null) {
+            throw new IllegalArgumentException("Parameter inputStream can not be null.");
+        }
+
+        this.inputStream = inputStream;
+        this.userSettings.Delimiter = delimiter;
+        this.userSettings.SafetySwitch = safetySwitch;
+        initialized = true;
+        // unboxing a null Long would throw a NullPointerException, so default to offset 0
+        this.nextOffset = nextOffset == null ? 0L : nextOffset;
+
+        isQualified = new boolean[values.length];
+    }
+
+    /**
+     * Creates a parser over an already opened reader using {@code Letters.COMMA} as the column
+     * delimiter.
+     *
+     * @param inputStream reader supplying the delimited text.
+     */
+    public ReaderUtil(Reader inputStream) {
+        this(inputStream, Letters.COMMA);
+    }
+
+    /**
+     * Creates a parser over a byte stream decoded with the given charset; the safety switch is
+     * turned off.
+     *
+     * @param inputStream byte stream supplying the delimited text.
+     * @param delimiter character used as the column delimiter.
+     * @param charset charset used to decode the stream.
+     * @param nextOffset initial value of the running offset.
+     */
+    public ReaderUtil(InputStream inputStream, char delimiter, Charset charset, Long nextOffset) {
+        this(new InputStreamReader(inputStream, charset), delimiter, nextOffset, false);
+    }
+
+    /**
+     * Creates a parser over a byte stream decoded with the given charset, with an initial
+     * {@code nextOffset} of 0 and the safety switch turned off.
+     *
+     * @param inputStream byte stream supplying the delimited text.
+     * @param delimiter character used as the column delimiter.
+     * @param charset charset used to decode the stream.
+     */
+    public ReaderUtil(InputStream inputStream, char delimiter, Charset charset) {
+        this(new InputStreamReader(inputStream, charset), delimiter, 0L, false);
+    }
+
+    /**
+     * Creates a parser over a byte stream decoded with the given charset, using
+     * {@code Letters.COMMA} as the column delimiter.
+     *
+     * @param inputStream byte stream supplying the delimited text.
+     * @param charset charset used to decode the stream.
+     */
+    public ReaderUtil(InputStream inputStream, Charset charset) {
+        this(new InputStreamReader(inputStream, charset));
+    }
+
+    /** @return whether the raw text of each record is captured while reading. */
+    public boolean getCaptureRawRecord() {
+        return userSettings.CaptureRawRecord;
+    }
+
+    /**
+     * Turns capturing of the raw record text on or off.
+     *
+     * @param captureRawRecord whether the raw text of each record should be captured.
+     */
+    public void setCaptureRawRecord(boolean captureRawRecord) {
+        userSettings.CaptureRawRecord = captureRawRecord;
+    }
+
+    /**
+     * @return the raw text stored by the last call to {@code readRecord()}; this is the empty
+     *     string when raw record capture is disabled.
+     */
+    public String getRawRecord() {
+        return rawRecord;
+    }
+
+    /**
+     * Reports whether leading and trailing whitespace is trimmed from column data that is not
+     * text qualified.
+     *
+     * @return the current trim-whitespace setting.
+     */
+    public boolean getTrimWhitespace() {
+        return userSettings.TrimWhitespace;
+    }
+
+    /**
+     * Chooses whether leading and trailing whitespace is trimmed from column data that is not
+     * text qualified.
+     *
+     * @param trimWhitespace the new trim-whitespace setting.
+     */
+    public void setTrimWhitespace(boolean trimWhitespace) {
+        userSettings.TrimWhitespace = trimWhitespace;
+    }
+
+    /**
+     * Reports the column delimiter currently in use.
+     *
+     * @return the character that separates columns.
+     */
+    public char getDelimiter() {
+        return userSettings.Delimiter;
+    }
+
+    /**
+     * Changes the column delimiter.
+     *
+     * @param delimiter the character that separates columns.
+     */
+    public void setDelimiter(char delimiter) {
+        userSettings.Delimiter = delimiter;
+    }
+
+    /** @return the character stored as the record delimiter setting. */
+    public char getRecordDelimiter() {
+        return userSettings.RecordDelimiter;
+    }
+
+    /**
+     * Installs a custom record delimiter. After this call records are ended by this character
+     * rather than by carriage return / line feed.
+     *
+     * @param recordDelimiter the character that terminates a record.
+     */
+    public void setRecordDelimiter(char recordDelimiter) {
+        userSettings.RecordDelimiter = recordDelimiter;
+        useCustomRecordDelimiter = true;
+    }
+
+    /**
+     * Reports the text qualifier character.
+     *
+     * @return the character used to qualify (quote) column text.
+     */
+    public char getTextQualifier() {
+        return userSettings.TextQualifier;
+    }
+
+    /**
+     * Changes the text qualifier character.
+     *
+     * @param textQualifier the character used to qualify (quote) column text.
+     */
+    public void setTextQualifier(char textQualifier) {
+        userSettings.TextQualifier = textQualifier;
+    }
+
+    /**
+     * Reports whether text qualifiers are honoured while parsing.
+     *
+     * @return the current use-text-qualifier setting.
+     */
+    public boolean getUseTextQualifier() {
+        return userSettings.UseTextQualifier;
+    }
+
+    /**
+     * Chooses whether text qualifiers are honoured while parsing.
+     *
+     * @param useTextQualifier the new use-text-qualifier setting.
+     */
+    public void setUseTextQualifier(boolean useTextQualifier) {
+        userSettings.UseTextQualifier = useTextQualifier;
+    }
+
+    /**
+     * Reports the comment marker character.
+     *
+     * @return the character that signals a comment line.
+     */
+    public char getComment() {
+        return userSettings.Comment;
+    }
+
+    /**
+     * Changes the comment marker character.
+     *
+     * @param comment the character that signals a comment line.
+     */
+    public void setComment(char comment) {
+        userSettings.Comment = comment;
+    }
+
+    /**
+     * Reports whether comment lines are recognised while parsing.
+     *
+     * @return the current use-comments setting.
+     */
+    public boolean getUseComments() {
+        return userSettings.UseComments;
+    }
+
+    /**
+     * Chooses whether comment lines are recognised while parsing.
+     *
+     * @param useComments the new use-comments setting.
+     */
+    public void setUseComments(boolean useComments) {
+        userSettings.UseComments = useComments;
+    }
+
+    /**
+     * Reports how an occurrence of the text qualifier inside qualified data is escaped.
+     *
+     * @return the current escape mode setting.
+     */
+    public int getEscapeMode() {
+        return userSettings.EscapeMode;
+    }
+
+ /**
+ * Sets the current way to escape an occurance of the text qualifier inside qualified data.
+ *
+ * @param escapeMode The way to escape an occurance of the text qualifier inside qualified data.
+ * @throws IllegalArgumentException When an illegal value is specified for escapeMode.
+ */
+ public void setEscapeMode(int escapeMode) throws IllegalArgumentException {
+ if (escapeMode != ESCAPE_MODE_DOUBLED && escapeMode != ESCAPE_MODE_BACKSLASH) {
+ throw new IllegalArgumentException("Parameter escapeMode must be a valid value.");
+ }
+
+ userSettings.EscapeMode = escapeMode;
+ }
+
+    /** @return whether blank records are skipped while reading. */
+    public boolean getSkipEmptyRecords() {
+        return userSettings.SkipEmptyRecords;
+    }
+
+    /**
+     * Chooses whether blank records are skipped while reading.
+     *
+     * @param skipEmptyRecords the new skip-empty-records setting.
+     */
+    public void setSkipEmptyRecords(boolean skipEmptyRecords) {
+        userSettings.SkipEmptyRecords = skipEmptyRecords;
+    }
+
+    /**
+     * Reports the safety switch setting. The switch guards against runaway memory use when the
+     * parsing settings (for example the encoding) do not match the data: while it is on,
+     * {@code readRecord()} closes the parser and throws once a single column grows beyond
+     * 100,000 characters.
+     *
+     * @return the current setting of the safety switch.
+     */
+    public boolean getSafetySwitch() {
+        return userSettings.SafetySwitch;
+    }
+
+    /**
+     * Turns the safety switch on or off. While it is on, {@code readRecord()} closes the parser
+     * and throws once a single column grows beyond 100,000 characters; turn it off only when
+     * the data format is known and such columns are expected.
+     *
+     * @param safetySwitch the new setting of the safety switch.
+     */
+    public void setSafetySwitch(boolean safetySwitch) {
+        userSettings.SafetySwitch = safetySwitch;
+    }
+
+    /**
+     * Reports how many columns the current record has.
+     *
+     * @return the number of columns found in the current record.
+     */
+    public int getColumnCount() {
+        return columnsCount;
+    }
+
+    /**
+     * Reports the index of the current record, which is the internal record counter minus one.
+     *
+     * @return the index of the current record.
+     */
+    public long getCurrentRecord() {
+        return currentRecord - 1;
+    }
+
+    /** @return the number of headers currently held. */
+    public int getHeaderCount() {
+        return headersHolder.Length;
+    }
+
+    /**
+     * Returns a copy of the header values.
+     *
+     * @return a new array holding the headers, or {@code null} if no headers are set.
+     * @throws IOException Thrown if this object has already been closed.
+     */
+    public String[] getHeaders() throws IOException {
+        checkClosed();
+
+        String[] current = headersHolder.Headers;
+        if (current == null) {
+            return null;
+        }
+
+        // hand out a copy so outside code cannot modify the array directly,
+        // which would throw off the lookup of indexes by header name
+        int count = headersHolder.Length;
+        String[] copy = new String[count];
+        System.arraycopy(current, 0, copy, 0, count);
+        return copy;
+    }
+
+    /**
+     * Replaces the header values and rebuilds the name-to-index lookup. The array is stored as
+     * is, not copied. When a name occurs more than once, the lookup keeps the last index.
+     *
+     * @param headers the new header values; {@code null} clears the headers.
+     */
+    public void setHeaders(String[] headers) {
+        headersHolder.Headers = headers;
+
+        headersHolder.IndexByName.clear();
+
+        if (headers != null) {
+            headersHolder.Length = headers.length;
+        } else {
+            headersHolder.Length = 0;
+        }
+
+        // use headersHolder.Length here in case headers is null
+        for (int i = 0; i < headersHolder.Length; i++) {
+            // Integer.valueOf replaces the deprecated Integer(int) constructor
+            headersHolder.IndexByName.put(headers[i], Integer.valueOf(i));
+        }
+    }
+
+    /**
+     * Returns a copy of the column values of the current record.
+     *
+     * @return a new array with exactly {@code getColumnCount()} entries.
+     * @throws IOException Thrown if this object has already been closed.
+     */
+    public String[] getValues() throws IOException {
+        checkClosed();
+
+        // the backing array may be longer than the number of columns in this record,
+        // so copy only the used prefix instead of cloning it
+        int count = columnsCount;
+        String[] copy = new String[count];
+        System.arraycopy(values, 0, copy, 0, count);
+        return copy;
+    }
+
+ /**
+ * Returns the current column value for a given column index.
+ *
+ * @param columnIndex The index of the column.
+ * @return The current column value.
+ * @throws IOException Thrown if this object has already been closed.
+ */
+ public String get(int columnIndex) throws IOException {
+ checkClosed();
+
+ if (columnIndex > -1 && columnIndex < columnsCount) {
+ return values[columnIndex];
+ } else {
+ return "";
+ }
+ }
+
+ /**
+ * Returns the current column value for a given column header name.
+ *
+ * @param headerName The header name of the column.
+ * @return The current column value.
+ * @throws IOException Thrown if this object has already been closed.
+ */
+ public String get(String headerName) throws IOException {
+ checkClosed();
+
+ return get(getIndex(headerName));
+ }
+
+ public static ReaderUtil parse(String data) {
+ if (data == null) {
+ throw new IllegalArgumentException("Parameter data can not be null.");
+ }
+
+ return new ReaderUtil(new StringReader(data));
+ }
+
+ /**
+ * Reads another record.
+ *
+ * <p>Resets the column count and the raw buffer position, then walks the data buffer one
+ * character at a time (refilling it through checkDataLength() when it runs out) until a
+ * record has been ended or no more data is available. When raw record capture is enabled,
+ * the raw text of the record is stored and nextOffset is advanced by the number of
+ * characters consumed; otherwise the raw record is set to the empty string.
+ *
+ * @return Whether another record was successfully read or not.
+ * @throws IOException Thrown if an error occurs while reading data from the source stream,
+ * or if the safety switch is on and a column exceeds 100,000 characters.
+ */
+ public boolean readRecord() throws IOException {
+ checkClosed();
+
+ columnsCount = 0;
+ rawBuffer.Position = 0;
+
+ dataBuffer.LineStart = dataBuffer.Position;
+
+ hasReadNextLine = false;
+
+ // check whether there is any data left to read
+
+ if (hasMoreData) {
+ // loop over the data stream until we reach the end of the data
+ // or the end of a record
+
+ do {
+ if (dataBuffer.Position == dataBuffer.Count) {
+ // the buffered data is exhausted before the end of this record
+ // was found, so refill the buffer
+ checkDataLength();
+ } else {
+ startedWithQualifier = false;
+
+ // grab the letter at the current position as a char
+
+ char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
+
+ if (userSettings.UseTextQualifier
+ && currentLetter == userSettings.TextQualifier) {
+ // this will be a text qualified column, so startedWithQualifier is set
+ // and the column is handled by this separate branch
+
+ lastLetter = currentLetter;
+
+ // read qualified
+ startedColumn = true;
+ // having read the text qualifier, the next character is where
+ // this column's data starts
+ dataBuffer.ColumnStart = dataBuffer.Position + 1;
+ // mark that this column started with a text qualifier
+ startedWithQualifier = true;
+ boolean lastLetterWasQualifier = false;
+
+ char escapeChar = userSettings.TextQualifier;
+
+ if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {
+ escapeChar = Letters.BACKSLASH;
+ }
+
+ // a text qualifier was encountered within a column
+ boolean eatingTrailingJunk = false;
+ boolean lastLetterWasEscape = false;
+ boolean readingComplexEscape = false;
+ int escape = ComplexEscape.UNICODE;
+ int escapeLength = 0;
+ char escapeValue = (char) 0;
+
+ // advance the position by one, i.e. move to the offset where the
+ // column data starts
+ dataBuffer.Position++;
+
+ // starting after the opening text qualifier, loop over the data in
+ // dataBuffer to collect the value of the current column
+ do {
+ if (dataBuffer.Position == dataBuffer.Count) {
+ // if advancing the position reached the end of the buffered
+ // data, refill the buffer
+ checkDataLength();
+ } else {
+ // grab the current letter as a char
+
+ currentLetter = dataBuffer.Buffer[dataBuffer.Position];
+
+ if (eatingTrailingJunk) {
+ dataBuffer.ColumnStart = dataBuffer.Position + 1;
+
+ if (currentLetter == userSettings.Delimiter) {
+ endColumn();
+ } else if ((!useCustomRecordDelimiter
+ && (currentLetter == Letters.CR
+ || currentLetter == Letters.LF))
+ || (useCustomRecordDelimiter
+ && currentLetter
+ == userSettings.RecordDelimiter)) {
+ endColumn();
+
+ endRecord();
+ }
+ } else if (readingComplexEscape) {
+ escapeLength++;
+
+ switch (escape) {
+ case ComplexEscape.UNICODE:
+ escapeValue *= (char) 16;
+ escapeValue += hexToDec(currentLetter);
+
+ if (escapeLength == 4) {
+ readingComplexEscape = false;
+ }
+
+ break;
+ case ComplexEscape.OCTAL:
+ escapeValue *= (char) 8;
+ escapeValue += (char) (currentLetter - '0');
+
+ if (escapeLength == 3) {
+ readingComplexEscape = false;
+ }
+
+ break;
+ case ComplexEscape.DECIMAL:
+ escapeValue *= (char) 10;
+ escapeValue += (char) (currentLetter - '0');
+
+ if (escapeLength == 3) {
+ readingComplexEscape = false;
+ }
+
+ break;
+ case ComplexEscape.HEX:
+ escapeValue *= (char) 16;
+ escapeValue += hexToDec(currentLetter);
+
+ if (escapeLength == 2) {
+ readingComplexEscape = false;
+ }
+
+ break;
+ }
+
+ if (!readingComplexEscape) {
+ appendLetter(escapeValue);
+ } else {
+ dataBuffer.ColumnStart = dataBuffer.Position + 1;
+ }
+ } else if (currentLetter == userSettings.TextQualifier) {
+ // the character just read is the text qualifier
+ if (lastLetterWasEscape) {
+ lastLetterWasEscape = false;
+ lastLetterWasQualifier = false;
+ } else {
+ updateCurrentValue();
+
+ if (userSettings.EscapeMode == ESCAPE_MODE_DOUBLED) {
+ lastLetterWasEscape = true;
+ }
+
+ lastLetterWasQualifier = true;
+ }
+ } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
+ && lastLetterWasEscape) {
+ switch (currentLetter) {
+ case 'n':
+ appendLetter(Letters.LF);
+ break;
+ case 'r':
+ appendLetter(Letters.CR);
+ break;
+ case 't':
+ appendLetter(Letters.TAB);
+ break;
+ case 'b':
+ appendLetter(Letters.BACKSPACE);
+ break;
+ case 'f':
+ appendLetter(Letters.FORM_FEED);
+ break;
+ case 'e':
+ appendLetter(Letters.ESCAPE);
+ break;
+ case 'v':
+ appendLetter(Letters.VERTICAL_TAB);
+ break;
+ case 'a':
+ appendLetter(Letters.ALERT);
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ escape = ComplexEscape.OCTAL;
+ readingComplexEscape = true;
+ escapeLength = 1;
+ escapeValue = (char) (currentLetter - '0');
+ dataBuffer.ColumnStart = dataBuffer.Position + 1;
+ break;
+ case 'u':
+ case 'x':
+ case 'o':
+ case 'd':
+ case 'U':
+ case 'X':
+ case 'O':
+ case 'D':
+ switch (currentLetter) {
+ case 'u':
+ case 'U':
+ escape = ComplexEscape.UNICODE;
+ break;
+ case 'x':
+ case 'X':
+ escape = ComplexEscape.HEX;
+ break;
+ case 'o':
+ case 'O':
+ escape = ComplexEscape.OCTAL;
+ break;
+ case 'd':
+ case 'D':
+ escape = ComplexEscape.DECIMAL;
+ break;
+ }
+
+ readingComplexEscape = true;
+ escapeLength = 0;
+ escapeValue = (char) 0;
+ dataBuffer.ColumnStart = dataBuffer.Position + 1;
+
+ break;
+ default:
+ break;
+ }
+
+ lastLetterWasEscape = false;
+
+ // can only happen for ESCAPE_MODE_BACKSLASH
+ } else if (currentLetter == escapeChar) {
+ updateCurrentValue();
+ lastLetterWasEscape = true;
+ } else {
+ if (lastLetterWasQualifier) {
+ // the previous character was a text qualifier
+ if (currentLetter == userSettings.Delimiter) {
+ // and the current character is the column delimiter,
+ // so the column is complete
+ endColumn();
+ } else if ((!useCustomRecordDelimiter
+ && (currentLetter == Letters.CR
+ || currentLetter == Letters.LF))
+ || (useCustomRecordDelimiter
+ && currentLetter
+ == userSettings.RecordDelimiter)) {
+ // and the current character is a default line terminator or
+ // the custom record delimiter, so the record is complete
+ endColumn();
+
+ endRecord();
+ } else {
+ // and the current character is some other character, so the
+ // column value is considered not finished yet; the previous
+ // text qualifier was an ordinary character inside the column,
+ // the offset is advanced by one and eatingTrailingJunk is set
+ // to true
+ dataBuffer.ColumnStart = dataBuffer.Position + 1;
+
+ eatingTrailingJunk = true;
+ }
+
+ // make sure to clear the flag for next
+ // run of the loop
+
+ lastLetterWasQualifier = false;
+ }
+ }
+
+ // keep track of the last letter because we need
+ // it for several key decisions
+
+ lastLetter = currentLetter;
+ if (startedColumn) {
+ // if reading of this column's data has started, advance the
+ // position by one
+ dataBuffer.Position++;
+
+ if (userSettings.SafetySwitch
+ && dataBuffer.Position
+ - dataBuffer.ColumnStart
+ + columnBuffer.Position
+ > 100000) {
+ close();
+
+ throw new IOException(
+ "Maximum column length of 100,000 exceeded in column "
+ + NumberFormat.getIntegerInstance()
+ .format(columnsCount)
+ + " in record "
+ + NumberFormat.getIntegerInstance()
+ .format(currentRecord)
+ + ". Set the SafetySwitch property to false"
+ + " if you're expecting column lengths greater than 100,000 characters to"
+ + " avoid this error.");
+ }
+ }
+ } // end else
+
+ } while (hasMoreData && startedColumn);
+ } else if (currentLetter == userSettings.Delimiter) {
+ // we encountered a column with no data, so
+ // just send the end column
+
+ lastLetter = currentLetter;
+
+ endColumn();
+ } else if (useCustomRecordDelimiter
+ && currentLetter == userSettings.RecordDelimiter) {
+ // this will skip blank lines
+ if (startedColumn || columnsCount > 0 || !userSettings.SkipEmptyRecords) {
+ endColumn();
+
+ endRecord();
+ } else {
+ dataBuffer.LineStart = dataBuffer.Position + 1;
+ }
+
+ lastLetter = currentLetter;
+ } else if (!useCustomRecordDelimiter
+ && (currentLetter == Letters.CR || currentLetter == Letters.LF)) {
+ // this will skip blank lines
+ if (startedColumn
+ || columnsCount > 0
+ || (!userSettings.SkipEmptyRecords
+ && (currentLetter == Letters.CR
+ || lastLetter != Letters.CR))) {
+ endColumn();
+
+ endRecord();
+ } else {
+ dataBuffer.LineStart = dataBuffer.Position + 1;
+ }
+
+ lastLetter = currentLetter;
+ } else if (userSettings.UseComments
+ && columnsCount == 0
+ && currentLetter == userSettings.Comment) {
+ // encountered a comment character at the beginning of
+ // the line so just ignore the rest of the line
+
+ lastLetter = currentLetter;
+
+ skipLine();
+ } else if (userSettings.TrimWhitespace
+ && (currentLetter == Letters.SPACE || currentLetter == Letters.TAB)) {
+ // do nothing, this will trim leading whitespace
+ // for both text qualified columns and non
+
+ startedColumn = true;
+ dataBuffer.ColumnStart = dataBuffer.Position + 1;
+ } else {
+ // since the letter wasn't a special letter, this
+ // will be the first letter of our current column
+
+ startedColumn = true;
+ dataBuffer.ColumnStart = dataBuffer.Position;
+ boolean lastLetterWasBackslash = false;
+ boolean readingComplexEscape = false;
+ int escape = ComplexEscape.UNICODE;
+ int escapeLength = 0;
+ char escapeValue = (char) 0;
+
+ // on the first pass currentLetter already holds the character read above
+ boolean firstLoop = true;
+
+ do {
+ if (!firstLoop && dataBuffer.Position == dataBuffer.Count) {
+ checkDataLength();
+ } else {
+ if (!firstLoop) {
+ // grab the current letter as a char
+ currentLetter = dataBuffer.Buffer[dataBuffer.Position];
+ }
+
+ if (!userSettings.UseTextQualifier
+ && userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
+ && currentLetter == Letters.BACKSLASH) {
+ if (lastLetterWasBackslash) {
+ lastLetterWasBackslash = false;
+ } else {
+ updateCurrentValue();
+ lastLetterWasBackslash = true;
+ }
+ } else if (readingComplexEscape) {
+ escapeLength++;
+
+ switch (escape) {
+ case ComplexEscape.UNICODE:
+ escapeValue *= (char) 16;
+ escapeValue += hexToDec(currentLetter);
+
+ if (escapeLength == 4) {
+ readingComplexEscape = false;
+ }
+
+ break;
+ case ComplexEscape.OCTAL:
+ escapeValue *= (char) 8;
+ escapeValue += (char) (currentLetter - '0');
+
+ if (escapeLength == 3) {
+ readingComplexEscape = false;
+ }
+
+ break;
+ case ComplexEscape.DECIMAL:
+ escapeValue *= (char) 10;
+ escapeValue += (char) (currentLetter - '0');
+
+ if (escapeLength == 3) {
+ readingComplexEscape = false;
+ }
+
+ break;
+ case ComplexEscape.HEX:
+ escapeValue *= (char) 16;
+ escapeValue += hexToDec(currentLetter);
+
+ if (escapeLength == 2) {
+ readingComplexEscape = false;
+ }
+
+ break;
+ }
+
+ if (!readingComplexEscape) {
+ appendLetter(escapeValue);
+ } else {
+ dataBuffer.ColumnStart = dataBuffer.Position + 1;
+ }
+ } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH
+ && lastLetterWasBackslash) {
+ switch (currentLetter) {
+ case 'n':
+ appendLetter(Letters.LF);
+ break;
+ case 'r':
+ appendLetter(Letters.CR);
+ break;
+ case 't':
+ appendLetter(Letters.TAB);
+ break;
+ case 'b':
+ appendLetter(Letters.BACKSPACE);
+ break;
+ case 'f':
+ appendLetter(Letters.FORM_FEED);
+ break;
+ case 'e':
+ appendLetter(Letters.ESCAPE);
+ break;
+ case 'v':
+ appendLetter(Letters.VERTICAL_TAB);
+ break;
+ case 'a':
+ appendLetter(Letters.ALERT);
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ escape = ComplexEscape.OCTAL;
+ readingComplexEscape = true;
+ escapeLength = 1;
+ escapeValue = (char) (currentLetter - '0');
+ dataBuffer.ColumnStart = dataBuffer.Position + 1;
+ break;
+ case 'u':
+ case 'x':
+ case 'o':
+ case 'd':
+ case 'U':
+ case 'X':
+ case 'O':
+ case 'D':
+ switch (currentLetter) {
+ case 'u':
+ case 'U':
+ escape = ComplexEscape.UNICODE;
+ break;
+ case 'x':
+ case 'X':
+ escape = ComplexEscape.HEX;
+ break;
+ case 'o':
+ case 'O':
+ escape = ComplexEscape.OCTAL;
+ break;
+ case 'd':
+ case 'D':
+ escape = ComplexEscape.DECIMAL;
+ break;
+ }
+
+ readingComplexEscape = true;
+ escapeLength = 0;
+ escapeValue = (char) 0;
+ dataBuffer.ColumnStart = dataBuffer.Position + 1;
+
+ break;
+ default:
+ break;
+ }
+
+ lastLetterWasBackslash = false;
+ } else {
+ if (currentLetter == userSettings.Delimiter) {
+ endColumn();
+ } else if ((!useCustomRecordDelimiter
+ && (currentLetter == Letters.CR
+ || currentLetter == Letters.LF))
+ || (useCustomRecordDelimiter
+ && currentLetter
+ == userSettings.RecordDelimiter)) {
+ endColumn();
+
+ endRecord();
+ }
+ }
+
+ // keep track of the last letter because we need
+ // it for several key decisions
+
+ lastLetter = currentLetter;
+ firstLoop = false;
+
+ if (startedColumn) {
+ dataBuffer.Position++;
+
+ if (userSettings.SafetySwitch
+ && dataBuffer.Position
+ - dataBuffer.ColumnStart
+ + columnBuffer.Position
+ > 100000) {
+ close();
+
+ throw new IOException(
+ "Maximum column length of 100,000 exceeded in column "
+ + NumberFormat.getIntegerInstance()
+ .format(columnsCount)
+ + " in record "
+ + NumberFormat.getIntegerInstance()
+ .format(currentRecord)
+ + ". Set the SafetySwitch property to false"
+ + " if you're expecting column lengths greater than 100,000 characters to"
+ + " avoid this error.");
+ }
+ }
+ } // end else
+ } while (hasMoreData && startedColumn);
+ }
+
+ if (hasMoreData) {
+ dataBuffer.Position++;
+ }
+ } // end else
+ } while (hasMoreData && !hasReadNextLine);
+
+ // check to see if we hit the end of the file
+ // without processing the current record
+
+ if (startedColumn || lastLetter == userSettings.Delimiter) {
+ endColumn();
+
+ endRecord();
+ }
+ }
+
+ if (userSettings.CaptureRawRecord) {
+ if (hasMoreData) {
+ if (rawBuffer.Position == 0) {
+ // the whole record lies inside the current data buffer
+ rawRecord =
+ new String(
+ dataBuffer.Buffer,
+ dataBuffer.LineStart,
+ dataBuffer.Position - dataBuffer.LineStart - 1);
+ nextOffset = nextOffset + dataBuffer.Position - dataBuffer.LineStart;
+ } else {
+ // part of the record was saved to the raw buffer before a refill;
+ // join it with the part in the current data buffer
+ rawRecord =
+ new String(rawBuffer.Buffer, 0, rawBuffer.Position)
+ + new String(
+ dataBuffer.Buffer,
+ dataBuffer.LineStart,
+ dataBuffer.Position - dataBuffer.LineStart - 1);
+ nextOffset =
+ nextOffset
+ + rawBuffer.Position
+ + dataBuffer.Position
+ - dataBuffer.LineStart;
+ }
+ } else {
+ // for hasMoreData to ever be false, all data would have had to
+ // have been
+ // copied to the raw buffer
+ rawRecord = new String(rawBuffer.Buffer, 0, rawBuffer.Position);
+ nextOffset = nextOffset + rawBuffer.Position;
+ }
+ } else {
+ rawRecord = "";
+ }
+
+ return hasReadNextLine;
+ }
+
+    /**
+     * Refills the data buffer from the source. On first use of a file based instance the file
+     * is opened here. Before the buffer is overwritten, the pending column value is flushed
+     * and, when raw record capture is enabled, the unconsumed part of the current line is saved
+     * to the raw buffer.
+     *
+     * @throws IOException Thrown if an error occurs while reading data from the source stream.
+     */
+    private void checkDataLength() throws IOException {
+        if (!initialized) {
+            if (fileName != null) {
+                FileInputStream fileStream = new FileInputStream(fileName);
+                InputStreamReader decoder = new InputStreamReader(fileStream, charset);
+                inputStream = new BufferedReader(decoder, StaticSettings.MAX_FILE_BUFFER_SIZE);
+            }
+
+            charset = null;
+            initialized = true;
+        }
+
+        updateCurrentValue();
+
+        if (userSettings.CaptureRawRecord && dataBuffer.Count > 0) {
+            // characters of the current line that are still sitting in the data buffer
+            int pending = dataBuffer.Count - dataBuffer.LineStart;
+            int free = rawBuffer.Buffer.length - rawBuffer.Position;
+
+            if (free < pending) {
+                // grow by at least the current capacity so repeated refills stay cheap
+                int grownLength =
+                        rawBuffer.Buffer.length + Math.max(pending, rawBuffer.Buffer.length);
+                char[] grown = new char[grownLength];
+                System.arraycopy(rawBuffer.Buffer, 0, grown, 0, rawBuffer.Position);
+                rawBuffer.Buffer = grown;
+            }
+
+            System.arraycopy(
+                    dataBuffer.Buffer,
+                    dataBuffer.LineStart,
+                    rawBuffer.Buffer,
+                    rawBuffer.Position,
+                    pending);
+            rawBuffer.Position += pending;
+        }
+
+        try {
+            dataBuffer.Count = inputStream.read(dataBuffer.Buffer, 0, dataBuffer.Buffer.length);
+        } catch (IOException ex) {
+            close();
+
+            throw ex;
+        }
+
+        // a count of -1 means the source is exhausted, so flag the end of the data
+        if (dataBuffer.Count == -1) {
+            hasMoreData = false;
+        }
+
+        // reset the offsets for the freshly filled buffer
+        dataBuffer.Position = 0;
+        dataBuffer.LineStart = 0;
+        dataBuffer.ColumnStart = 0;
+    }
+
+    /**
+     * Read the first record of data as column headers.
+     *
+     * <p>The columns of the record just read become the headers and are entered into the
+     * name-to-index lookup. On success the record counter is decremented again so the header
+     * record is not counted. The column count is reset to 0 afterwards.
+     *
+     * @return Whether the header record was successfully read or not.
+     * @throws IOException Thrown if an error occurs while reading data from the source stream.
+     */
+    public boolean readHeaders() throws IOException {
+        boolean result = readRecord();
+
+        // copy the header data from the column array
+        // to the header string array
+
+        headersHolder.Length = columnsCount;
+
+        headersHolder.Headers = new String[columnsCount];
+
+        for (int i = 0; i < headersHolder.Length; i++) {
+            String columnValue = get(i);
+
+            headersHolder.Headers[i] = columnValue;
+
+            // if there are duplicate header names, we will save the last one;
+            // Integer.valueOf replaces the deprecated Integer(int) constructor
+            headersHolder.IndexByName.put(columnValue, Integer.valueOf(i));
+        }
+
+        if (result) {
+            currentRecord--;
+        }
+
+        columnsCount = 0;
+
+        return result;
+    }
+
+ /**
+  * Looks up the header text stored for a column position.
+  *
+  * @param columnIndex The index of the header column being requested.
+  * @return The header at that index, or an empty string when the index lies outside the
+  *     header array.
+  * @throws IOException Thrown if this object has already been closed.
+  */
+ public String getHeader(int columnIndex) throws IOException {
+     checkClosed();
+
+     // guard clause: anything outside [0, Length) has no header
+     boolean outOfRange = columnIndex < 0 || columnIndex >= headersHolder.Length;
+     if (outOfRange) {
+         return "";
+     }
+
+     return headersHolder.Headers[columnIndex];
+ }
+
+ /**
+  * Reports the qualifier flag recorded for a column of the current record.
+  *
+  * @param columnIndex The index of the column being asked about.
+  * @return The stored flag, or {@code false} when the index is outside the current record.
+  * @throws IOException Thrown if this object has already been closed.
+  */
+ public boolean isQualified(int columnIndex) throws IOException {
+     checkClosed();
+
+     // short-circuiting keeps the array access behind the bounds check
+     return columnIndex >= 0 && columnIndex < columnsCount && isQualified[columnIndex];
+ }
+
+ /**
+  * Finishes the column currently being parsed and stores its value in {@code values}.
+  *
+  * <p>The text is taken straight from {@code dataBuffer} when nothing was spilled into
+  * {@code columnBuffer}; otherwise the remaining characters are flushed into
+  * {@code columnBuffer} and the value is read from there. Trailing spaces and tabs are
+  * trimmed when {@code TrimWhitespace} is on and the column did not start with a text
+  * qualifier. The {@code values} and {@code isQualified} arrays are doubled when full.
+  *
+  * @throws IOException Thrown if a very rare extreme exception occurs during parsing, normally
+  *     resulting from improper data format.
+  */
+ private void endColumn() throws IOException {
+     String currentValue = "";
+
+     // must be called before setting startedColumn = false
+     if (startedColumn) {
+         boolean trimTrailing = userSettings.TrimWhitespace && !startedWithQualifier;
+
+         if (columnBuffer.Position == 0) {
+             // nothing cached in columnBuffer: the whole value lies in dataBuffer
+             if (dataBuffer.ColumnStart < dataBuffer.Position) {
+                 int lastIndex = dataBuffer.Position - 1;
+
+                 if (trimTrailing) {
+                     while (lastIndex >= dataBuffer.ColumnStart
+                             && (dataBuffer.Buffer[lastIndex] == Letters.SPACE
+                                     || dataBuffer.Buffer[lastIndex] == Letters.TAB)) {
+                         lastIndex--;
+                     }
+                 }
+
+                 currentValue =
+                         new String(
+                                 dataBuffer.Buffer,
+                                 dataBuffer.ColumnStart,
+                                 lastIndex - dataBuffer.ColumnStart + 1);
+             }
+         } else {
+             // columnBuffer already holds part of the value: move the rest of the column
+             // from dataBuffer into it and read the value from there
+             updateCurrentValue();
+
+             int lastIndex = columnBuffer.Position - 1;
+
+             // trim trailing whitespace; tabs are checked as well as spaces so that this
+             // branch behaves like the dataBuffer branch above
+             if (trimTrailing) {
+                 while (lastIndex >= 0
+                         && (columnBuffer.Buffer[lastIndex] == Letters.SPACE
+                                 || columnBuffer.Buffer[lastIndex] == Letters.TAB)) {
+                     lastIndex--;
+                 }
+             }
+
+             currentValue = new String(columnBuffer.Buffer, 0, lastIndex + 1);
+         }
+     }
+
+     columnBuffer.Position = 0;
+
+     startedColumn = false;
+
+     if (columnsCount >= 100000 && userSettings.SafetySwitch) {
+         close();
+
+         throw new IOException(
+                 "Maximum column count of 100,000 exceeded in record "
+                         + NumberFormat.getIntegerInstance().format(currentRecord)
+                         + ". Set the SafetySwitch property to false"
+                         + " if you're expecting more than 100,000 columns per record to"
+                         + " avoid this error.");
+     }
+
+     // check to see if our current holder array for column chunks is still big enough to
+     // handle another column chunk
+     if (columnsCount == values.length) {
+         // holder arrays need to grow to be able to hold another column
+         int newLength = values.length * 2;
+
+         String[] holder = new String[newLength];
+         System.arraycopy(values, 0, holder, 0, values.length);
+         values = holder;
+
+         boolean[] qualifiedHolder = new boolean[newLength];
+         System.arraycopy(isQualified, 0, qualifiedHolder, 0, isQualified.length);
+         isQualified = qualifiedHolder;
+     }
+
+     values[columnsCount] = currentValue;
+
+     isQualified[columnsCount] = startedWithQualifier;
+
+     columnsCount++;
+ }
+
+ /**
+  * Appends one character to {@code columnBuffer}, doubling the buffer when it is full, and
+  * moves the column start of {@code dataBuffer} to one past the current position.
+  *
+  * @param letter The character to append.
+  */
+ private void appendLetter(char letter) {
+     char[] target = columnBuffer.Buffer;
+     int writeAt = columnBuffer.Position;
+
+     if (writeAt == target.length) {
+         char[] grown = new char[target.length * 2];
+         System.arraycopy(target, 0, grown, 0, writeAt);
+         columnBuffer.Buffer = grown;
+         target = grown;
+     }
+
+     // advance the position first, then store, mirroring Buffer[Position++] = letter
+     columnBuffer.Position = writeAt + 1;
+     target[writeAt] = letter;
+
+     dataBuffer.ColumnStart = dataBuffer.Position + 1;
+ }
+
+ /**
+  * Moves the not-yet-saved characters of the column in progress from {@code dataBuffer} into
+  * {@code columnBuffer}, then sets the column start to one past the current position.
+  */
+ private void updateCurrentValue() {
+     // characters between the column start and the current read position
+     int pending = dataBuffer.Position - dataBuffer.ColumnStart;
+
+     if (startedColumn && pending > 0) {
+         int free = columnBuffer.Buffer.length - columnBuffer.Position;
+
+         if (free < pending) {
+             // not enough room: grow by the larger of the pending size and the current length
+             int grownLength =
+                     columnBuffer.Buffer.length
+                             + Math.max(pending, columnBuffer.Buffer.length);
+             char[] grown = new char[grownLength];
+             System.arraycopy(columnBuffer.Buffer, 0, grown, 0, columnBuffer.Position);
+             columnBuffer.Buffer = grown;
+         }
+
+         System.arraycopy(
+                 dataBuffer.Buffer,
+                 dataBuffer.ColumnStart,
+                 columnBuffer.Buffer,
+                 columnBuffer.Position,
+                 pending);
+
+         // the column buffer position always equals the number of cached characters
+         columnBuffer.Position += pending;
+     }
+
+     dataBuffer.ColumnStart = dataBuffer.Position + 1;
+ }
+
+ /**
+  * Marks the current record as complete.
+  *
+  * @throws IOException Thrown if an error occurs while reading data from the source stream.
+  */
+ private void endRecord() throws IOException {
+     currentRecord += 1;
+
+     // raising this flag is what lets the reading loop terminate
+     hasReadNextLine = true;
+ }
+
+ /**
+  * Resolves a column header name to its column index.
+  *
+  * @param headerName The header name of the column.
+  * @return The column index for the given column header name. Returns -1 if not found.
+  * @throws IOException Thrown if this object has already been closed.
+  */
+ public int getIndex(String headerName) throws IOException {
+     checkClosed();
+
+     Object position = headersHolder.IndexByName.get(headerName);
+
+     return position == null ? -1 : ((Integer) position).intValue();
+ }
+
+ /**
+  * Reads the next record without counting it towards the record total.
+  *
+  * @return Whether a record was read; {@code false} when no more data is available.
+  * @throws IOException Thrown if this object has already been closed or reading fails.
+  */
+ public boolean skipRecord() throws IOException {
+     checkClosed();
+
+     if (!hasMoreData) {
+         return false;
+     }
+
+     boolean recordRead = readRecord();
+     if (recordRead) {
+         // undo the increment made while reading, the record is being skipped
+         currentRecord--;
+     }
+
+     return recordRead;
+ }
+
+ /**
+ * Skips the next line of data using the standard end of line characters and does not do any
+ * column delimited parsing.
+ *
+ * <p>The column count and the captured raw record are cleared as part of the call.
+ *
+ * @return Whether a line was successfully skipped or not.
+ * @throws IOException Thrown if an error occurs while reading data from the source stream.
+ */
+ public boolean skipLine() throws IOException {
+ checkClosed();
+
+ // clear public column values for current line
+
+ columnsCount = 0;
+
+ boolean skippedLine = false;
+
+ if (hasMoreData) {
+ boolean foundEol = false;
+
+ do {
+ // buffer fully consumed: refill it (this may clear hasMoreData)
+ if (dataBuffer.Position == dataBuffer.Count) {
+ checkDataLength();
+ } else {
+ // at least one character was available, so a line counts as skipped
+ skippedLine = true;
+
+ // grab the current letter as a char
+
+ char currentLetter = dataBuffer.Buffer[dataBuffer.Position];
+
+ if (currentLetter == Letters.CR || currentLetter == Letters.LF) {
+ foundEol = true;
+ }
+
+ // keep track of the last letter because we need
+ // it for several key decisions
+
+ lastLetter = currentLetter;
+
+ // the position is not advanced past the end-of-line character itself
+ if (!foundEol) {
+ dataBuffer.Position++;
+ }
+ } // end else
+ } while (hasMoreData && !foundEol);
+
+ // drop any partially collected column text
+ columnBuffer.Position = 0;
+
+ // the next line starts one past the position where scanning stopped
+ dataBuffer.LineStart = dataBuffer.Position + 1;
+ }
+
+ // discard the raw record captured so far
+ rawBuffer.Position = 0;
+ rawRecord = "";
+
+ return skippedLine;
+ }
+
+ /** Closes the reader and releases all related resources; repeated calls do nothing. */
+ public void close() {
+     if (closed) {
+         return;
+     }
+
+     close(true);
+
+     closed = true;
+ }
+
+ /**
+ * Releases the underlying stream and marks this instance as closed. Does nothing when the
+ * instance is already closed.
+ *
+ * @param closing When {@code true}, the references to the charset, header structures and
+ *     internal buffers are cleared as well; {@code finalize()} passes {@code false}.
+ */
+ private void close(boolean closing) {
+ if (!closed) {
+ if (closing) {
+ charset = null;
+ headersHolder.Headers = null;
+ headersHolder.IndexByName = null;
+ dataBuffer.Buffer = null;
+ columnBuffer.Buffer = null;
+ rawBuffer.Buffer = null;
+ }
+
+ // best-effort close: the stream is only touched once initialization has happened, and
+ // any failure while closing is intentionally ignored
+ try {
+ if (initialized) {
+ inputStream.close();
+ }
+ } catch (Exception e) {
+ // just eat the exception
+ }
+
+ inputStream = null;
+
+ closed = true;
+ }
+ }
+
+ /**
+  * Guards public operations against use after {@code close()}.
+  *
+  * @throws IOException Thrown if this object has already been closed.
+  */
+ private void checkClosed() throws IOException {
+     if (!closed) {
+         return;
+     }
+
+     throw new IOException("This instance of the CsvReader class has already been closed.");
+ }
+
+ /**
+ * Finalizer safety net: calls {@code close(false)}, which closes the underlying stream
+ * without clearing the buffer and header references.
+ */
+ @Override
+ protected void finalize() {
+ close(false);
+ }
+
+ /**
+ * Integer codes naming four escape notations (unicode, octal, decimal, hex).
+ * NOTE(review): presumably used by the record parser to track which multi-character escape
+ * is being read; the usage is not visible in this part of the file.
+ */
+ private class ComplexEscape {
+ private static final int UNICODE = 1;
+
+ private static final int OCTAL = 2;
+
+ private static final int DECIMAL = 3;
+
+ private static final int HEX = 4;
+ }
+
+ /**
+  * Converts a single hexadecimal digit character to its numeric value.
+  *
+  * <p>No validation is performed: the character is offset from {@code 'a'}, {@code 'A'} or
+  * {@code '0'}, whichever is the highest base not above it.
+  *
+  * @param hex The digit character.
+  * @return The numeric value, carried in a {@code char}.
+  */
+ private static char hexToDec(char hex) {
+     if (hex >= 'a') {
+         return (char) (hex - 'a' + 10);
+     }
+
+     if (hex >= 'A') {
+         return (char) (hex - 'A' + 10);
+     }
+
+     return (char) (hex - '0');
+ }
+
+ /** Holds the chunk of characters most recently read from the source, plus parsing offsets. */
+ private class DataBuffer {
+ // buffered characters; allocated with StaticSettings.MAX_BUFFER_SIZE (1024) entries
+ public char[] Buffer;
+
+ // current read offset within Buffer
+ public int Position;
+
+ // number of characters stored in Buffer by the latest read from the stream; may be
+ // smaller than the buffer length
+ public int Count;
+
+ // The position of the cursor in the buffer when the
+ // current column was started or the last time data
+ // was moved out to the column buffer.
+ public int ColumnStart;
+
+ // offset in Buffer from which characters are copied into the raw record buffer
+ public int LineStart;
+
+ public DataBuffer() {
+ Buffer = new char[StaticSettings.MAX_BUFFER_SIZE];
+ Position = 0;
+ Count = 0;
+ ColumnStart = 0;
+ LineStart = 0;
+ }
+ }
+
+ /** Growable character buffer that accumulates the text of the column being parsed. */
+ private class ColumnBuffer {
+ // accumulated characters; starts with StaticSettings.INITIAL_COLUMN_BUFFER_SIZE entries
+ public char[] Buffer;
+
+ // number of characters currently stored, which is also the next write offset
+ public int Position;
+
+ public ColumnBuffer() {
+ Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE];
+ Position = 0;
+ }
+ }
+
+ /** Growable character buffer that collects the raw text of the record being read. */
+ private class RawRecordBuffer {
+ // collected characters
+ public char[] Buffer;
+
+ // number of characters currently stored, which is also the next write offset
+ public int Position;
+
+ public RawRecordBuffer() {
+ // initial capacity: one initial column buffer per initially expected column
+ Buffer =
+ new char
+ [StaticSettings.INITIAL_COLUMN_BUFFER_SIZE
+ * StaticSettings.INITIAL_COLUMN_COUNT];
+ Position = 0;
+ }
+ }
+
+ /** Named constants for the individual characters the parser compares against. */
+ private class Letters {
+ public static final char LF = '\n';
+
+ public static final char CR = '\r';
+
+ public static final char QUOTE = '"';
+
+ public static final char COMMA = ',';
+
+ public static final char SPACE = ' ';
+
+ public static final char TAB = '\t';
+
+ public static final char POUND = '#';
+
+ public static final char BACKSLASH = '\\';
+
+ public static final char NULL = '\0';
+
+ public static final char BACKSPACE = '\b';
+
+ public static final char FORM_FEED = '\f';
+
+ public static final char ESCAPE = '\u001B'; // ASCII/ANSI escape
+
+ public static final char VERTICAL_TAB = '\u000B';
+
+ public static final char ALERT = '\u0007';
+ }
+
+ /** Holder for the parsing switches a caller may change; the constructor sets the defaults. */
+ private class UserSettings {
+ // having these as publicly accessible members will prevent
+ // the overhead of the method call that exists on properties
+ public boolean CaseSensitive;
+
+ public char TextQualifier;
+
+ public boolean TrimWhitespace;
+
+ public boolean UseTextQualifier;
+
+ public char Delimiter;
+
+ public char RecordDelimiter;
+
+ public char Comment;
+
+ public boolean UseComments;
+
+ public int EscapeMode;
+
+ public boolean SafetySwitch;
+
+ public boolean SkipEmptyRecords;
+
+ public boolean CaptureRawRecord;
+
+ public UserSettings() {
+ // defaults: comma delimiter, '"' as qualifier character with qualifier handling and
+ // comments switched off, whitespace trimming, safety switch and raw-record capture on
+ CaseSensitive = true;
+ TextQualifier = Letters.QUOTE;
+ TrimWhitespace = true;
+ UseTextQualifier = false;
+ Delimiter = Letters.COMMA;
+ RecordDelimiter = Letters.NULL;
+ Comment = Letters.POUND;
+ UseComments = false;
+ EscapeMode = ReaderUtil.ESCAPE_MODE_DOUBLED;
+ SafetySwitch = true;
+ SkipEmptyRecords = true;
+ CaptureRawRecord = true;
+ }
+ }
+
+ /** Holds the header names read by {@code readHeaders()} together with a name-to-index map. */
+ private class HeadersHolder {
+ // header names in column order; null until headers have been read
+ public String[] Headers;
+
+ // number of header columns
+ public int Length;
+
+ // maps a header name to its column index (stored as Integer)
+ public HashMap IndexByName;
+
+ public HeadersHolder() {
+ Headers = null;
+ Length = 0;
+ IndexByName = new HashMap();
+ }
+ }
+
+ /** Sizing constants for the internal buffers. */
+ private class StaticSettings {
+ // NOTE(review): an earlier comment here said these were "static instead of final" so that
+ // unit tests could change them, but every field below is declared static final.
+
+ // size, in chars, of the data buffer that holds each chunk read from the source
+ public static final int MAX_BUFFER_SIZE = 1024;
+
+ // buffer size passed to the BufferedReader that wraps a file source
+ public static final int MAX_FILE_BUFFER_SIZE = 4 * 1024;
+
+ // together with INITIAL_COLUMN_BUFFER_SIZE, sizes the initial raw record buffer
+ public static final int INITIAL_COLUMN_COUNT = 10;
+
+ // initial size, in chars, of the column buffer
+ public static final int INITIAL_COLUMN_BUFFER_SIZE = 50;
+ }
+
+ /**
+ * Returns the offset tracked in {@code nextOffset}.
+ * NOTE(review): presumably the character offset at which the next record starts; confirm
+ * against the record-reading code.
+ */
+ public long getNextOffset() {
+ return nextOffset;
+ }
+
+ /**
+ * Overrides the tracked offset.
+ *
+ * @param nextOffset The new offset value.
+ */
+ public void setNextOffset(long nextOffset) {
+ this.nextOffset = nextOffset;
+ }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/S3SimpleObject.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/S3SimpleObject.java
new file mode 100644
index 0000000000..5dc6198d0f
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/S3SimpleObject.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.util;
+
+import java.io.Serializable;
+import java.util.Objects;
+
+/**
+ * Serializable value object that identifies an S3 object by its key. Two instances are equal
+ * when they are of the same class and carry equal keys.
+ *
+ * @author jier
+ */
+public class S3SimpleObject implements Serializable {
+
+    private static final long serialVersionUID = -7199607264925678753L;
+
+    /** Object key; may be {@code null} when created through the no-arg constructor. */
+    private String key;
+
+    public S3SimpleObject() {}
+
+    public S3SimpleObject(String key) {
+        this.key = key;
+    }
+
+    public String getKey() {
+        return key;
+    }
+
+    public void setKey(String key) {
+        this.key = key;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        if (o == null) {
+            return false;
+        }
+        if (o.getClass() != getClass()) {
+            return false;
+        }
+        return Objects.equals(key, ((S3SimpleObject) o).key);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(key);
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/S3Util.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/S3Util.java
new file mode 100644
index 0000000000..3097edd0cd
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/S3Util.java
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.util;
+
+import com.dtstack.chunjun.connector.s3.conf.S3Conf;
+import com.dtstack.chunjun.util.GsonUtil;
+
+import com.amazonaws.ClientConfiguration;
+import com.amazonaws.Protocol;
+import com.amazonaws.auth.AWSStaticCredentialsProvider;
+import com.amazonaws.auth.BasicAWSCredentials;
+import com.amazonaws.client.builder.AwsClientBuilder;
+import com.amazonaws.regions.Regions;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.s3.model.AbortMultipartUploadRequest;
+import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
+import com.amazonaws.services.s3.model.GetObjectRequest;
+import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
+import com.amazonaws.services.s3.model.InitiateMultipartUploadResult;
+import com.amazonaws.services.s3.model.ListObjectsRequest;
+import com.amazonaws.services.s3.model.ListObjectsV2Request;
+import com.amazonaws.services.s3.model.ListObjectsV2Result;
+import com.amazonaws.services.s3.model.ObjectListing;
+import com.amazonaws.services.s3.model.PartETag;
+import com.amazonaws.services.s3.model.PutObjectResult;
+import com.amazonaws.services.s3.model.S3ObjectSummary;
+import com.amazonaws.services.s3.model.UploadPartRequest;
+import com.amazonaws.services.s3.model.UploadPartResult;
+import org.apache.commons.lang.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Static helpers for working with Amazon S3 (or an S3-compatible endpoint) through the AWS SDK
+ * for Java v1.
+ *
+ * @author jier
+ */
+public class S3Util {
+    private static final Logger LOG = LoggerFactory.getLogger(S3Util.class);
+
+    /**
+     * Builds an S3 client from the connector configuration.
+     *
+     * <p>With a region configured, the client is created through the client builder and uses
+     * the configured endpoint when one is present. Without a region, a client is created
+     * directly against the configured endpoint, using HTTP when the protocol is blank or
+     * {@code "HTTP"} and HTTPS otherwise.
+     *
+     * @param s3Config The connector configuration; may be {@code null}.
+     * @return The client, or {@code null} when {@code s3Config} is {@code null}.
+     */
+    public static AmazonS3 getS3Client(S3Conf s3Config) {
+        if (s3Config == null) {
+            // todo: throw exception
+            return null;
+        }
+
+        if (StringUtils.isNotBlank(s3Config.getRegion())) {
+            Regions clientRegion = Regions.fromName(s3Config.getRegion());
+            AmazonS3ClientBuilder builder =
+                    AmazonS3ClientBuilder.standard()
+                            .withCredentials(
+                                    new AWSStaticCredentialsProvider(
+                                            new BasicAWSCredentials(
+                                                    s3Config.getAccessKey(),
+                                                    s3Config.getSecretKey())));
+            if (null != s3Config.getEndpoint() && !"".equals(s3Config.getEndpoint().trim())) {
+                builder =
+                        builder.withEndpointConfiguration(
+                                new AwsClientBuilder.EndpointConfiguration(
+                                        s3Config.getEndpoint(), clientRegion.getName()));
+            } else {
+                builder = builder.withRegion(clientRegion.getName());
+            }
+
+            return builder.build();
+        }
+
+        BasicAWSCredentials cred =
+                new BasicAWSCredentials(s3Config.getAccessKey(), s3Config.getSecretKey());
+        ClientConfiguration ccfg = new ClientConfiguration();
+        if (StringUtils.isBlank(s3Config.getProtocol())
+                || "HTTP".equals(s3Config.getProtocol())) {
+            ccfg.setProtocol(Protocol.HTTP);
+        } else {
+            ccfg.setProtocol(Protocol.HTTPS);
+        }
+        AmazonS3Client client = new AmazonS3Client(cred, ccfg);
+        client.setEndpoint(s3Config.getEndpoint());
+        return client;
+    }
+
+    /** Stores {@code content} as the object {@code key} in {@code bucketName}. */
+    public static PutObjectResult putStringObject(
+            AmazonS3 s3Client, String bucketName, String key, String content) {
+        return s3Client.putObject(bucketName, key, content);
+    }
+
+    /**
+     * Lists the keys of all objects in a bucket with the ListObjectsV2 API, following
+     * continuation tokens until the listing is complete.
+     *
+     * @param s3Client The client to use.
+     * @param bucketName The bucket to list.
+     * @param prefix Optional key prefix; ignored when blank.
+     * @param fetchSize Maximum number of keys requested per page.
+     * @return The keys of every object found.
+     */
+    public static List<String> listObjectsKeyByPrefix(
+            AmazonS3 s3Client, String bucketName, String prefix, int fetchSize) {
+        List<String> objects = new ArrayList<>(fetchSize);
+        ListObjectsV2Request req =
+                new ListObjectsV2Request().withBucketName(bucketName).withMaxKeys(fetchSize);
+        if (StringUtils.isNotBlank(prefix)) {
+            req.setPrefix(prefix);
+        }
+        ListObjectsV2Result result;
+        do {
+            result = s3Client.listObjectsV2(req);
+
+            for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
+                objects.add(objectSummary.getKey());
+            }
+            String token = result.getNextContinuationToken();
+            req.setContinuationToken(token);
+            if (LOG.isDebugEnabled()) {
+                // only dump the keys themselves while the list is still small
+                if (objects.size() > 1024) {
+                    LOG.debug(
+                            "nextToken {}, result.isTruncated {}, objectsize {}",
+                            token,
+                            result.isTruncated(),
+                            objects.size());
+                } else {
+                    LOG.debug(
+                            "nextToken {}, result.isTruncated {}, objects {}",
+                            token,
+                            result.isTruncated(),
+                            GsonUtil.GSON.toJson(objects));
+                }
+            }
+        } while (result.isTruncated());
+        return objects;
+    }
+
+    /**
+     * Lists the keys of all objects in a bucket with the original ListObjects (v1) API,
+     * following markers until the listing is complete.
+     *
+     * <p>If a page is reported as truncated but carries no next marker, the listing stops
+     * after logging a warning. Repeating the unchanged request would otherwise return the
+     * same page forever.
+     *
+     * @param s3Client The client to use.
+     * @param bucketName The bucket to list.
+     * @param prefix Key prefix passed to the request.
+     * @param fetchSize Maximum number of keys requested per page.
+     * @return The keys collected.
+     */
+    public static List<String> listObjectsByv1(
+            AmazonS3 s3Client, String bucketName, String prefix, int fetchSize) {
+        List<String> objects = new ArrayList<>(fetchSize);
+
+        ListObjectsRequest req = new ListObjectsRequest(bucketName, prefix, null, null, fetchSize);
+        while (true) {
+            ObjectListing ol = s3Client.listObjects(req);
+
+            for (S3ObjectSummary os : ol.getObjectSummaries()) {
+                objects.add(os.getKey());
+            }
+
+            if (!ol.isTruncated()) {
+                break;
+            }
+
+            // next page
+            String marker = ol.getNextMarker();
+            if (StringUtils.isBlank(marker)) {
+                LOG.warn("Warning: missing NextMarker when IsTruncated");
+                // without a marker the request cannot advance; stop instead of looping forever
+                break;
+            }
+
+            req.setMarker(marker);
+            if (LOG.isDebugEnabled()) {
+                if (objects.size() > 1024) {
+                    LOG.debug(
+                            "nextToken {}, result.isTruncated {}, objectsSize {}",
+                            marker,
+                            true,
+                            objects.size());
+                } else {
+                    LOG.debug(
+                            "nextToken {}, result.isTruncated {}, objects {}",
+                            marker,
+                            true,
+                            GsonUtil.GSON.toJson(objects));
+                }
+            }
+        }
+        return objects;
+    }
+
+    /** Returns whether the object {@code object} exists in {@code bucketName}. */
+    public static boolean doesObjectExist(AmazonS3 s3Client, String bucketName, String object) {
+        return s3Client.doesObjectExist(bucketName, object);
+    }
+
+    /**
+     * Wraps an object key in an {@link S3SimpleObject}. No request is sent to S3.
+     *
+     * @param object The object key.
+     * @return A new {@link S3SimpleObject} carrying the key.
+     */
+    public static S3SimpleObject getS3SimpleObject(String object) {
+        return new S3SimpleObject(object);
+    }
+
+    /** Deletes the object {@code object} from {@code bucketName}. */
+    public static void deleteObject(AmazonS3 s3Client, String bucketName, String object) {
+        s3Client.deleteObject(bucketName, object);
+    }
+
+    /**
+     * Shuts the client down; a {@code null} client is ignored.
+     *
+     * @param amazonS3 The client to shut down, may be {@code null}.
+     */
+    public static void closeS3(AmazonS3 amazonS3) {
+        if (amazonS3 != null) {
+            amazonS3.shutdown();
+        }
+    }
+
+    /**
+     * Starts a multipart upload for the given object.
+     *
+     * @return The upload id to use for the parts and for completing or aborting the upload.
+     */
+    public static String initiateMultipartUploadAndGetId(
+            AmazonS3 s3Client, String bucketName, String object) {
+        InitiateMultipartUploadRequest initRequest =
+                new InitiateMultipartUploadRequest(bucketName, object);
+        InitiateMultipartUploadResult initResponse = s3Client.initiateMultipartUpload(initRequest);
+        return initResponse.getUploadId();
+    }
+
+    /**
+     * Uploads one part of a multipart upload from an in-memory byte array.
+     *
+     * @param s3Client The client to use.
+     * @param bucketName The target bucket.
+     * @param object The target object key.
+     * @param uploadId The id returned when the multipart upload was initiated.
+     * @param partNumber The number of this part within the upload.
+     * @param data The bytes of the part; the whole array is sent.
+     * @return The ETag of the uploaded part, needed to complete the upload.
+     */
+    public static PartETag uploadPart(
+            AmazonS3 s3Client,
+            String bucketName,
+            String object,
+            String uploadId,
+            int partNumber,
+            byte[] data) {
+        InputStream inputStream = new ByteArrayInputStream(data);
+
+        UploadPartRequest uploadRequest =
+                new UploadPartRequest()
+                        .withBucketName(bucketName)
+                        .withKey(object)
+                        .withUploadId(uploadId)
+                        .withPartNumber(partNumber)
+                        .withInputStream(inputStream)
+                        .withPartSize(data.length);
+        UploadPartResult uploadResult = s3Client.uploadPart(uploadRequest);
+        return uploadResult.getPartETag();
+    }
+
+    /** Completes a multipart upload from the ETags of its uploaded parts. */
+    public static void completeMultipartUpload(
+            AmazonS3 s3Client,
+            String bucketName,
+            String object,
+            String uploadId,
+            List<PartETag> partETags) {
+        CompleteMultipartUploadRequest compRequest =
+                new CompleteMultipartUploadRequest(bucketName, object, uploadId, partETags);
+        s3Client.completeMultipartUpload(compRequest);
+    }
+
+    /** Aborts a multipart upload. */
+    public static void abortMultipartUpload(
+            AmazonS3 s3Client, String bucketName, String object, String uploadId) {
+        s3Client.abortMultipartUpload(
+                new AbortMultipartUploadRequest(bucketName, object, uploadId));
+    }
+
+    /**
+     * Returns the size in bytes of an object.
+     *
+     * <p>Only the object's metadata is requested. Fetching the object itself would open a
+     * content stream that has to be closed by the caller, and this method has no use for the
+     * content.
+     *
+     * @param s3Client The client to use.
+     * @param bucketName The bucket holding the object.
+     * @param keyName The object key.
+     * @return The length of the object in bytes.
+     */
+    public static long getFileSize(AmazonS3 s3Client, String bucketName, String keyName) {
+        return s3Client.getObjectMetadata(bucketName, keyName).getInstanceLength();
+    }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/WriterUtil.java b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/WriterUtil.java
new file mode 100644
index 0000000000..9ecf3a7d75
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/java/com/dtstack/chunjun/connector/s3/util/WriterUtil.java
@@ -0,0 +1,578 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.dtstack.chunjun.connector.s3.util;
+
+import com.dtstack.chunjun.throwable.WriteRecordException;
+import com.dtstack.chunjun.util.StringUtil;
+
+import org.apache.flink.types.Row;
+
+import java.io.BufferedWriter;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.util.List;
+
+/** A stream based writer for writing delimited text data to a file or a stream. */
+public class WriterUtil {
+ // destination writer; set directly by the Writer-based constructor
+ private Writer outputStream = null;
+
+ // target file name; set by the file-name-based constructor
+ private String fileName = null;
+
+ // true until a column has been written for the current record; writeComment() resets it
+ private boolean firstColumn = true;
+
+ // becomes true once setRecordDelimiter() has been called
+ private boolean useCustomRecordDelimiter = false;
+
+ // charset given to the file-name-based constructor
+ private Charset charset = null;
+
+ // this holds all the values for switches that the user is allowed to set
+ private UserSettings userSettings = new UserSettings();
+
+ // set to true by the Writer-based constructor, which receives a ready-to-use writer
+ private boolean initialized = false;
+
+ private boolean closed = false;
+
+ // platform line separator, written after a comment when no custom record delimiter is set
+ private String systemRecordDelimiter = System.getProperty("line.separator");
+
+ /** Double up the text qualifier to represent an occurrence of the text qualifier. */
+ public static final int ESCAPE_MODE_DOUBLED = 1;
+
+ /**
+ * Use a backslash character before the text qualifier to represent an occurrence of the text
+ * qualifier.
+ */
+ public static final int ESCAPE_MODE_BACKSLASH = 2;
+
+ /**
+  * Records the target file name, column delimiter and charset. The file itself is not opened
+  * by this constructor.
+  *
+  * @param fileName The target file name; must not be {@code null}.
+  * @param delimiter The character to use as the column delimiter.
+  * @param charset The charset for the file; must not be {@code null}.
+  */
+ public WriterUtil(String fileName, char delimiter, Charset charset) {
+     if (null == fileName) {
+         throw new IllegalArgumentException("Parameter fileName can not be null.");
+     }
+     if (null == charset) {
+         throw new IllegalArgumentException("Parameter charset can not be null.");
+     }
+
+     this.charset = charset;
+     this.fileName = fileName;
+     this.userSettings.Delimiter = delimiter;
+ }
+
+ /**
+  * Same as the three-argument constructor with a comma delimiter and ISO-8859-1 charset.
+  *
+  * @param fileName The target file name; must not be {@code null}.
+  */
+ public WriterUtil(String fileName) {
+     this(fileName, Letters.COMMA, Charset.forName("ISO-8859-1"));
+ }
+
+ /**
+  * Writes to an existing {@link Writer}.
+  *
+  * @param outputStream The destination writer; must not be {@code null}.
+  * @param delimiter The character to use as the column delimiter.
+  */
+ public WriterUtil(Writer outputStream, char delimiter) {
+     if (null == outputStream) {
+         throw new IllegalArgumentException("Parameter outputStream can not be null.");
+     }
+
+     // a ready-to-use writer was supplied, so this instance is marked as initialized
+     this.initialized = true;
+     this.outputStream = outputStream;
+     this.userSettings.Delimiter = delimiter;
+ }
+
+ /**
+  * Writes to an {@link OutputStream}, encoding characters with the given charset.
+  *
+  * @param outputStream The destination stream.
+  * @param delimiter The character to use as the column delimiter.
+  * @param charset The charset used to encode the output.
+  */
+ public WriterUtil(OutputStream outputStream, char delimiter, Charset charset) {
+     this(new OutputStreamWriter(outputStream, charset), delimiter);
+ }
+
+ /**
+ * Gets the character being used as the column delimiter.
+ *
+ * @return The character being used as the column delimiter.
+ */
+ public char getDelimiter() {
+ return userSettings.Delimiter;
+ }
+
+ /**
+ * Sets the character to use as the column delimiter.
+ *
+ * @param delimiter The character to use as the column delimiter.
+ */
+ public void setDelimiter(char delimiter) {
+ userSettings.Delimiter = delimiter;
+ }
+
+ /**
+ * Gets the character stored as the record delimiter.
+ *
+ * @return The character stored as the record delimiter.
+ */
+ public char getRecordDelimiter() {
+ return userSettings.RecordDelimiter;
+ }
+
+ /**
+ * Sets the character to use as the record delimiter.
+ *
+ * @param recordDelimiter The character to use as the record delimiter. Default is combination
+ * of standard end of line characters for Windows, Unix, or Mac.
+ */
+ public void setRecordDelimiter(char recordDelimiter) {
+ // from now on the custom delimiter is used instead of the system line separator
+ useCustomRecordDelimiter = true;
+ userSettings.RecordDelimiter = recordDelimiter;
+ }
+
+ /**
+ * Gets the character to use as a text qualifier in the data.
+ *
+ * @return The character to use as a text qualifier in the data.
+ */
+ public char getTextQualifier() {
+ return userSettings.TextQualifier;
+ }
+
+ /**
+ * Sets the character to use as a text qualifier in the data.
+ *
+ * @param textQualifier The character to use as a text qualifier in the data.
+ */
+ public void setTextQualifier(char textQualifier) {
+ userSettings.TextQualifier = textQualifier;
+ }
+
+ /**
+ * Whether text qualifiers will be used while writing data or not.
+ *
+ * @return Whether text qualifiers will be used while writing data or not.
+ */
+ public boolean getUseTextQualifier() {
+ return userSettings.UseTextQualifier;
+ }
+
+ /**
+ * Sets whether text qualifiers will be used while writing data or not.
+ *
+ * @param useTextQualifier Whether to use a text qualifier while writing data or not.
+ */
+ public void setUseTextQualifier(boolean useTextQualifier) {
+ userSettings.UseTextQualifier = useTextQualifier;
+ }
+
+ /**
+ * Gets the escape mode in effect.
+ *
+ * @return The escape mode; this class defines {@link #ESCAPE_MODE_DOUBLED} and {@link
+ * #ESCAPE_MODE_BACKSLASH}.
+ */
+ public int getEscapeMode() {
+ return userSettings.EscapeMode;
+ }
+
+ /**
+ * Sets the escape mode.
+ *
+ * @param escapeMode {@link #ESCAPE_MODE_BACKSLASH} to escape with a backslash; any other value
+ * makes {@code write} double the text qualifier instead.
+ */
+ public void setEscapeMode(int escapeMode) {
+ userSettings.EscapeMode = escapeMode;
+ }
+
+ /**
+ * Sets the character that starts a comment line.
+ *
+ * @param comment The comment character.
+ */
+ public void setComment(char comment) {
+ userSettings.Comment = comment;
+ }
+
+ /**
+ * Gets the character that starts a comment line.
+ *
+ * @return The comment character.
+ */
+ public char getComment() {
+ return userSettings.Comment;
+ }
+
+ /**
+ * Whether fields will be surrounded by the text qualifier even if the qualifier is not
+ * necessarily needed to escape this field.
+ *
+ * @return Whether fields will be forced to be qualified or not.
+ */
+ public boolean getForceQualifier() {
+ return userSettings.ForceQualifier;
+ }
+
+ /**
+ * Use this to force all fields to be surrounded by the text qualifier even if the qualifier is
+ * not necessarily needed to escape this field. Default is false.
+ *
+ * @param forceQualifier Whether to force the fields to be qualified or not.
+ */
+ public void setForceQualifier(boolean forceQualifier) {
+ userSettings.ForceQualifier = forceQualifier;
+ }
+
+ /**
+ * Writes another column of data to this record.
+ *
+ * <p>A delimiter is written first unless this is the first column of the record. The content
+ * is then either wrapped in text qualifiers (with embedded qualifiers escaped according to the
+ * escape mode) or, in backslash escape mode, written with special characters backslash-escaped.
+ * A {@code null} content is written as an empty string.
+ *
+ * @param content The data for the new column.
+ * @param preserveSpaces Whether to preserve leading and trailing whitespace in this column of
+ * data.
+ * @exception IOException Thrown if an error occurs while writing data to the destination
+ * stream.
+ */
+ public void write(String content, boolean preserveSpaces) throws IOException {
+ checkClosed();
+
+ checkInit();
+
+ if (content == null) {
+ content = "";
+ }
+
+ // separate this column from the previous one
+ if (!firstColumn) {
+ outputStream.write(userSettings.Delimiter);
+ }
+
+ boolean textQualify = userSettings.ForceQualifier;
+
+ if (!preserveSpaces && content.length() > 0) {
+ content = content.trim();
+ }
+
+ // qualify when the content contains a character that would otherwise be misread: the
+ // qualifier itself, the column delimiter, a record delimiter, or a leading comment
+ // character in the first column
+ if (!textQualify
+ && userSettings.UseTextQualifier
+ && (content.indexOf(userSettings.TextQualifier) > -1
+ || content.indexOf(userSettings.Delimiter) > -1
+ || (!useCustomRecordDelimiter
+ && (content.indexOf(Letters.LF) > -1
+ || content.indexOf(Letters.CR) > -1))
+ || (useCustomRecordDelimiter
+ && content.indexOf(userSettings.RecordDelimiter) > -1)
+ || (firstColumn
+ && content.length() > 0
+ && content.charAt(0) == userSettings.Comment)
+ ||
+ // check for empty first column, which if on its own line must
+ // be qualified or the line will be skipped
+ (firstColumn && content.length() == 0))) {
+ textQualify = true;
+ }
+
+ // when spaces are preserved, leading or trailing spaces/tabs also force qualification
+ if (userSettings.UseTextQualifier
+ && !textQualify
+ && content.length() > 0
+ && preserveSpaces) {
+ char firstLetter = content.charAt(0);
+
+ if (firstLetter == Letters.SPACE || firstLetter == Letters.TAB) {
+ textQualify = true;
+ }
+
+ if (!textQualify && content.length() > 1) {
+ char lastLetter = content.charAt(content.length() - 1);
+
+ if (lastLetter == Letters.SPACE || lastLetter == Letters.TAB) {
+ textQualify = true;
+ }
+ }
+ }
+
+ if (textQualify) {
+ // opening qualifier; the closing one is written after the content below
+ outputStream.write(userSettings.TextQualifier);
+
+ if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {
+ // backslashes are escaped first so the backslashes added next are not doubled
+ content =
+ replace(
+ content,
+ "" + Letters.BACKSLASH,
+ "" + Letters.BACKSLASH + Letters.BACKSLASH);
+ content =
+ replace(
+ content,
+ "" + userSettings.TextQualifier,
+ "" + Letters.BACKSLASH + userSettings.TextQualifier);
+ } else {
+ // any other escape mode: double every embedded qualifier
+ content =
+ replace(
+ content,
+ "" + userSettings.TextQualifier,
+ "" + userSettings.TextQualifier + userSettings.TextQualifier);
+ }
+ } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) {
+ // unqualified content in backslash mode: escape backslashes, then delimiters, then
+ // record delimiters
+ content =
+ replace(
+ content,
+ "" + Letters.BACKSLASH,
+ "" + Letters.BACKSLASH + Letters.BACKSLASH);
+ content =
+ replace(
+ content,
+ "" + userSettings.Delimiter,
+ "" + Letters.BACKSLASH + userSettings.Delimiter);
+
+ if (useCustomRecordDelimiter) {
+ content =
+ replace(
+ content,
+ "" + userSettings.RecordDelimiter,
+ "" + Letters.BACKSLASH + userSettings.RecordDelimiter);
+ } else {
+ content = replace(content, "" + Letters.CR, "" + Letters.BACKSLASH + Letters.CR);
+ content = replace(content, "" + Letters.LF, "" + Letters.BACKSLASH + Letters.LF);
+ }
+
+ // a leading comment character in the first column is escaped as well
+ if (firstColumn && content.length() > 0 && content.charAt(0) == userSettings.Comment) {
+ if (content.length() > 1) {
+ content = "" + Letters.BACKSLASH + userSettings.Comment + content.substring(1);
+ } else {
+ content = "" + Letters.BACKSLASH + userSettings.Comment;
+ }
+ }
+ }
+
+ outputStream.write(content);
+
+ if (textQualify) {
+ outputStream.write(userSettings.TextQualifier);
+ }
+
+ firstColumn = false;
+ }
+
+ /**
+ * Writes another column of data to this record without preserving leading and trailing
+ * whitespace: the value is trimmed before it is written.
+ *
+ * @param content The data for the new column; null is treated as an empty string.
+ * @exception IOException Thrown if an error occurs while writing data to the destination
+ * stream.
+ */
+ public void write(String content) throws IOException {
+ write(content, false);
+ }
+
+ /**
+  * Writes a comment line: the comment character, the given text, then a record delimiter.
+  * @param commentText text emitted after the comment character; it is written as-is.
+  * @throws IOException if this writer is closed or the destination stream fails.
+  */
+ public void writeComment(String commentText) throws IOException {
+     checkClosed();
+     checkInit();
+     outputStream.write(userSettings.Comment);
+     outputStream.write(commentText);
+     if (!useCustomRecordDelimiter) {
+         outputStream.write(systemRecordDelimiter);
+     } else {
+         outputStream.write(userSettings.RecordDelimiter);
+     }
+     firstColumn = true; // the next column written is treated as a first column
+ }
+
+ /**
+  * Writes every element of {@code values} as a column, then ends the record.
+  *
+  * <p>A {@code null} or empty array writes nothing, not even a record delimiter.
+  * @param values Values to be written.
+  * @param preserveSpaces Whether to preserve leading and trailing spaces in each column.
+  * @throws IOException Thrown if an error occurs while writing data to the destination stream.
+  */
+ public void writeRecord(String[] values, boolean preserveSpaces) throws IOException {
+     if (values == null || values.length == 0) {
+         return;
+     }
+     for (String value : values) {
+         write(value, preserveSpaces);
+     }
+     endRecord();
+ }
+
+ /**
+ * Writes the given values as one record without preserving leading/trailing whitespace.
+ *
+ * @param values Values to be written; delegates with preserveSpaces set to false.
+ * @throws IOException Thrown if an error occurs while writing data to the destination stream.
+ */
+ public void writeRecord(String[] values) throws IOException {
+ writeRecord(values, false);
+ }
+
+ /**
+  * Terminates the current record by emitting a record delimiter.
+  *
+  * <p>The custom {@code RecordDelimiter} is written when one is in use; otherwise the system
+  * record delimiter is written. The next column written is then treated as a first column.
+  *
+  * @throws IOException if this writer is closed or the destination stream fails.
+  */
+ public void endRecord() throws IOException {
+     checkClosed();
+     checkInit();
+     if (!useCustomRecordDelimiter) {
+         outputStream.write(systemRecordDelimiter);
+     } else {
+         outputStream.write(userSettings.RecordDelimiter);
+     }
+
+     firstColumn = true;
+ }
+
+ /** Opens the writer for fileName (when one is set) on first use; later calls are no-ops. */
+ private void checkInit() throws IOException {
+     if (initialized) {
+         return;
+     }
+     if (fileName != null) {
+         // A file name is set, so create the writer for it with the configured charset.
+         FileOutputStream fileStream = new FileOutputStream(fileName);
+         outputStream = new BufferedWriter(new OutputStreamWriter(fileStream, charset));
+     }
+     initialized = true;
+ }
+
+ /**
+  * Flushes buffered data to the underlying device. Does nothing if no stream is open yet.
+  * @exception IOException Thrown if this writer is closed or the destination stream fails.
+  */
+ public void flush() throws IOException {
+     checkClosed(); // report misuse as an IOException rather than a NullPointerException
+     if (outputStream != null) { // the file-backed stream is only created by checkInit()
+         outputStream.flush();
+     }
+ }
+
+ /** Closes the writer and releases its resources; calling it again has no effect. */
+ public void close() {
+     if (closed) {
+         return;
+     }
+     close(true);
+     closed = true;
+ }
+
+ /** Releases the stream; {@code closing} is true for an explicit close, false from finalize. */
+ private void close(boolean closing) {
+     if (closed) {
+         return;
+     }
+
+     if (closing) {
+         charset = null;
+     }
+
+     if (initialized) {
+         try {
+             outputStream.close();
+         } catch (Exception e) {
+             // best effort: a failure while closing is deliberately ignored
+         }
+     }
+     outputStream = null;
+     closed = true;
+ }
+
+ /** Throws an IOException if this writer has already been closed. */
+ private void checkClosed() throws IOException {
+ if (closed) {
+ throw new IOException("This instance of the CsvWriter class has already been closed.");
+ }
+ }
+
+ /** Last-resort cleanup: releases the stream via close(false) if close() was never called. */
+ protected void finalize() {
+ close(false);
+ }
+
+ /** Named character constants used by the writer. */
+ private class Letters {
+     // Record-break characters.
+     public static final char LF = '\n';
+     public static final char CR = '\r';
+
+     // Default qualifier, delimiter and comment characters.
+     public static final char QUOTE = '"';
+     public static final char COMMA = ',';
+     public static final char POUND = '#';
+
+     // Whitespace characters.
+     public static final char SPACE = ' ';
+     public static final char TAB = '\t';
+
+     // Escape character and the NUL character.
+     public static final char BACKSLASH = '\\';
+     public static final char NULL = '\0';
+ }
+
+ /** Mutable formatting options consulted by the writer; every field starts at its default. */
+ private class UserSettings {
+     // Fields are public and read directly to avoid the overhead of accessor method calls.
+
+     /** Character wrapped around qualified values; defaults to a double quote. */
+     public char TextQualifier = Letters.QUOTE;
+
+     /** Enables automatic qualification of values whose content requires it. */
+     public boolean UseTextQualifier = true;
+
+     /** Character written between columns; defaults to a comma. */
+     public char Delimiter = Letters.COMMA;
+
+     /** Custom record delimiter; defaults to the NUL character. */
+     public char RecordDelimiter = Letters.NULL;
+
+     /** Character that starts a comment line; defaults to '#'. */
+     public char Comment = Letters.POUND;
+
+     /** One of the ESCAPE_MODE_* constants; defaults to ESCAPE_MODE_DOUBLED. */
+     public int EscapeMode = ESCAPE_MODE_DOUBLED;
+
+     /** When true, every value is wrapped in the text qualifier. */
+     public boolean ForceQualifier = false;
+
+     public UserSettings() {}
+ }
+
+ /**
+  * Replaces every literal (non-regex) occurrence of {@code pattern} with {@code replace}.
+  * @param original text to search; returned as-is when nothing matches.
+  * @param pattern text to look for; an empty pattern matches nothing.
+  * @param replace text substituted for each match.
+  * @return the text with all matches replaced, scanning left to right.
+  */
+ public static String replace(String original, String pattern, String replace) {
+     // An empty pattern would match at every index without advancing, so treat it as no match.
+     int found = pattern.isEmpty() ? -1 : original.indexOf(pattern);
+     if (found < 0) {
+         return original;
+     }
+     StringBuilder sb = new StringBuilder(original.length() + replace.length());
+     int start = 0;
+     while (found != -1) {
+         sb.append(original, start, found).append(replace);
+         start = found + pattern.length();
+         found = original.indexOf(pattern, start);
+     }
+     return sb.append(original, start, original.length()).toString();
+ }
+
+ /**
+  * Joins the fields of {@code row} into one delimited line.
+  * <p>Non-null fields are rendered by {@code StringUtil.col2string} with the matching entry of
+  * {@code columnTypes}; a null field contributes nothing between its delimiters.
+  * @param row record whose fields are joined, in field order.
+  * @param columnTypes entries passed to {@code StringUtil.col2string}, looked up by field index.
+  * @param delimiter text placed between consecutive fields.
+  * @return the joined line; nothing is appended after the last field.
+  * @throws WriteRecordException if any field fails to convert; carries the field index and row.
+  */
+ public static String row2string(Row row, List columnTypes, String delimiter)
+         throws WriteRecordException {
+     final int arity = row.getArity();
+     final StringBuilder line = new StringBuilder(128);
+     int i = 0; // declared outside the try so the catch block can report the failing index
+     try {
+         while (i < arity) {
+             if (i > 0) {
+                 line.append(delimiter);
+             }
+             Object column = row.getField(i);
+             if (column != null) {
+                 line.append(StringUtil.col2string(column, columnTypes.get(i)));
+             }
+             i++;
+         }
+     } catch (Exception ex) {
+         String msg =
+                 "StringUtil.row2string error: when converting field[" + i + "] in Row(" + row + ")";
+         throw new WriteRecordException(msg, ex, i, row);
+     }
+     return line.toString();
+ }
+}
diff --git a/chunjun-connectors/chunjun-connector-s3/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/chunjun-connectors/chunjun-connector-s3/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory
new file mode 100644
index 0000000000..81c28c9091
--- /dev/null
+++ b/chunjun-connectors/chunjun-connector-s3/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+com.dtstack.chunjun.connector.s3.table.S3DynamicTableFactory
diff --git a/chunjun-connectors/pom.xml b/chunjun-connectors/pom.xml
index d5f64178dd..f022960fd1 100644
--- a/chunjun-connectors/pom.xml
+++ b/chunjun-connectors/pom.xml
@@ -52,6 +52,7 @@
chunjun-connector-hdfs
chunjun-connector-hive
chunjun-connector-hive3
+ chunjun-connector-s3
chunjun-connector-hbase-base
diff --git a/chunjun-examples/json/s3/ftp_s3.json b/chunjun-examples/json/s3/ftp_s3.json
new file mode 100644
index 0000000000..1c42ada027
--- /dev/null
+++ b/chunjun-examples/json/s3/ftp_s3.json
@@ -0,0 +1,115 @@
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "parameter": {
+ "path": "/root/ExportCSV.csv",
+ "protocol": "ftp",
+ "port": 21,
+ "isFirstLineHeader": false,
+ "host": "localhost",
+ "column": [
+ {
+ "index": 0,
+ "type": "string",
+ "name": "i1"
+ },
+ {
+ "index": 1,
+ "type": "string",
+ "name": "i2"
+ },
+ {
+ "index": 2,
+ "type": "string",
+ "name": "i3"
+ },
+ {
+ "index": 3,
+ "type": "string",
+ "name": "i4"
+ },
+ {
+ "index": 4,
+ "type": "string",
+ "name": "i5"
+ },
+ {
+ "index": 5,
+ "type": "string",
+ "name": "i6"
+ },
+ {
+ "index": 6,
+ "type": "string",
+ "name": "i7"
+ }
+ ],
+ "password":"123456",
+ "fieldDelimiter":",",
+ "encoding":"utf-8",
+ "username":"root"
+ },
+ "name": "ftpreader"
+ },
+ "writer": {
+ "parameter": {
+ "region":"",
+ "bucket": "",
+ "secretKey": "",
+ "accessKey": "",
+ "object": "qingxing.csv",
+ "isFirstLineHeader": false,
+ "column": [
+ {
+ "index": 0,
+ "type": "string",
+ "name": "i1"
+ },
+ {
+ "index": 1,
+ "type": "string",
+ "name": "i2"
+ },
+ {
+ "index": 2,
+ "type": "string",
+ "name": "i3"
+ },
+ {
+ "index": 3,
+ "type": "string",
+ "name": "i4"
+ },
+ {
+ "index": 4,
+ "type": "string",
+ "name": "i5"
+ },
+ {
+ "index": 5,
+ "type": "string",
+ "name": "i6"
+ },
+ {
+ "index": 6,
+ "type": "string",
+ "name": "i7"
+ }
+ ],
+ "encoding": "utf-8",
+ "fieldDelimiter": ","
+ },
+ "name": "s3writer"
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "bytes": 0,
+ "channel": 1
+ }
+ }
+ }
+}
diff --git a/chunjun-examples/sql/s3/s3_stream.sql b/chunjun-examples/sql/s3/s3_stream.sql
new file mode 100644
index 0000000000..cd19716402
--- /dev/null
+++ b/chunjun-examples/sql/s3/s3_stream.sql
@@ -0,0 +1,27 @@
+CREATE TABLE source
+(
+ i1 INT,
+ i2 INT
+) WITH (
+ 'connector' = 's3-x',
+ 'assessKey' = '',
+ 'secretKey' = '',
+ 'bucket' = '',
+ 'objects' = '[""]',
+ 'fieldDelimiter' = '|',
+ 'isFirstLineHeader' = 'false',
+ 'region' = ''
+ );
+
+CREATE TABLE sink
+(
+ i1 INT,
+ i2 INT
+) WITH (
+ 'connector' = 'stream-x',
+ 'print' = 'true'
+ );
+
+INSERT INTO sink
+SELECT *
+FROM source;
diff --git a/chunjun-local-test/pom.xml b/chunjun-local-test/pom.xml
index 440d6f2bad..5e387eb047 100644
--- a/chunjun-local-test/pom.xml
+++ b/chunjun-local-test/pom.xml
@@ -145,8 +145,8 @@
${project.version}
- com.dtstack.flinkx
- flinkx-connector-rabbitmq
+ com.dtstack.chunjun
+ chunjun-connector-rabbitmq
${project.version}
@@ -243,6 +243,11 @@
chunjun-connector-hive
${project.version}
+
+ com.dtstack.chunjun
+ chunjun-connector-s3
+ ${project.version}
+
diff --git "a/docs_zh/ChunJun\350\277\236\346\216\245\345\231\250/s3/s3_sink.md" "b/docs_zh/ChunJun\350\277\236\346\216\245\345\231\250/s3/s3_sink.md"
new file mode 100644
index 0000000000..25c9e55d10
--- /dev/null
+++ "b/docs_zh/ChunJun\350\277\236\346\216\245\345\231\250/s3/s3_sink.md"
@@ -0,0 +1,199 @@
+# S3 Writer
+
+
+
+## 一、插件名称
+
+名称:**s3writer**
+
+
+
+## 二、数据源版本
+
+amazon s3 **所有版本**
+
+
+
+## 三、参数说明
+
+- **accessKey**
+ - 描述:aws 用户凭证:aws_access_key_id
+ - 必选:是
+ - 默认值:无
+
+
+
+- **secretKey**
+ - 描述:aws 用户凭证:aws_secret_access_key
+ - 必选:是
+ - 默认值:无
+
+
+
+- **endpoint**
+ - 描述:若需指定 endpoint,可通过该参数指定,详情可参见官方文档
+ [https://docs.aws.amazon.com/zh_cn/general/latest/gr/rande.html](https://docs.aws.amazon.com/zh_cn/general/latest/gr/rande.html)
+ - 必选:否
+ - 默认值:根据 region 自动选择 endpoint
+
+
+
+- **region**
+ - 描述:储存桶的区域
+ - 必选:否
+ - 默认值:`cn-north-1`
+
+
+
+- **bucket**
+ - 描述:存储桶名称
+ - 必选:是
+ - 默认值:无
+
+
+
+- **object**
+ - 描述:需要写入的对象,只支持一个对象
+ - 必选:是
+ - 默认值:无
+ - 格式:
+ - "abc.xml"
+ - "xxx/abd"
+
+
+
+- **fieldDelimiter**
+ - 描述:写入的字段分隔符,只支持单字符或转义字符
+ - 必选:是
+ - 默认值:`,`
+
+
+
+- **encoding**
+ - 描述:写入文件的编码配置
+ - 必选:否
+ - 默认值:`UTF-8`
+
+
+
+- **isFirstLineHeader**
+ - 描述:是否增加首行为标题行,如果是,将设置第一行为标题
+ - 必选:否
+ - 默认值:false
+
+
+
+
+- **column**
+ - 描述:需要写入的字段
+ - 格式:指定具体信息:
+ ```json
+ "column": [{
+ "name": "col1",
+ "type": "datetime"
+ }]
+ ```
+
+- 属性说明:
+ - name:字段名称
+ - type:字段类型,s3写入的为文本文件,本质上都是要转化成字符串类型,这里可以指定要转换的字段转换之前的类型
+
+
+
+- 必选:是
+- 默认值:无
+- **writeMode**
+ - **描述:写入模式,只支持覆写**
+ - **必选:否**
+ - **默认值:overwrite**
+
+**chunjun1.12 目前只支持写入string类型,只支持单并行度写入**
+
+## 四、使用示例
+
+
+
+```json
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "parameter": {
+ "column": [
+ {
+ "name": "col1",
+ "type": "string"
+ },
+ {
+ "name": "col2",
+ "type": "string"
+ },
+ {
+ "name": "col3",
+ "type": "int"
+ },
+ {
+ "name": "col4",
+ "type": "int"
+ }
+ ],
+ "sliceRecordCount": [
+ "100"
+ ]
+ },
+ "name": "streamreader"
+ },
+ "writer": {
+ "parameter": {
+ "accessKey": "",
+ "secretKey": "",
+ "endpoint": "http://127.0.0.1:9090",
+ "region": "",
+ "bucket": "",
+ "object": "aaa.xml",
+ "column": [
+ {
+ "name": "col1",
+ "type": "string"
+ },
+ {
+ "name": "col2",
+ "type": "string"
+ },
+ {
+ "name": "col3",
+ "type": "int"
+ },
+ {
+ "name": "col4",
+ "type": "int"
+ }
+ ],
+ "fieldDelimiter": ",",
+ "encoding": "utf-8",
+ "isFirstLineHeader": true
+ },
+ "name": "s3writer"
+ }
+ }
+ ],
+ "setting": {
+ "restore": {
+ "maxRowNumForCheckpoint": 0,
+ "isRestore": false,
+ "restoreColumnName": "",
+ "restoreColumnIndex": 0
+ },
+ "errorLimit": {
+ "record": 100
+ },
+ "speed": {
+ "bytes": 0,
+ "channel": 1
+ }
+ }
+ }
+}
+```
+
diff --git "a/docs_zh/ChunJun\350\277\236\346\216\245\345\231\250/s3/s3_source.md" "b/docs_zh/ChunJun\350\277\236\346\216\245\345\231\250/s3/s3_source.md"
new file mode 100644
index 0000000000..976610e837
--- /dev/null
+++ "b/docs_zh/ChunJun\350\277\236\346\216\245\345\231\250/s3/s3_source.md"
@@ -0,0 +1,444 @@
+# S3 Reader
+
+
+
+## 一、插件名称
+
+名称:**s3reader**
+
+
+
+## 二、支持的数据源版本
+
+aws s3 所有版本
+
+
+
+## 三、参数说明
+
+#### 参数说明
+
+- **accessKey**
+ - 描述:aws 用户凭证:aws_access_key_id
+ - 必选:是
+ - 默认值:无
+
+
+
+- **secretKey**
+ - 描述:aws 用户凭证:aws_secret_access_key
+ - 必选:是
+ - 默认值:无
+
+
+
+- **endpoint**
+ - 描述:若需指定 endpoint,可通过该参数指定,详情可参见官方文档
+ [https://docs.aws.amazon.com/zh_cn/general/latest/gr/rande.html](https://docs.aws.amazon.com/zh_cn/general/latest/gr/rande.html)
+ - 必选:否
+ - 默认值:根据 region 自动选择 endpoint
+
+
+
+- **region**
+ - 描述:储存桶的区域
+ - 必选:否
+ - 默认值:`us-west-2`
+
+
+
+- **bucket**
+ - 描述:存储桶名称
+ - 必选:是
+ - 默认值:无
+
+
+
+- **objects**
+ - 描述:需要同步的对象
+ - 格式:
+ - 单个对象
+ - ["abc.xml"]
+ - ["abd"]
+ - 多个对象 必须以.*结尾代表读取此目录下所有文件,只支持目录名称前缀匹配,不支持多层级目录前缀匹配
+ - ["as.*"] 会匹配as1,as2,as3目录下的所有文件
+ - ["as/.*"] as 目录下所有文件
+
+
+
+- **column**
+ - 描述:需要读取的字段。
+ - 格式:支持2种格式
+ - 只指定字段名称:
+
+ ```json
+ "column":["id","name"]
+ ```
+ - 指定具体信息:
+ - 属性说明
+ - name:字段名称
+ - type:字段类型,可以和数据库里的字段类型不一样,程序会做一次类型转换
+ - format:如果字段是时间字符串,可以指定时间的格式,将字段类型转为日期格式返回
+ - value:如果数据库里不存在指定的字段,则会把value的值作为常量列返回,如果指定的字段存在,当指定字段的值为null时,会以此value值作为默认值返回
+ ```json
+ "column": [{
+ "index": 0,
+ "type": "datetime",
+ "format": "yyyy-MM-dd HH:mm:ss",
+ "value": "value"
+ }]
+ ```
+ - 必选:是
+ - 默认值:无
+
+
+
+- **encoding**
+ - 描述:读取文件的编码配置
+ - 必选:否
+ - 默认值:`UTF-8`
+
+
+
+- **fieldDelimiter**
+ - 描述:读取的字段分隔符,只支持单字符或转义字符
+ - 必选:否
+ - 默认值:`,`
+
+
+
+- **isFirstLineHeader**
+ - 描述:首行是否为标题行,如果是则不读取第一行
+ - 必选:否
+ - 默认值:false
+
+
+- **fetchSize**
+ - 描述:单次请求获取目录下文件的数量
+ - 必选:否
+ - 默认值:512
+
+- **useV2**
+ - 描述:获取目录下文件数量的api
+ - 必选:否
+ - 默认值:true 使用ListObjectsV2Request,false使用 ListObjectsRequest
+
+- **safetySwitch**
+ - 描述:安全开关。开启后,当文件编码等解析设置与文件的实际格式不匹配时,可防止解析器占用大量内存;关闭后,解析器支持的每条记录的最大列长度和最大列数将大大增加。
+ - 必选:否
+ - 默认值:false(关闭)
+
+
+
+**chunjun1.12 目前只支持string类型**
+
+## 四、使用示例
+
+
+
+#### 1、读取单个文件
+
+```json
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "parameter": {
+ "accessKey": "",
+ "secretKey": "",
+ "endpoint": "http://127.0.0.1:9090",
+ "region": "",
+ "bucket": "",
+ "objects": ["aaa.xml"],
+ "column": [
+ {
+ "index": 0,
+ "type": "string"
+ },
+ {
+ "index": 1,
+ "type": "string"
+ },
+ {
+ "index": 2,
+ "type": "int"
+ },
+ {
+ "index": 3,
+ "type": "int"
+ }
+ ],
+ "encoding": "",
+ "fieldDelimiter": ",",
+ "isFirstLineHeader": true
+ },
+ "name": "s3reader"
+ },
+ "writer": {
+ "name": "streamwriter",
+ "parameter": {
+ "print": true
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": 1,
+ "bytes": 0
+ },
+ "errorLimit": {
+ "record": 100
+ },
+ "log": {
+ "isLogger": false,
+ "level": "debug",
+ "path": "",
+ "pattern": ""
+ }
+ }
+ }
+}
+```
+
+
+
+#### 2、读取多个文件
+
+```json
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "parameter": {
+ "accessKey": "",
+ "secretKey": "",
+ "region": "",
+ "bucket": "",
+ "objects": [
+ "aaa.xml",
+ "bbb/ccc.xml"
+ ],
+ "column": [
+ {
+ "index": 0,
+ "type": "string"
+ },
+ {
+ "index": 1,
+ "type": "string"
+ },
+ {
+ "index": 2,
+ "type": "int"
+ },
+ {
+ "index": 3,
+ "type": "int"
+ }
+ ],
+ "encoding": "",
+ "fieldDelimiter": ""
+ },
+ "name": "s3reader"
+ },
+ "writer": {
+ "name": "streamwriter",
+ "parameter": {
+ "print": true
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": 1,
+ "bytes": 0
+ },
+ "errorLimit": {
+ "record": 100
+ },
+ "log": {
+ "isLogger": false,
+ "level": "debug",
+ "path": "",
+ "pattern": ""
+ }
+ }
+ }
+}
+```
+
+
+
+#### 3、读取多个路径下的多个文件
+
+```json
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "parameter": {
+ "accessKey": "",
+ "secretKey": "",
+ "region": "",
+ "bucket": "",
+ "objects": ["dir/.+\\.xml","bbb/ccc.xml"],
+ "column": [
+ {
+ "index": 0,
+ "type": "string"
+ },
+ {
+ "index": 1,
+ "type": "string"
+ },
+ {
+ "index": 2,
+ "type": "int"
+ },
+ {
+ "index": 3,
+ "type": "int"
+ }
+ ],
+ "encoding": "",
+ "fieldDelimiter": ""
+ },
+ "name": "s3reader"
+ },
+ "writer": {
+ "name": "streamwriter",
+ "parameter": {
+ "print": true
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": 1,
+ "bytes": 0
+ },
+ "errorLimit": {
+ "record": 100
+ },
+ "log": {
+ "isLogger": false,
+ "level": "debug",
+ "path": "",
+ "pattern": ""
+ }
+ }
+ }
+}
+```
+
+
+
+#### 4、断点续传
+
+开启断点续传功能需要给 column 中的每个字段都加上 name,name 值可以为不重复的任意字符串。并配置 restore,
+
+restore 中 restoreColumnIndex 的值需选择 reader 中任意一个字段的 index,而 restoreColumnName 选取相应
+
+字段的 name 的值。实际上这几个字段只是为了开启断点续传的功能用的,内部是使用文件流的偏移量进行恢复的。
+
+```json
+{
+ "job": {
+ "content": [
+ {
+ "reader": {
+ "parameter": {
+ "accessKey": "test",
+ "secretKey": "test",
+ "endpoint": "http://127.0.0.1:9090",
+ "region": "us-west-2",
+ "bucket": "test",
+ "objects": ["people_20210426001.csv"],
+ "column": [
+ {
+ "name": "id",
+ "index": 0,
+ "type": "int"
+ },
+ {
+ "name": "value1",
+ "index": 1,
+ "type": "string"
+ },
+ {
+ "name": "value2",
+ "index": 2,
+ "type": "string"
+ }
+ ],
+ "encoding": "UTF-8",
+ "fieldDelimiter": ",",
+ "isFirstLineHeader": true
+ },
+ "name": "s3reader"
+ },
+ "writer": {
+ "name": "mysqlwriter",
+ "parameter": {
+ "username": "",
+ "password": "",
+ "connection": [
+ {
+ "jdbcUrl": "jdbc:mysql://localhost:3306/abc?useSSL=false",
+ "table": [
+ "people_bak_20210421"
+ ]
+ }
+ ],
+ "postSql": [],
+ "batchSize": 100,
+ "writeMode": "insert",
+ "column": [
+ {
+ "name": "id",
+ "type": "int"
+ },
+ {
+ "name": "name",
+ "type": "varchar"
+ },
+ {
+ "name": "uuid",
+ "type": "varchar"
+ }
+ ]
+ }
+ }
+ }
+ ],
+ "setting": {
+ "speed": {
+ "channel": 1,
+ "bytes": 0
+ },
+ "errorLimit": {
+ "record": 100
+ },
+ "restore": {
+ "maxRowNumForCheckpoint": 100,
+ "isStream" : true,
+ "isRestore": true,
+ "restoreColumnName" : "id",
+ "restoreColumnIndex" : 0
+ },
+ "log": {
+ "isLogger": false,
+ "level": "debug",
+ "path": "",
+ "pattern": ""
+ }
+ }
+ }
+}
+```