Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/type_infer' into type_infer
Browse files (browse the repository at this point in the history)
  • Loading branch information
2b3c511 committed Nov 22, 2024
2 parents a123b7e + 8a2c0b9 commit a75c4cd
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 6 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -44,6 +44,8 @@
import org.apache.commons.csv.QuoteMode;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.tsfile.common.conf.TSFileConfig;
import org.apache.tsfile.utils.Binary;
import org.apache.thrift.annotation.Nullable;
import org.apache.tsfile.common.constant.TsFileConstant;
import org.apache.tsfile.enums.TSDataType;
Expand All @@ -56,6 +58,7 @@
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -177,8 +180,6 @@ public abstract class AbstractDataTool {
protected static Boolean aligned;
protected static Session session;
protected static final LongAdder loadFileSuccessfulNum = new LongAdder();
protected static final Map<String, TSDataType> TYPE_INFER_KEY_DICT = new HashMap<>();

protected static final String DATATYPE_BOOLEAN = "boolean";
protected static final String DATATYPE_INT = "int";
protected static final String DATATYPE_LONG = "long";
Expand All @@ -189,10 +190,11 @@ public abstract class AbstractDataTool {
protected static final String DATATYPE_BLOB = "blob";
protected static final String DATATYPE_NAN = "NaN";
protected static final String DATATYPE_TEXT = "text";

protected static final String DATATYPE_NULL = "null";
protected static int batchPointSize = 100_000;

protected static final Map<String, TSDataType> TYPE_INFER_KEY_DICT = new HashMap<>();

static {
TYPE_INFER_KEY_DICT.put(DATATYPE_BOOLEAN, TSDataType.BOOLEAN);
TYPE_INFER_KEY_DICT.put(DATATYPE_INT, TSDataType.FLOAT);
Expand All @@ -201,7 +203,7 @@ public abstract class AbstractDataTool {
TYPE_INFER_KEY_DICT.put(DATATYPE_DOUBLE, TSDataType.DOUBLE);
TYPE_INFER_KEY_DICT.put(DATATYPE_TIMESTAMP, TSDataType.TIMESTAMP);
TYPE_INFER_KEY_DICT.put(DATATYPE_DATE, TSDataType.TIMESTAMP);
TYPE_INFER_KEY_DICT.put(DATATYPE_BLOB, TSDataType.TEXT);
TYPE_INFER_KEY_DICT.put(DATATYPE_BLOB, TSDataType.BLOB);
TYPE_INFER_KEY_DICT.put(DATATYPE_NAN, TSDataType.DOUBLE);
}

Expand All @@ -215,7 +217,7 @@ public abstract class AbstractDataTool {
TYPE_INFER_VALUE_DICT.put(DATATYPE_DOUBLE, TSDataType.DOUBLE);
TYPE_INFER_VALUE_DICT.put(DATATYPE_TIMESTAMP, TSDataType.TIMESTAMP);
TYPE_INFER_VALUE_DICT.put(DATATYPE_DATE, TSDataType.TIMESTAMP);
TYPE_INFER_VALUE_DICT.put(DATATYPE_BLOB, TSDataType.TEXT);
TYPE_INFER_VALUE_DICT.put(DATATYPE_BLOB, TSDataType.BLOB);
TYPE_INFER_VALUE_DICT.put(DATATYPE_TEXT, TSDataType.TEXT);
}

Expand Down Expand Up @@ -690,6 +692,8 @@ private static TSDataType typeInfer(String strValue) {
// "NaN" is returned if the NaN Literal is given in Parser
} else if (DATATYPE_NAN.equals(strValue)) {
return TYPE_INFER_KEY_DICT.get(DATATYPE_NAN);
}else if(DATATYPE_BLOB.equals(strValue)){
return TYPE_INFER_KEY_DICT.get(DATATYPE_BLOB);
} else if (strValue.length() <= 512) {
return STRING;
} else {
Expand Down Expand Up @@ -746,8 +750,30 @@ private static Object typeTrans(String value, TSDataType type) {
case DOUBLE:
return Double.parseDouble(value);
case TIMESTAMP:
return Long.parseLong(value);
case DATE:
if (value.startsWith("\"") && value.endsWith("\"")) {
value.substring(1, value.length() - 1);
}
try {
if (StringUtils.isNotBlank(value)) {
final String[] split = value.split("-");
if (split.length == 3) {
return LocalDate.of(Integer.parseInt(split[0]), Integer.parseInt(split[1]),Integer.parseInt(split[2]));
}
}
}catch (Exception e){
;
}
return null;
case BLOB:
if (value.startsWith("0x")) {
return new Binary(value.replaceFirst("0x",""), TSFileConfig.STRING_CHARSET);
}else if (value.startsWith("\"") && value.endsWith("\"")) {
return new Binary(value.substring(1, value.length() - 1),TSFileConfig.STRING_CHARSET);
}else {
return new Binary(value, TSFileConfig.STRING_CHARSET);
}
default:
return null;
}
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.thrift.TException;
import org.apache.tsfile.common.conf.TSFileConfig;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.read.common.Field;
import org.apache.tsfile.read.common.Path;
Expand Down Expand Up @@ -654,6 +655,8 @@ public static void writeCsvFile(
|| field.getDataType() == TSDataType.STRING)
&& !fieldStringValue.startsWith("root.")) {
fieldStringValue = "\"" + fieldStringValue + "\"";
}else if(field.getDataType() == TSDataType.DATE){
fieldStringValue = field.getDateV().toString();
}
csvPrinterWrapper.print(fieldStringValue);
} else {
Expand Down Expand Up @@ -747,8 +750,13 @@ public static void writeSqlFile(
headersTemp.remove(seriesList.get(index));
continue;
}
if ("TEXT".equalsIgnoreCase(timeseriesList.get(3).getStringValue())) {
final TSDataType dataType = TSDataType.valueOf(timeseriesList.get(3).toString());
if (TSDataType.TEXT == dataType || TSDataType.STRING == dataType) {
values.add("\"" + value + "\"");
}else if (TSDataType.DATE == dataType){
values.add("'"+fields.get(index).getDateV().toString()+"'");
}else if (TSDataType.BLOB == dataType){
values.add(value.replaceFirst("0x","X'")+"'");
} else {
values.add(value);
}
Expand Down

0 comments on commit a75c4cd

Please sign in to comment.