From f4ee9b6f933f25fc9f1c67023d5803005d8c9e16 Mon Sep 17 00:00:00 2001 From: Cory Mickelson Date: Sun, 16 Sep 2018 18:08:34 -0700 Subject: [PATCH] fix batchOffset count when compared to batchSize --- src/Writer.cc | 57 ++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/src/Writer.cc b/src/Writer.cc index cfe194d..8168a7c 100644 --- a/src/Writer.cc +++ b/src/Writer.cc @@ -5,9 +5,9 @@ #include "Writer.h" #include +#include #include #include -#include #include #include @@ -138,17 +138,17 @@ Writer::Schema(const CallbackInfo& info) break; } case TIMESTAMP: { - kind = TypeKind::TIMESTAMP; + kind = TypeKind::TIMESTAMP; schemaType = "timestamp"; break; } case DECIMAL: { - kind = TypeKind::DECIMAL; + kind = TypeKind::DECIMAL; schemaType = "decimal"; break; } case DATE: { - kind = TypeKind::DATE; + kind = TypeKind::DATE; schemaType = "date"; break; } @@ -158,7 +158,7 @@ Writer::Schema(const CallbackInfo& info) break; } case VARCHAR: { - kind = TypeKind::VARCHAR; + kind = TypeKind::VARCHAR; schemaType = "varchar"; break; } @@ -166,7 +166,7 @@ Writer::Schema(const CallbackInfo& info) case MAP: case STRUCT: case UNION: - default:{ + default: { Error::New(info.Env(), "Unsupported type").ThrowAsJavaScriptException(); break; } @@ -193,6 +193,11 @@ Writer::Add(const CallbackInfo& info) .ThrowAsJavaScriptException(); return; } + if (batchOffset == batchSize - 1) { + row->numElements = batchOffset; + writer->add(*batch); + batchOffset = 0; + } for (uint32_t i = 0; i < properties.Length(); i++) { string p = properties.Get(i).As(); if (schema[i].first != p) { @@ -366,13 +371,8 @@ Writer::Add(const CallbackInfo& info) } } } - if (batchOffset == batchSize) { - row->numElements = batchSize; - writer->add(*batch); - batchOffset = 0; - } else { - ++batchOffset; - } + + batchOffset++; } void @@ -398,14 +398,14 @@ class ImportCSVWorker : public AsyncWorker Writer& writer; string csv; - string columnString(string v, uint64_t idx) + string columnString(const string& v, uint64_t idx) { uint64_t col = 0; size_t start = 0; - size_t end = v.find(","); + size_t end = v.find(','); while (col < idx && end != string::npos) { start = end + 1; - end = v.find(",", start); + end = v.find(',', start); ++col; } return col == idx ? v.substr(start, end - start) : ""; @@ -424,7 +424,8 @@ class ImportCSVWorker : public AsyncWorker hasNull = true; } else { batch->notNull[i] = 1; - longBatch->data[i] = atoll(csvCol.c_str()); + char* out; + longBatch->data[i] = strtoll(csvCol.c_str(), nullptr, 10); } } longBatch->hasNulls = hasNull; @@ -473,7 +474,7 @@ class ImportCSVWorker : public AsyncWorker hasNull = true; } else { batch->notNull[i] = 1; - dblBatch->data[i] = atof(col.c_str()); + dblBatch->data[i] = strtod(col.c_str(), nullptr); // atof(col.c_str()); } } dblBatch->hasNulls = hasNull; @@ -544,11 +545,7 @@ class ImportCSVWorker : public AsyncWorker } else { batch->notNull[i] = 1; std::transform(col.begin(), col.end(), col.begin(), ::tolower); - if (col == "true" || col == "t") { - boolBatch->data[i] = true; - } else { - boolBatch->data[i] = false; - } + boolBatch->data[i] = col == "true" || col == "t"; } } boolBatch->hasNulls = hasNull; @@ -561,8 +558,7 @@ class ImportCSVWorker : public AsyncWorker uint64_t numValues, uint64_t colIndex) { - orc::LongVectorBatch* longBatch = - dynamic_cast(batch); + auto* longBatch = dynamic_cast(batch); bool hasNull = false; for (uint64_t i = 0; i < numValues; ++i) { std::string col = columnString(data[i], colIndex); @@ -571,13 +567,14 @@ class ImportCSVWorker : public AsyncWorker hasNull = true; } else { batch->notNull[i] = 1; - struct tm tm; + struct tm tm + {}; memset(&tm, 0, sizeof(struct tm)); strptime(col.c_str(), "%Y-%m-%d", &tm); time_t t = mktime(&tm); time_t t1970 = 0; double seconds = difftime(t, t1970); - int64_t days = static_cast(seconds / (60 * 60 * 24)); + auto days = static_cast(seconds / (60 * 60 * 24)); longBatch->data[i] = days; } } @@ -589,9 +586,9 @@ class ImportCSVWorker : public AsyncWorker uint64_t numValues, uint64_t colIndex) { - struct tm timeStruct; - orc::TimestampVectorBatch* tsBatch = - dynamic_cast(batch); + struct tm timeStruct + {}; + auto* tsBatch = dynamic_cast(batch); bool hasNull = false; for (uint64_t i = 0; i < numValues; ++i) { std::string col = columnString(data[i], colIndex);