Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(core): enable date and time types support in vineyard-graph (and GIE) #3498

Merged
merged 4 commits into from
Jan 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-graphscope-wheels-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ jobs:
run: |
. ~/.graphscope_env
python3 -m pip install libclang
git clone --single-branch -b v0.18.2 --depth=1 https://github.com/v6d-io/v6d.git /tmp/v6d
git clone --single-branch -b v0.20.2 --depth=1 https://github.com/v6d-io/v6d.git /tmp/v6d
cd /tmp/v6d
git submodule update --init
cmake . -DCMAKE_INSTALL_PREFIX=/usr/local \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/gae.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-20.04
if: ${{ github.repository == 'alibaba/GraphScope' }}
container:
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.18.2
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.20.2
steps:
- uses: actions/checkout@v3

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/gaia.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ concurrency:

jobs:
gaia-test:
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
steps:
- uses: actions/checkout@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/gss.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
# Require the host is able to run docker without sudo and
# can `ssh localhost` without password, which may need to
# be configured manually when a new self-hosted runner is added.
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
if: ${{ github.repository == 'alibaba/GraphScope' }}
steps:
- uses: actions/checkout@v3
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/k8s-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ jobs:
fi

build-analytical:
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
needs: [changes]
if: ${{ github.repository == 'alibaba/GraphScope' }}
steps:
Expand All @@ -224,7 +224,7 @@ jobs:
retention-days: 5

build-analytical-java:
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
needs: [changes]
#if: ${{ github.repository == 'alibaba/GraphScope' }}
if: false
Expand Down Expand Up @@ -259,7 +259,7 @@ jobs:
retention-days: 5

build-interactive:
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
needs: [changes]
if: ${{ github.repository == 'alibaba/GraphScope' }}
steps:
Expand Down Expand Up @@ -301,7 +301,7 @@ jobs:
retention-days: 5

build-learning:
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
needs: [changes]
if: ${{ github.repository == 'alibaba/GraphScope' }}
steps:
Expand Down Expand Up @@ -616,7 +616,7 @@ jobs:
needs: [changes]
# Require the user id of the self-hosted is 1001, which may need to be
# configured manually when a new self-hosted runner is added.
runs-on: [self-hosted, manylinux2014-ci-test]
runs-on: [self-hosted, manylinux2014-ci]
if: ${{ github.repository == 'alibaba/GraphScope' }}
steps:
- uses: actions/checkout@v3
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/local-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ jobs:
path: artifacts

- name: Prepare Environment
env:
GS_TEST_DIR: ${{ github.workspace }}/gstest
run: |
# install graphscope-client
cd artifacts
Expand All @@ -300,6 +302,9 @@ jobs:
# install java
sudo apt update -y && sudo apt install openjdk-11-jdk -y

# download dataset
git clone -b master --single-branch --depth=1 https://github.com/7br/gstest.git ${GS_TEST_DIR}

- name: Setup tmate session
uses: mxschmitt/action-tmate@v3
if: false
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/networkx-forward-algo-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
run:
shell: bash --noprofile --norc -eo pipefail {0}
container:
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.18.2
image: registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.20.2
options:
--shm-size 4096m

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
sudo mkdir /opt/graphscope
sudo chown -R $(id -u):$(id -g) /opt/graphscope
python3 -m pip install click
python3 gsctl.py install-deps dev --v6d-version v0.18.2
python3 gsctl.py install-deps dev --v6d-version v0.20.2

- name: Setup tmate session
if: false
Expand Down
72 changes: 72 additions & 0 deletions analytical_engine/core/object/fragment_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,46 @@ gs::rpc::graph::DataTypePb PropertyTypeToPb(vineyard::PropertyType type) {
return gs::rpc::graph::DataTypePb::STRING;
} else if (arrow::large_utf8()->Equals(type)) {
return gs::rpc::graph::DataTypePb::STRING;
} else if (arrow::date32()->Equals(type)) {
return gs::rpc::graph::DataTypePb::DATE32;
} else if (arrow::date64()->Equals(type)) {
return gs::rpc::graph::DataTypePb::DATE64;
} else if (type->id() == arrow::Type::TIME32) {
auto time32_type = std::dynamic_pointer_cast<arrow::Time32Type>(type);
switch (time32_type->unit()) {
case arrow::TimeUnit::SECOND:
return gs::rpc::graph::DataTypePb::TIME32_S;
case arrow::TimeUnit::MILLI:
return gs::rpc::graph::DataTypePb::TIME32_MS;
case arrow::TimeUnit::MICRO:
return gs::rpc::graph::DataTypePb::TIME32_US;
case arrow::TimeUnit::NANO:
return gs::rpc::graph::DataTypePb::TIME32_NS;
}
} else if (type->id() == arrow::Type::TIME64) {
auto time64_type = std::dynamic_pointer_cast<arrow::Time64Type>(type);
switch (time64_type->unit()) {
case arrow::TimeUnit::SECOND:
return gs::rpc::graph::DataTypePb::TIME64_S;
case arrow::TimeUnit::MILLI:
return gs::rpc::graph::DataTypePb::TIME64_MS;
case arrow::TimeUnit::MICRO:
return gs::rpc::graph::DataTypePb::TIME64_US;
case arrow::TimeUnit::NANO:
return gs::rpc::graph::DataTypePb::TIME64_NS;
}
} else if (type->id() == arrow::Type::TIMESTAMP) {
auto timestamp_type = std::dynamic_pointer_cast<arrow::TimestampType>(type);
switch (timestamp_type->unit()) {
case arrow::TimeUnit::SECOND:
return gs::rpc::graph::DataTypePb::TIMESTAMP_S;
case arrow::TimeUnit::MILLI:
return gs::rpc::graph::DataTypePb::TIMESTAMP_MS;
case arrow::TimeUnit::MICRO:
return gs::rpc::graph::DataTypePb::TIMESTAMP_US;
case arrow::TimeUnit::NANO:
return gs::rpc::graph::DataTypePb::TIMESTAMP_NS;
}
} else if (arrow::large_list(arrow::int32())->Equals(type)) {
return gs::rpc::graph::DataTypePb::INT_LIST;
} else if (arrow::large_list(arrow::int64())->Equals(type)) {
Expand Down Expand Up @@ -138,6 +178,38 @@ gs::rpc::graph::DataTypePb PropertyTypeToPb(const std::string& type) {
return gs::rpc::graph::DataTypePb::LONG_LIST;
} else if (type == "float_list") {
return gs::rpc::graph::DataTypePb::FLOAT_LIST;
} else if (type == "date32[day]") {
return gs::rpc::graph::DataTypePb::DATE32;
} else if (type == "date64[ms]") {
return gs::rpc::graph::DataTypePb::DATE64;
} else if (type == "time32[s]") {
return gs::rpc::graph::DataTypePb::TIME32_S;
} else if (type == "time32[ms]") {
return gs::rpc::graph::DataTypePb::TIME32_MS;
} else if (type == "time32[us]") {
return gs::rpc::graph::DataTypePb::TIME32_US;
} else if (type == "time32[ns]") {
return gs::rpc::graph::DataTypePb::TIME32_NS;
} else if (type == "time64[s]") {
return gs::rpc::graph::DataTypePb::TIME64_S;
} else if (type == "time64[ms]") {
return gs::rpc::graph::DataTypePb::TIME64_MS;
} else if (type == "time64[us]") {
return gs::rpc::graph::DataTypePb::TIME64_US;
} else if (type == "time64[ns]") {
return gs::rpc::graph::DataTypePb::TIME64_NS;
} else if (type.substr(0, std::string("timestamp[s]").length()) ==
"timestamp[s]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_S;
} else if (type.substr(0, std::string("timestamp[ms]").length()) ==
"timestamp[ms]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_MS;
} else if (type.substr(0, std::string("timestamp[us]").length()) ==
"timestamp[us]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_US;
} else if (type.substr(0, std::string("timestamp[ns]").length()) ==
"timestamp[ns]") {
return gs::rpc::graph::DataTypePb::TIMESTAMP_NS;
} else if (type == "double_list") {
return gs::rpc::graph::DataTypePb::DOUBLE_LIST;
} else if (type == "string_list" || type == "str_list") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ public class DataType {
public static final DataType DOUBLE = new DataType(InternalDataType.DOUBLE);
public static final DataType BYTES = new DataType(InternalDataType.BYTES);
public static final DataType STRING = new DataType(InternalDataType.STRING);

// See also: `Date32` in common.proto.
public static final DataType DATE = new DataType(InternalDataType.DATE);
// See also: `Time32` in common.proto.
public static final DataType TIME = new DataType(InternalDataType.TIME);
// See also: `Timestamp` in common.proto.
public static final DataType TIMESTAMP = new DataType(InternalDataType.TIMESTAMP);

// For LIST, SET and MAP
@JsonProperty private String expression;
Expand All @@ -58,6 +64,15 @@ public static DataType toDataType(int i) {
}

public static DataType valueOf(String typeName) {
if (typeName.startsWith("DATE")) {
return DATE;
}
if (typeName.startsWith("TIME")) {
return TIME;
}
if (typeName.startsWith("TIMESTAMP")) {
return TIMESTAMP;
}
return new DataType(InternalDataType.valueOf(typeName));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,19 @@ public enum InternalDataType {
* STRING data type in InteractiveEngine, map to String in Java
*/
STRING,

/**
* INT data type in InteractiveEngine, map to Integer(int) in Java
* Date data type in InteractiveEngine, map to DateValue in Java
*/
DATE,
/**
* Date data type in InteractiveEngine, map to TimeValue in Java
*/
TIME,
/**
* Date data type in InteractiveEngine, map to DateTimeValue in Java
*/
TIMESTAMP,

/**
* SET data type, Collection Type, can mixed with list and map, example:Set, value: List<Map<List<String>,List<String>>>
Expand Down Expand Up @@ -94,6 +103,8 @@ public enum InternalDataType {
&& value != UNKNOWN
&& value != CHAR
&& value != DATE
&& value != TIME
&& value != TIMESTAMP
&& value != SHORT) {
primitiveTypes.add(value.name());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,16 @@ public static DataType parseFromDataType(
return DataType.DOUBLE;

case STRING:
return DataType.STRING;

case DATE:
return DataType.STRING;
return DataType.DATE;

case TIME:
return DataType.TIME32;

case TIMESTAMP:
return DataType.TIMESTAMP;

case BYTES:
return DataType.BYTES;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.alibaba.graphscope.gaia.proto.Common;
import com.alibaba.graphscope.gaia.proto.IrResult;
import com.alibaba.graphscope.gremlin.exception.GremlinResultParserException;
import com.google.common.base.Preconditions;

import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.Vertex;
Expand All @@ -29,10 +30,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.time.*;
import java.util.*;
import java.util.stream.Collectors;

public class ParserUtils {
Expand Down Expand Up @@ -91,6 +90,51 @@ private static Object parseCommonValue(Common.Value value) {
return value.getF64();
case STR:
return value.getStr();
case DATE:
Preconditions.checkArgument(
value.getDate().getItem() >= 0,
"Date prior to 1970-00-00 is not supported, got %d",
value.getDate().getItem());
return new Date(((long) value.getDate().getItem()) * 24 * 60 * 60 * 1000);
case TIME:
Preconditions.checkArgument(
value.getTime().getItem() >= 0,
"Time of day must be greater than 00:00:00, got %d",
value.getTime().getItem());
// gremlin-python doesn't support local time
//
// see also:
// https://github.com/apache/tinkerpop/blob/master/gremlin-python/src/main/python/gremlin_python/structure/io/graphbinaryV1.py#L105-L107
OffsetTime time =
LocalTime.ofNanoOfDay(((long) value.getTime().getItem()) * 1000_000L)
.atOffset(ZoneOffset.UTC);
// to ISO-8601 formats: HH:mm:ss.SSSSSS
//
// see also:
// - https://docs.oracle.com/javase/8/docs/api/java/time/LocalTime.html#toString--
// - https://docs.oracle.com/javase/8/docs/api/java/time/OffsetTime.html#toString--
return time.toString();
case TIMESTAMP:
Preconditions.checkArgument(
value.getTimestamp().getItem() >= 0,
"Timestamp prior to 1970-00-00 00:00:00 is not supported, got %d",
value.getTimestamp().getItem());
// gremlin-python will convert timestamp to float, that isn't what we want
//
// see also:
// https://github.com/apache/tinkerpop/blob/master/gremlin-python/src/main/python/gremlin_python/statics.py#L48
//
// We use java.util.Instant rather than java.sql.Timestamp for a UTC timestamp value
OffsetDateTime ts =
Instant.ofEpochSecond(
value.getTimestamp().getItem() / 1000L,
value.getTimestamp().getItem() % 1000L * 1000_000L)
.atOffset(ZoneOffset.UTC);
// to ISO-8601 format: uuuu-MM-dd'T'HH:mm:ss.SSSSSSXXXXX
//
// see also:
// https://docs.oracle.com/javase/8/docs/api/java/time/OffsetDateTime.html#toString--
return ts.toString();
case PAIR_ARRAY:
Common.PairArray pairs = value.getPairArray();
Map pairInMap = new HashMap();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
use std::fmt;

use ahash::HashMap;
use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike};
use dyn_type::DateTimeFormats;
use dyn_type::Object;
use dyn_type::Primitives;
use global_query::store_api::prelude::Property;
Expand Down Expand Up @@ -52,6 +54,18 @@ fn encode_runtime_prop_val(prop_val: Property) -> Object {
Property::Double(d) => Object::Primitive(Primitives::Float(d)),
Property::Bytes(v) => Object::Blob(v.into_boxed_slice()),
Property::String(s) => Object::String(s),
Property::Date(s) => match NaiveDate::parse_from_str(&s, "%Y-%m-%d") {
Ok(date) => Object::DateFormat(DateTimeFormats::Date(date)),
Err(_) => match NaiveTime::parse_from_str(&s, "%H:%M:%S.%6f") {
Ok(time) => Object::DateFormat(DateTimeFormats::Time(time)),
Err(_) => match NaiveDateTime::parse_from_str(&s, "%Y-%m-%d %H:%M:%S.%6f") {
Ok(datetime) => Object::DateFormat(DateTimeFormats::DateTime(datetime)),
Err(_) => {
unimplemented!("Failed to parse the datetime/timestamp property value: '{}'", s)
}
},
},
},
_ => unimplemented!(),
}
}
Expand Down
1 change: 1 addition & 0 deletions interactive_engine/executor/store/global_query/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
chrono = "0.4"
log = "0.4"
itertools = "0.10"
byteorder = "1.4.3"
Expand Down
Loading
Loading