Skip to content

Commit

Permalink
Merge pull request #114 from caraml-dev/disable-broadcast
Browse files Browse the repository at this point in the history
feat: disable broadcast join
  • Loading branch information
khorshuheng authored Mar 11, 2024
2 parents 15f8bda + 1037fc5 commit 6d0b6d5
Showing 1 changed file with 1 addition and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from pyspark.sql import DataFrame, SparkSession, Window
from pyspark.sql import functions as func
from pyspark.sql.functions import (
broadcast,
col,
expr,
monotonically_increasing_id,
Expand Down Expand Up @@ -636,7 +635,7 @@ def filter_feature_table_by_time_range(
time_range_filtered_df = (
time_range_filtered_df.repartition(200)
.join(
broadcast(entities_projected), on=feature_table.entity_names, how="inner",
entities_projected, on=feature_table.entity_names, how="inner",
)
.withColumn(
"distance",
Expand Down Expand Up @@ -668,7 +667,6 @@ def _read_and_verify_entity_df_from_source(
.options(**source.spark_read_options)
.load(source.spark_path)
)

mapped_entity_df = _map_column(entity_df, source.field_mapping)

if source.event_timestamp_column not in mapped_entity_df.columns:
Expand Down

0 comments on commit 6d0b6d5

Please sign in to comment.