From 733c30a915002504f8ffa97f81ea28e92b035209 Mon Sep 17 00:00:00 2001
From: Shreya Shankar <ss.shankar505@gmail.com>
Date: Tue, 24 Sep 2024 11:54:02 -0700
Subject: [PATCH] Prompt for confirmation when executing resolve without blocking.

---
 README.md                    |  2 +-
 docetl/operations/resolve.py | 17 +++++++++++++++++
 docetl/runner.py             |  6 +++++-
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 8d603a54..ce9d70be 100644
--- a/README.md
+++ b/README.md
@@ -65,4 +65,4 @@ make tests-basic
 
 That's it! You've successfully installed DocETL and are ready to start processing documents.
 
-For more detailed information on usage and configuration, please refer to our [documentation](https://shreyashankar.github.io/docetl).
+For more detailed information on usage and configuration, please refer to our [documentation](https://ucbepic.github.io/docetl).
diff --git a/docetl/operations/resolve.py b/docetl/operations/resolve.py
index 9903d271..0daa78b7 100644
--- a/docetl/operations/resolve.py
+++ b/docetl/operations/resolve.py
@@ -23,6 +23,7 @@
     validate_output,
     gen_embedding,
 )
+from rich.prompt import Confirm
 
 
 def compare_pair(
@@ -195,6 +196,22 @@ def execute(self, input_data: List[Dict]) -> Tuple[List[Dict], float]:
         blocking_keys = self.config.get("blocking_keys", [])
         blocking_threshold = self.config.get("blocking_threshold")
         blocking_conditions = self.config.get("blocking_conditions", [])
+
+        if not blocking_threshold and not blocking_conditions:
+            # No blocking threshold or conditions configured: stop self.status (if set) and ask the user to confirm
+            if self.status:
+                self.status.stop()
+            if not Confirm.ask(
+                f"[yellow]Warning: No blocking keys or conditions specified. "
+                f"This may result in a large number of comparisons. "
+                f"We recommend specifying at least one blocking key or condition, or using the optimizer to automatically come up with these. "
+                f"Do you want to continue without blocking?[/yellow]",
+            ):
+                raise ValueError("Operation cancelled by user.")
+
+            if self.status:
+                self.status.start()
+
         input_schema = self.config.get("input", {}).get("schema", {})
         if not blocking_keys:
             # Set them to all keys in the input data
diff --git a/docetl/runner.py b/docetl/runner.py
index 2c6ceb73..dc783f1c 100644
--- a/docetl/runner.py
+++ b/docetl/runner.py
@@ -229,7 +229,11 @@ def execute_step(
 
             operation_class = get_operation(op_object["type"])
             operation_instance = operation_class(
-                op_object, self.default_model, self.max_threads, self.console
+                op_object,
+                self.default_model,
+                self.max_threads,
+                self.console,
+                self.status,
             )
             if op_object["type"] == "equijoin":
                 left_data = self.datasets[op_object["left"]]