From 99b71dd9accc929c0b48ab15eaa64e4e9caca11b Mon Sep 17 00:00:00 2001
From: Arash Taheri-Dezfouli <ataheridezfouli@groq.com>
Date: Fri, 29 Sep 2023 14:58:32 -0700
Subject: [PATCH] GroqFlow release v4.2.1

---
 groqflow/justgroqit/compile.py                       | 12 ++++++++++++
 .../natural_language_processing/distilbert/README.md |  2 +-
 .../distilbert/distilbert.py                         |  7 ++++++-
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/groqflow/justgroqit/compile.py b/groqflow/justgroqit/compile.py
index 0bb9682..e619d43 100644
--- a/groqflow/justgroqit/compile.py
+++ b/groqflow/justgroqit/compile.py
@@ -84,10 +84,22 @@ def fire(self, state: build.GroqState):
         if state.num_chips_used != 1:
             multichip_flag = f"--multichip={state.topology}"
             cmd = cmd + [multichip_flag]
+            if not any(
+                flag.startswith("--partition-mode=")
+                for flag in state.config.compiler_flags
+            ):
+                partition_mode_flag = "--partition-mode=daisy-chain"
+                cmd = cmd + [partition_mode_flag]
 
         if state.config.groqview:
             cmd = cmd + ["--groqview"]
 
+        # Default to --effort=standard (helps models fit) unless --effort= was given
+        if not any(
+            flag.startswith("--effort=") for flag in state.config.compiler_flags
+        ):
+            cmd = cmd + ["--effort=standard"]
+
         # Add flags
         cmd = (
             cmd
diff --git a/proof_points/natural_language_processing/distilbert/README.md b/proof_points/natural_language_processing/distilbert/README.md
index b42b92b..b934a04 100644
--- a/proof_points/natural_language_processing/distilbert/README.md
+++ b/proof_points/natural_language_processing/distilbert/README.md
@@ -29,4 +29,4 @@ To build and evaluate DistilBERT:
 
 ## Expected Results
 
-It takes approximately 8 minutes for DistilBERT to build and about 2 minutes to evaluate the model's accuracy. The example returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation on 2 GroqCard™ accelerators within a GroqNode™ server.
+It takes approximately 8 minutes for DistilBERT to build and about 2 minutes to evaluate the model's accuracy. The example returns the accuracies for both the PyTorch implementation on a CPU and the Groq implementation on 4 GroqCard™ accelerators within a GroqNode™ server.
diff --git a/proof_points/natural_language_processing/distilbert/distilbert.py b/proof_points/natural_language_processing/distilbert/distilbert.py
index 9bffaab..48be522 100644
--- a/proof_points/natural_language_processing/distilbert/distilbert.py
+++ b/proof_points/natural_language_processing/distilbert/distilbert.py
@@ -33,7 +33,12 @@ def evaluate_distilbert(rebuild_policy=None, should_execute=True):
     # generate groq model
     build_name = "distilbert"
     groq_model = groqit(
-        pytorch_model, dummy_inputs, rebuild=rebuild_policy, build_name=build_name
+        pytorch_model,
+        dummy_inputs,
+        rebuild=rebuild_policy,
+        build_name=build_name,
+        num_chips=4,
+        compiler_flags=["--partition-mode=group-fit"],
     )
 
     # compute performance on CPU and GroqChip