diff --git a/.gitignore b/.gitignore
index 64d811d4bed..ab2dd6a91bb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,7 @@ lib/tbb
 
 # local make include
 /make/local
+/make/ucrt
 
 # python byte code
 *.pyc
diff --git a/make/compiler_flags b/make/compiler_flags
index 1552381f232..a4c6224e6da 100644
--- a/make/compiler_flags
+++ b/make/compiler_flags
@@ -161,6 +161,21 @@ ifeq ($(OS),Windows_NT)
     CXXFLAGS_OS ?= -m64
   endif
 
+  $(MATH)make/ucrt:
+    pound := \#
+    UCRT_STRING := $(shell echo '$(pound)include <windows.h>' | $(CXX) -E -dM -  | findstr _UCRT)
+    ifneq (,$(UCRT_STRING))
+      IS_UCRT ?= true
+    else
+      IS_UCRT ?= false
+    endif
+    $(shell echo "IS_UCRT ?= $(IS_UCRT)" > $(MATH)make/ucrt)
+  # The cache is written under $(MATH), so read it back from the same place.
+  include $(MATH)make/ucrt
+  ifeq ($(IS_UCRT),true)
+    CXXFLAGS_OS += -D_UCRT
+  endif
+
   ifneq (gcc,$(CXX_TYPE))
     LDLIBS_OS ?= -static-libgcc
   else
diff --git a/make/libraries b/make/libraries
index 441f5b8c3e0..7dd5dcbfd45 100644
--- a/make/libraries
+++ b/make/libraries
@@ -139,6 +139,12 @@ ifeq (Linux, $(OS))
   SHELL = /usr/bin/env bash
 endif
 
+ifeq (Windows_NT, $(OS))
+  ifeq ($(IS_UCRT),true)
+    TBB_CXXFLAGS += -D_UCRT
+  endif
+endif
+
 # If brackets or spaces are found in MAKE on Windows
 # we error, as those characters cause issues when building.
 ifeq (Windows_NT, $(OS))
diff --git a/make/tests b/make/tests
index 14a2550f08d..fcf379d6520 100644
--- a/make/tests
+++ b/make/tests
@@ -101,12 +101,15 @@ HEADER_TESTS := $(addsuffix -test,$(call findfiles,stan,*.hpp))
 
 ifeq ($(OS),Windows_NT)
   DEV_NULL = nul
+  ifeq ($(IS_UCRT),true)
+    UCRT_NULL_FLAG = -S
+  endif
 else
   DEV_NULL = /dev/null
 endif
 
 %.hpp-test : %.hpp test/dummy.cpp
-	$(COMPILE.cpp) $(CXXFLAGS) -O0 -include $^ -o $(DEV_NULL) -Wunused-local-typedefs
+	$(COMPILE.cpp) $(CXXFLAGS) -O0 -include $^ $(UCRT_NULL_FLAG) -o $(DEV_NULL) -Wunused-local-typedefs
 
 test/dummy.cpp:
 	@mkdir -p test
diff --git a/makefile b/makefile
index f34da7b164b..f14ab40df4e 100644
--- a/makefile
+++ b/makefile
@@ -125,6 +125,7 @@ clean-deps:
 	@$(RM) $(call findfiles,test,*.d.*)
 	@$(RM) $(call findfiles,lib,*.d.*)
 	@$(RM) $(call findfiles,stan,*.dSYM)
+	@$(RM) $(call findfiles,make,ucrt)
 
 clean-all: clean clean-doxygen clean-deps clean-libraries
 
diff --git a/stan/math/opencl/kernel_generator.hpp b/stan/math/opencl/kernel_generator.hpp
index a3cb839d776..fd78ce3f9a8 100644
--- a/stan/math/opencl/kernel_generator.hpp
+++ b/stan/math/opencl/kernel_generator.hpp
@@ -108,7 +108,7 @@
 #include <stan/math/opencl/kernel_generator/as_operation_cl.hpp>
 #include <stan/math/opencl/kernel_generator/name_generator.hpp>
 #include <stan/math/opencl/kernel_generator/type_str.hpp>
-
+#include <stan/math/opencl/kernel_generator/assignment_ops.hpp>
 #include <stan/math/opencl/kernel_generator/as_column_vector_or_scalar.hpp>
 #include <stan/math/opencl/kernel_generator/load.hpp>
 #include <stan/math/opencl/kernel_generator/scalar.hpp>
diff --git a/stan/math/opencl/kernel_generator/as_operation_cl.hpp b/stan/math/opencl/kernel_generator/as_operation_cl.hpp
index c6e5e819174..b87e4fb0a9d 100644
--- a/stan/math/opencl/kernel_generator/as_operation_cl.hpp
+++ b/stan/math/opencl/kernel_generator/as_operation_cl.hpp
@@ -2,6 +2,7 @@
 #define STAN_MATH_OPENCL_KERNEL_GENERATOR_AS_OPERATION_CL_HPP
 #ifdef STAN_OPENCL
 
+#include <stan/math/opencl/kernel_generator/assignment_ops.hpp>
 #include <stan/math/opencl/kernel_generator/operation_cl.hpp>
 #include <stan/math/opencl/kernel_generator/load.hpp>
 #include <stan/math/opencl/kernel_generator/scalar.hpp>
@@ -19,11 +20,12 @@ namespace math {
 /**
  * Converts any valid kernel generator expression into an operation. This is an
  * overload for operations - a no-op
+ * @tparam AssignOp ignored
  * @tparam T_operation type of the input operation
  * @param a an operation
  * @return operation
  */
-template <typename T_operation,
+template <assign_op_cl AssignOp = assign_op_cl::equals, typename T_operation,
           typename = std::enable_if_t<std::is_base_of<
               operation_cl_base, std::remove_reference_t<T_operation>>::value>>
 inline T_operation&& as_operation_cl(T_operation&& a) {
@@ -33,11 +35,13 @@ inline T_operation&& as_operation_cl(T_operation&& a) {
 /**
  * Converts any valid kernel generator expression into an operation. This is an
  * overload for scalars (arithmetic types). It wraps them into \c scalar_.
+ * @tparam AssignOp ignored
  * @tparam T_scalar type of the input scalar
  * @param a scalar
  * @return \c scalar_ wrapping the input
  */
-template <typename T_scalar, typename = require_arithmetic_t<T_scalar>,
+template <assign_op_cl AssignOp = assign_op_cl::equals, typename T_scalar,
+          typename = require_arithmetic_t<T_scalar>,
           require_not_same_t<T_scalar, bool>* = nullptr>
 inline scalar_<T_scalar> as_operation_cl(const T_scalar a) {
   return scalar_<T_scalar>(a);
@@ -47,23 +51,29 @@ inline scalar_<T_scalar> as_operation_cl(const T_scalar a) {
  * Converts any valid kernel generator expression into an operation. This is an
  * overload for bool scalars. It wraps them into \c scalar_<char> as \c bool can
  * not be used as a type of a kernel argument.
+ * @tparam AssignOp ignored
  * @param a scalar
  * @return \c scalar_<char> wrapping the input
  */
-inline scalar_<char> as_operation_cl(const bool a) { return scalar_<char>(a); }
+template <assign_op_cl AssignOp = assign_op_cl::equals>
+inline scalar_<char> as_operation_cl(const bool a) {
+  return scalar_<char>(a);
+}
 
 /**
  * Converts any valid kernel generator expression into an operation. This is an
  * overload for \c matrix_cl. It wraps them into into \c load_.
+ * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object
+ *  is assigned using standard or compound assign.
  * @tparam T_matrix_cl \c matrix_cl
  * @param a \c matrix_cl
  * @return \c load_ wrapping the input
  */
-template <typename T_matrix_cl,
+template <assign_op_cl AssignOp = assign_op_cl::equals, typename T_matrix_cl,
           typename = require_any_t<is_matrix_cl<T_matrix_cl>,
                                    is_arena_matrix_cl<T_matrix_cl>>>
-inline load_<T_matrix_cl> as_operation_cl(T_matrix_cl&& a) {
-  return load_<T_matrix_cl>(std::forward<T_matrix_cl>(a));
+inline load_<T_matrix_cl, AssignOp> as_operation_cl(T_matrix_cl&& a) {
+  return load_<T_matrix_cl, AssignOp>(std::forward<T_matrix_cl>(a));
 }
 
 /**
@@ -73,12 +83,16 @@ inline load_<T_matrix_cl> as_operation_cl(T_matrix_cl&& a) {
  * as_operation_cl_t<T>. If the return value of \c as_operation_cl() would be a
  * rvalue reference, the reference is removed, so that a variable of this type
  * actually stores the value.
+ * @tparam T type of any valid kernel generator expression
+ * @tparam AssignOp an optional `assign_op_cl` that dictates whether the object
+ *  is assigned using standard or compound assign.
  */
-template <typename T>
-using as_operation_cl_t = std::conditional_t<
-    std::is_lvalue_reference<T>::value,
-    decltype(as_operation_cl(std::declval<T>())),
-    std::remove_reference_t<decltype(as_operation_cl(std::declval<T>()))>>;
+template <typename T, assign_op_cl AssignOp = assign_op_cl::equals>
+using as_operation_cl_t
+    = std::conditional_t<std::is_lvalue_reference<T>::value,
+                         decltype(as_operation_cl<AssignOp>(std::declval<T>())),
+                         std::remove_reference_t<decltype(
+                             as_operation_cl<AssignOp>(std::declval<T>()))>>;
 
 /** @}*/
 }  // namespace math
diff --git a/stan/math/opencl/kernel_generator/assignment_ops.hpp b/stan/math/opencl/kernel_generator/assignment_ops.hpp
new file mode 100644
index 00000000000..a365492a8e1
--- /dev/null
+++ b/stan/math/opencl/kernel_generator/assignment_ops.hpp
@@ -0,0 +1,74 @@
+#ifndef STAN_MATH_OPENCL_KERNEL_GENERATOR_ASSIGNMENT_OPS
+#define STAN_MATH_OPENCL_KERNEL_GENERATOR_ASSIGNMENT_OPS
+#ifdef STAN_OPENCL
+#include <stan/math/prim/meta/is_detected.hpp>
+
+namespace stan {
+namespace math {
+
+/**
+ * Kinds of assignment (`=`, `+=`, `-=`, `/=`, `*=`) a LHS operation can use
+ */
+enum class assign_op_cl {
+  equals,
+  plus_equals,
+  minus_equals,
+  divide_equals,
+  multiply_equals
+};
+
+namespace internal {
+/**
+ * Maps an `assign_op_cl` value to the text of the matching operator. Each
+ * specialization exposes it as the static constexpr `const char*` `value`.
+ */
+template <assign_op_cl assign_op>
+struct assignment_op_str_impl;
+
+template <>
+struct assignment_op_str_impl<assign_op_cl::equals> {
+  static constexpr const char* value = " = ";
+};
+
+template <>
+struct assignment_op_str_impl<assign_op_cl::plus_equals> {
+  static constexpr const char* value = " += ";
+};
+
+template <>
+struct assignment_op_str_impl<assign_op_cl::minus_equals> {
+  static constexpr const char* value = " -= ";
+};
+
+template <>
+struct assignment_op_str_impl<assign_op_cl::divide_equals> {
+  static constexpr const char* value = " /= ";
+};
+
+template <>
+struct assignment_op_str_impl<assign_op_cl::multiply_equals> {
+  static constexpr const char* value = " *= ";
+};
+// Fallback: a type without an `assignment_op` member maps to plain " = ".
+template <typename, typename = void>
+struct assignment_op_str : assignment_op_str_impl<assign_op_cl::equals> {};
+// Types that declare `assignment_op` map to the text of that operator.
+template <typename T>
+struct assignment_op_str<T, void_t<decltype(T::assignment_op)>>
+    : assignment_op_str_impl<T::assignment_op> {};
+
+}  // namespace internal
+
+/**
+ * @tparam T type that may declare a static constexpr `assignment_op` member
+ * @return operator text for decayed `T`; `" = "` if it has no such member
+ */
+template <typename T>
+inline constexpr const char* assignment_op() noexcept {
+  return internal::assignment_op_str<std::decay_t<T>>::value;
+}
+
+}  // namespace math
+}  // namespace stan
+#endif
+#endif
diff --git a/stan/math/opencl/kernel_generator/load.hpp b/stan/math/opencl/kernel_generator/load.hpp
index 319557959b6..6f48252d89e 100644
--- a/stan/math/opencl/kernel_generator/load.hpp
+++ b/stan/math/opencl/kernel_generator/load.hpp
@@ -4,6 +4,8 @@
 
 #include <stan/math/opencl/matrix_cl.hpp>
 #include <stan/math/opencl/matrix_cl_view.hpp>
+#include <stan/math/opencl/kernel_generator/assignment_ops.hpp>
+
 #include <stan/math/opencl/kernel_generator/type_str.hpp>
 #include <stan/math/opencl/kernel_generator/name_generator.hpp>
 #include <stan/math/opencl/kernel_generator/operation_cl.hpp>
@@ -23,17 +25,20 @@ namespace math {
 /**
  * Represents an access to a \c matrix_cl in kernel generator expressions
  * @tparam T \c matrix_cl
+ * @tparam AssignOp tells higher level operations whether the final operation
+ * should be an assignment or a type of compound assignment.
  */
-template <typename T>
+template <typename T, assign_op_cl AssignOp = assign_op_cl::equals>
 class load_
-    : public operation_cl_lhs<load_<T>,
+    : public operation_cl_lhs<load_<T, AssignOp>,
                               typename std::remove_reference_t<T>::type> {
  protected:
   T a_;
 
  public:
+  static constexpr assign_op_cl assignment_op = AssignOp;
   using Scalar = typename std::remove_reference_t<T>::type;
-  using base = operation_cl<load_<T>, Scalar>;
+  using base = operation_cl<load_<T, AssignOp>, Scalar>;
   using base::var_name_;
   static_assert(disjunction<is_matrix_cl<T>, is_arena_matrix_cl<T>>::value,
                 "load_: argument a must be a matrix_cl<T>!");
@@ -51,9 +56,13 @@ class load_
    * Creates a deep copy of this expression.
    * @return copy of \c *this
    */
-  inline load_<T&> deep_copy() & { return load_<T&>(a_); }
-  inline load_<const T&> deep_copy() const& { return load_<const T&>(a_); }
-  inline load_<T> deep_copy() && { return load_<T>(std::forward<T>(a_)); }
+  inline load_<T&, AssignOp> deep_copy() & { return load_<T&, AssignOp>(a_); }
+  inline load_<const T&, AssignOp> deep_copy() const& {
+    return load_<const T&, AssignOp>(a_);
+  }
+  inline load_<T, AssignOp> deep_copy() && {
+    return load_<T, AssignOp>(std::forward<T>(a_));
+  }
 
   /**
    * Generates kernel code for this expression.
@@ -327,6 +336,7 @@ class load_
     }
   }
 };
+
 /** @}*/
 }  // namespace math
 }  // namespace stan
diff --git a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp
index a82d033c5f3..167cd44ed13 100644
--- a/stan/math/opencl/kernel_generator/multi_result_kernel.hpp
+++ b/stan/math/opencl/kernel_generator/multi_result_kernel.hpp
@@ -5,6 +5,7 @@
 #include <stan/math/prim/err/check_size_match.hpp>
 #include <stan/math/prim/meta/is_kernel_expression.hpp>
 #include <stan/math/opencl/kernel_generator/name_generator.hpp>
+#include <stan/math/opencl/kernel_generator/assignment_ops.hpp>
 #include <stan/math/opencl/kernel_generator/as_operation_cl.hpp>
 #include <stan/math/opencl/kernel_generator/calc_if.hpp>
 #include <stan/math/opencl/kernel_generator/check_cl.hpp>
@@ -326,25 +327,83 @@ class results_cl {
    * Incrementing \c results_ object by \c expressions_cl object
    * executes the kernel that evaluates expressions and increments results by
    * those expressions.
+   * @tparam AssignOp an optional `assign_op_cl` that dictates whether the
+   * object is assigned using standard or compound assign.
    * @tparam T_expressions types of expressions
    * @param exprs expressions
    */
-  template <typename... T_expressions,
+  template <assign_op_cl AssignOp = assign_op_cl::plus_equals,
+            typename... T_expressions,
             typename = std::enable_if_t<sizeof...(T_results)
                                         == sizeof...(T_expressions)>>
-  void operator+=(const expressions_cl<T_expressions...>& exprs) {
+  void compound_assignment_impl(const expressions_cl<T_expressions...>& exprs) {
     index_apply<sizeof...(T_expressions)>([this, &exprs](auto... Is) {
-      auto tmp = std::tuple_cat(make_assignment_pair(
+      auto tmp = std::tuple_cat(make_assignment_pair<AssignOp>(
           std::get<Is>(results_), std::get<Is>(exprs.expressions_))...);
       index_apply<std::tuple_size<decltype(tmp)>::value>(
           [this, &tmp](auto... Is2) {
             assignment_impl(std::make_tuple(std::make_pair(
-                std::get<Is2>(tmp).first,
-                std::get<Is2>(tmp).first + std::get<Is2>(tmp).second)...));
+                std::get<Is2>(tmp).first, std::get<Is2>(tmp).second)...));
           });
     });
   }
 
+  /**
+   * Compound addition. `results(...) += expressions(...)` executes the kernel
+   * that evaluates the expressions and adds each of them to the matching
+   * result.
+   * @tparam T_expressions types of expressions
+   * @param exprs expressions
+   */
+  template <typename... T_expressions,
+            typename = std::enable_if_t<sizeof...(T_results)
+                                        == sizeof...(T_expressions)>>
+  void operator+=(const expressions_cl<T_expressions...>& exprs) {
+    compound_assignment_impl<assign_op_cl::plus_equals>(exprs);
+  }
+
+  /**
+   * Compound subtraction. `results(...) -= expressions(...)` executes the
+   * kernel that evaluates the expressions and subtracts each of them from the
+   * matching result.
+   * @tparam T_expressions types of expressions
+   * @param exprs expressions
+   */
+  template <typename... T_expressions,
+            typename = std::enable_if_t<sizeof...(T_results)
+                                        == sizeof...(T_expressions)>>
+  void operator-=(const expressions_cl<T_expressions...>& exprs) {
+    compound_assignment_impl<assign_op_cl::minus_equals>(exprs);
+  }
+
+  /**
+   * Elementwise compound division. `results(...) /= expressions(...)` executes
+   * the kernel that evaluates the expressions and divides each result by the
+   * matching expression.
+   * @tparam T_expressions types of expressions
+   * @param exprs expressions
+   */
+  template <typename... T_expressions,
+            typename = std::enable_if_t<sizeof...(T_results)
+                                        == sizeof...(T_expressions)>>
+  void operator/=(const expressions_cl<T_expressions...>& exprs) {
+    compound_assignment_impl<assign_op_cl::divide_equals>(exprs);
+  }
+
+  /**
+   * Elementwise compound multiplication. `results(...) *= expressions(...)`
+   * executes the kernel that evaluates the expressions and multiplies each
+   * result by the matching expression.
+   * @tparam T_expressions types of expressions
+   * @param exprs expressions
+   */
+  template <typename... T_expressions,
+            typename = std::enable_if_t<sizeof...(T_results)
+                                        == sizeof...(T_expressions)>>
+  void operator*=(const expressions_cl<T_expressions...>& exprs) {
+    compound_assignment_impl<assign_op_cl::multiply_equals>(exprs);
+  }
+
   /**
    * Generates kernel source for evaluating given expressions into results held
    * by \c this.
@@ -525,30 +584,44 @@ class results_cl {
   /**
    * Makes a std::pair of one result and one expression and wraps it into a
    * tuple.
+   * @tparam AssignOp an optional `assign_op_cl` that dictates whether the
+   * object is assigned using standard or compound assign.
+   * @tparam T_result A non-scalar type that is normally a `result_cl`
+   * operation holding a `matrix_cl`
+   * @tparam T_expression An expression made up of operations on `matrix_cl`
+   * and scalar types.
    * @param result result
    * @param expression expression
    * @return a tuple of pair of result and expression
    */
-  template <typename T_result, typename T_expression,
+  template <assign_op_cl AssignOp = assign_op_cl::equals, typename T_result,
+            typename T_expression,
             require_all_not_t<is_without_output<T_expression>,
                               conjunction<internal::is_scalar_check<T_result>,
                                           std::is_arithmetic<std::decay_t<
                                               T_expression>>>>* = nullptr>
   static auto make_assignment_pair(T_result&& result,
                                    T_expression&& expression) {
-    return std::make_tuple(
-        std::pair<as_operation_cl_t<T_result>, as_operation_cl_t<T_expression>>(
-            as_operation_cl(std::forward<T_result>(result)),
-            as_operation_cl(std::forward<T_expression>(expression))));
+    return std::make_tuple(std::pair<as_operation_cl_t<T_result, AssignOp>,
+                                     as_operation_cl_t<T_expression>>(
+        as_operation_cl<AssignOp>(std::forward<T_result>(result)),
+        as_operation_cl(std::forward<T_expression>(expression))));
   }
 
   /**
    * If an expression does not need to be calculated this returns an empty tuple
+   * @tparam AssignOp an optional `assign_op_cl` that dictates whether the
+   * object is assigned using standard or compound assign.
+   * @tparam T_result A non-scalar type that is normally a `result_cl`
+   * operation holding a `matrix_cl`
+   * @tparam T_expression An expression made up of operations on `matrix_cl`
+   * and scalar types.
    * @param result result
    * @param expression expression
    * @return a tuple of pair of result and expression
    */
-  template <typename T_result, typename T_expression,
+  template <assign_op_cl AssignOp = assign_op_cl::equals, typename T_result,
+            typename T_expression,
             require_t<is_without_output<T_expression>>* = nullptr>
   static auto make_assignment_pair(T_result&& result,
                                    T_expression&& expression) {
@@ -558,11 +631,16 @@ class results_cl {
   /**
    * Checks on scalars are done separately in this overload instead of in
    * kernel.
+   * @tparam AssignOp an optional `assign_op_cl` that dictates whether the
+   * object is assigned using standard or compound assign.
+   * @tparam T_check A type satisfying `internal::is_scalar_check`
+   * @tparam T_pass An integral type
    * @param result result - check
    * @param pass bool scalar
    * @return an empty tuple
    */
-  template <typename T_check, typename T_pass,
+  template <assign_op_cl AssignOp = assign_op_cl::equals, typename T_check,
+            typename T_pass,
             require_t<internal::is_scalar_check<T_check>>* = nullptr,
             require_integral_t<T_pass>* = nullptr>
   static std::tuple<> make_assignment_pair(T_check&& result, T_pass&& pass) {
diff --git a/stan/math/opencl/kernel_generator/operation_cl.hpp b/stan/math/opencl/kernel_generator/operation_cl.hpp
index ef2d4977c97..43a50b59654 100644
--- a/stan/math/opencl/kernel_generator/operation_cl.hpp
+++ b/stan/math/opencl/kernel_generator/operation_cl.hpp
@@ -4,6 +4,7 @@
 
 #include <stan/math/prim/meta.hpp>
 #include <stan/math/prim/err/check_nonnegative.hpp>
+#include <stan/math/opencl/kernel_generator/assignment_ops.hpp>
 #include <stan/math/opencl/kernel_generator/type_str.hpp>
 #include <stan/math/opencl/kernel_generator/name_generator.hpp>
 #include <stan/math/opencl/matrix_cl_view.hpp>
@@ -60,7 +61,7 @@ struct kernel_parts {
             args + other.args};
   }
 
-  kernel_parts operator+=(const kernel_parts& other) {
+  kernel_parts& operator+=(const kernel_parts& other) {
     includes += other.includes;
     declarations += other.declarations;
     initialization += other.initialization;
@@ -74,6 +75,24 @@ struct kernel_parts {
   }
 };
 
+/**
+ * Writes the labelled sections of `parts` to `os`; `includes` is not written
+ * and the last two characters of `args` (if it has that many) are dropped.
+ * @param os output stream
+ * @param parts kernel parts; const so const objects/temporaries can be printed
+ * @return `os`
+ */
+inline std::ostream& operator<<(std::ostream& os, const kernel_parts& parts) {
+  os << "args:\n" << parts.args.substr(0, parts.args.size() - 2) << '\n';
+  os << "Decl:\n" << parts.declarations << '\n';
+  os << "Init:\n" << parts.initialization << '\n';
+  os << "body:\n" << parts.body << '\n';
+  os << "body_suffix:\n" << parts.body_suffix << '\n';
+  os << "reduction_1d:\n" << parts.reduction_1d << '\n';
+  os << "reduction_2d:\n" << parts.reduction_2d << std::endl;
+  return os;
+}
+
 /**
  * Base for all kernel generator operations.
  * @tparam Derived derived type
@@ -201,7 +220,7 @@ class operation_cl : public operation_cl_base {
         generated, generated_all, ng, row_index_name, col_index_name, false);
     kernel_parts out_parts = result.get_kernel_parts_lhs(
         generated, generated_all, ng, row_index_name, col_index_name);
-    out_parts.body += " = " + derived().var_name_ + ";\n";
+    out_parts.body += assignment_op<T_result>() + derived().var_name_ + ";\n";
     parts += out_parts;
     return parts;
   }
diff --git a/stan/math/opencl/prim/normal_lccdf.hpp b/stan/math/opencl/prim/normal_lccdf.hpp
index e9e7f97c079..24a49a70f6f 100644
--- a/stan/math/opencl/prim/normal_lccdf.hpp
+++ b/stan/math/opencl/prim/normal_lccdf.hpp
@@ -82,13 +82,12 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> normal_lccdf(
   matrix_cl<double> mu_deriv_cl;
   matrix_cl<double> sigma_deriv_cl;
 
-  results(check_y_not_nan, check_mu_finite, check_sigma_positive, lccdf_cl,
-          y_deriv_cl, mu_deriv_cl, sigma_deriv_cl)
-      = expressions(y_not_nan_expr, mu_finite_expr, sigma_positive_expr,
-                    lccdf_expr, calc_if<!is_constant<T_y_cl>::value>(y_deriv),
+  results(check_y_not_nan, check_mu_finite, check_sigma_positive)
+      = expressions(y_not_nan_expr, mu_finite_expr, sigma_positive_expr);
+  results(lccdf_cl, y_deriv_cl, mu_deriv_cl, sigma_deriv_cl)
+      = expressions(lccdf_expr, calc_if<!is_constant<T_y_cl>::value>(y_deriv),
                     calc_if<!is_constant<T_loc_cl>::value>(mu_deriv),
                     calc_if<!is_constant<T_scale_cl>::value>(sigma_deriv));
-
   T_partials_return lccdf = LOG_HALF + sum(from_matrix_cl(lccdf_cl));
 
   auto ops_partials = make_partials_propagator(y_col, mu_col, sigma_col);
diff --git a/stan/math/opencl/rev.hpp b/stan/math/opencl/rev.hpp
index 7303a5d34e3..f48de0b806f 100644
--- a/stan/math/opencl/rev.hpp
+++ b/stan/math/opencl/rev.hpp
@@ -50,6 +50,7 @@
 #include <stan/math/opencl/rev/fmax.hpp>
 #include <stan/math/opencl/rev/fmin.hpp>
 #include <stan/math/opencl/rev/fmod.hpp>
+#include <stan/math/opencl/rev/grad.hpp>
 #include <stan/math/opencl/rev/hypot.hpp>
 #include <stan/math/opencl/rev/inv.hpp>
 #include <stan/math/opencl/rev/inv_cloglog.hpp>
diff --git a/stan/math/opencl/rev/adjoint_results.hpp b/stan/math/opencl/rev/adjoint_results.hpp
index c42297b418d..4a2dede57d0 100644
--- a/stan/math/opencl/rev/adjoint_results.hpp
+++ b/stan/math/opencl/rev/adjoint_results.hpp
@@ -41,22 +41,24 @@ class adjoint_results_cl : protected results_cl<T_results...> {
     index_apply<sizeof...(T_expressions)>([&](auto... Is) {
       auto scalars = std::tuple_cat(select_scalar_assignments(
           std::get<Is>(this->results_), std::get<Is>(exprs.expressions_))...);
-      auto nonscalars_tmp = std::tuple_cat(select_nonscalar_assignments(
-          std::get<Is>(this->results_), std::get<Is>(exprs.expressions_))...);
+      auto nonscalars_tmp = std::tuple_cat(
+          select_nonscalar_assignments<assign_op_cl::plus_equals>(
+              std::get<Is>(this->results_),
+              std::get<Is>(exprs.expressions_))...);
 
       index_apply<std::tuple_size<decltype(nonscalars_tmp)>::value>(
           [&](auto... Is_nonscal) {
-            auto nonscalars = std::make_tuple(std::make_pair(
-                std::get<Is_nonscal>(nonscalars_tmp).first,
-                std::get<Is_nonscal>(nonscalars_tmp).first
-                    + std::get<Is_nonscal>(nonscalars_tmp).second)...);
+            auto nonscalars = std::make_tuple(
+                std::make_pair(std::get<Is_nonscal>(nonscalars_tmp).first,
+                               std::get<Is_nonscal>(nonscalars_tmp).second)...);
 
             index_apply<std::tuple_size<decltype(scalars)>::value>(
                 [&](auto... Is_scal) {
                   // evaluate all expressions
                   this->assignment_impl(std::tuple_cat(
                       nonscalars,
-                      this->make_assignment_pair(
+                      this->template make_assignment_pair<
+                          assign_op_cl::plus_equals>(
                           std::get<2>(std::get<Is_scal>(scalars)),
                           sum_2d(std::get<1>(std::get<Is_scal>(scalars))))...));
 
@@ -102,6 +104,8 @@ class adjoint_results_cl : protected results_cl<T_results...> {
 
   /**
    * Selects assignments that have non-scalar var results.
+   * @tparam AssignOp the `assign_op_cl` (required here, it has no default)
+   * that selects plain or compound assignment for the result.
    * @tparam T_result type of result. This overload is used for non-scalar vars.
    * @tparam T_expression type of expression
    * @param result result
@@ -109,16 +113,18 @@ class adjoint_results_cl : protected results_cl<T_results...> {
    * @return pair of result and expression or empty tuple (if the result is
    * check or the expression is `calc_if<false,T>`.
    */
-  template <typename T_result, typename T_expression,
+  template <assign_op_cl AssignOp, typename T_result, typename T_expression,
             require_not_stan_scalar_t<T_result>* = nullptr,
             require_st_var<T_result>* = nullptr>
-  auto select_nonscalar_assignments(const T_result& result,
+  auto select_nonscalar_assignments(T_result&& result,
                                     T_expression&& expression) {
-    return results_cl<T_results...>::make_assignment_pair(
+    return results_cl<T_results...>::template make_assignment_pair<AssignOp>(
         result.adj(), std::forward<T_expression>(expression));
   }
   /**
    * Selects assignments that have non-scalar var results.
+   * @tparam AssignOp the `assign_op_cl` requested by the caller; unlike the
+   * `results_cl` helpers it has no default here.
    * @tparam T_result type of result. This overload is used for results that are
    * either scalars or not vars.
    * @tparam T_expression type of expression
@@ -127,7 +133,7 @@ class adjoint_results_cl : protected results_cl<T_results...> {
    * @return empty tuple
    */
   template <
-      typename T_result, typename T_expression,
+      assign_op_cl AssignOp, typename T_result, typename T_expression,
       std::enable_if_t<is_stan_scalar<T_result>::value
                        || !is_var<scalar_type_t<T_result>>::value>* = nullptr>
   auto select_nonscalar_assignments(T_result&& result,
diff --git a/stan/math/opencl/rev/grad.hpp b/stan/math/opencl/rev/grad.hpp
new file mode 100644
index 00000000000..f4a1dde1dc9
--- /dev/null
+++ b/stan/math/opencl/rev/grad.hpp
@@ -0,0 +1,30 @@
+#ifndef STAN_MATH_OPENCL_REV_FUN_GRAD_HPP
+#define STAN_MATH_OPENCL_REV_FUN_GRAD_HPP
+#ifdef STAN_OPENCL
+#include <stan/math/opencl/rev.hpp>
+
+namespace stan {
+namespace math {
+
+/**
+ * Propagate chain rule to calculate gradients starting from the specified
+ * variable, then copy the adjoints of the OpenCL variable `x` to the host
+ * vector `g` (the assignment gives `g` the size of the copied adjoints).
+ *
+ * The grad() function does not itself recover any memory.  Use
+ * <code>recover_memory()</code> or
+ * <code>recover_memory_nested()</code> to recover memory.
+ *
+ * @param[in] v Value of function being differentiated
+ * @param[in] x OpenCL variable being differentiated with respect to
+ * @param[out] g Gradient, d/dx v, evaluated at x, as a host vector
+ */
+inline void grad(var& v, var_value<matrix_cl<double>>& x, Eigen::VectorXd& g) {
+  grad(v.vi_);
+  g = from_matrix_cl<Eigen::VectorXd>(x.adj());
+}
+
+}  // namespace math
+}  // namespace stan
+#endif
+#endif
diff --git a/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp b/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp
new file mode 100644
index 00000000000..3675dbd0a0f
--- /dev/null
+++ b/test/unit/math/opencl/kernel_generator/assignment_ops_test.cpp
@@ -0,0 +1,84 @@
+#ifdef STAN_OPENCL
+#include <stan/math/prim/fun/Eigen.hpp>
+#include <stan/math/opencl/kernel_generator.hpp>
+#include <stan/math/opencl/matrix_cl.hpp>
+#include <stan/math/opencl/copy.hpp>
+#include <stan/math.hpp>
+#include <test/unit/util.hpp>
+#include <gtest/gtest.h>
+#include <string>
+
+// `results(...) += expressions(...)` must agree with Eigen's `+=` on the host.
+TEST(KernelGenerator, plus_equals) {
+  using stan::math::from_matrix_cl;
+  using stan::math::matrix_cl;
+  using stan::math::to_matrix_cl;
+  Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10);
+  Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10);
+  Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10);
+  matrix_cl<double> A_cl = to_matrix_cl(A);
+  matrix_cl<double> B_cl = to_matrix_cl(B);
+  matrix_cl<double> C_cl = to_matrix_cl(C);
+  // Host reference first, then the same update through the kernel generator.
+  C += A + B;
+  results(C_cl) += expressions(A_cl + B_cl);
+  Eigen::MatrixXd C_cl_host = from_matrix_cl(C_cl);
+  EXPECT_MATRIX_EQ(C_cl_host, C);
+}
+
+// `results(...) -= expressions(...)` must agree with Eigen's `-=` on the host.
+TEST(KernelGenerator, minus_equals) {
+  using stan::math::from_matrix_cl;
+  using stan::math::matrix_cl;
+  using stan::math::to_matrix_cl;
+  Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10);
+  Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10);
+  Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10);
+  matrix_cl<double> A_cl = to_matrix_cl(A);
+  matrix_cl<double> B_cl = to_matrix_cl(B);
+  matrix_cl<double> C_cl = to_matrix_cl(C);
+  // Host reference first, then the same update through the kernel generator.
+  C -= A + B;
+  results(C_cl) -= expressions(A_cl + B_cl);
+  Eigen::MatrixXd C_cl_host = from_matrix_cl(C_cl);
+  EXPECT_MATRIX_EQ(C_cl_host, C);
+}
+
+// `results(...) /= expressions(...)` must agree with an elementwise host `/=`.
+TEST(KernelGenerator, divide_equals) {
+  using stan::math::from_matrix_cl;
+  using stan::math::matrix_cl;
+  using stan::math::to_matrix_cl;
+  Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10);
+  Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10);
+  Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10);
+  matrix_cl<double> A_cl = to_matrix_cl(A);
+  matrix_cl<double> B_cl = to_matrix_cl(B);
+  matrix_cl<double> C_cl = to_matrix_cl(C);
+  // NOTE(review): random A + B may be close to zero; confirm the check copes.
+  C.array() /= A.array() + B.array();
+  results(C_cl) /= expressions(A_cl + B_cl);
+  Eigen::MatrixXd C_cl_host = from_matrix_cl(C_cl);
+  EXPECT_MATRIX_EQ(C_cl_host, C);
+}
+
+// `results(...) *= expressions(...)` must agree with an elementwise host `*=`.
+TEST(KernelGenerator, times_equals) {
+  using stan::math::from_matrix_cl;
+  using stan::math::matrix_cl;
+  using stan::math::to_matrix_cl;
+
+  Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10);
+  Eigen::MatrixXd B = Eigen::MatrixXd::Random(10, 10);
+  Eigen::MatrixXd C = Eigen::MatrixXd::Random(10, 10);
+  matrix_cl<double> A_cl = to_matrix_cl(A);
+  matrix_cl<double> B_cl = to_matrix_cl(B);
+  matrix_cl<double> C_cl = to_matrix_cl(C);
+  // Host reference first, then the same update through the kernel generator.
+  C.array() *= A.array() + B.array();
+  results(C_cl) *= expressions(A_cl + B_cl);
+  Eigen::MatrixXd C_cl_host = from_matrix_cl(C_cl);
+  EXPECT_MATRIX_EQ(C_cl_host, C);
+}
+
+#endif
diff --git a/test/unit/math/opencl/rev/add_test.cpp b/test/unit/math/opencl/rev/add_test.cpp
index 15c4d501b2f..f678fbc255c 100644
--- a/test/unit/math/opencl/rev/add_test.cpp
+++ b/test/unit/math/opencl/rev/add_test.cpp
@@ -76,4 +76,29 @@ TEST(OpenCLPrim, add_rev_exceptions) {
   EXPECT_THROW(stan::math::add(md11, md22), std::invalid_argument);
 }
 
+TEST(OpenCLPrim, add_aliasing) {
+  stan::math::matrix_d d1(3, 3);
+  d1 << 1, 2, 3, 4, 5, 6, 7, 8, 9;
+  using stan::math::matrix_cl;
+  using stan::math::var;
+  using stan::math::var_value;
+  using varmat_cl = var_value<matrix_cl<double>>;
+  varmat_cl d11 = stan::math::to_matrix_cl(d1);
+  // Add the same matrix as the left and right hand side
+  var res = stan::math::sum(stan::math::add(d11, d11));
+  res.grad();
+  Eigen::MatrixXd grad_res = stan::math::from_matrix_cl(d11.adj());
+  stan::math::recover_memory();
+  Eigen::Matrix<var, -1, -1> d_host = d1;
+  // Same op as above but on the host
+  var res_host = stan::math::sum(stan::math::add(d_host, d_host));
+  res_host.grad();
+  Eigen::MatrixXd grad_res_host = d_host.adj();
+  stan::math::recover_memory();
+  // Assert rather than print, so that a wrong aliased adjoint fails the test
+  ASSERT_EQ(grad_res.rows(), grad_res_host.rows());
+  ASSERT_EQ(grad_res.cols(), grad_res_host.cols());
+  EXPECT_TRUE(grad_res.isApprox(grad_res_host));
+}
+
 #endif
diff --git a/test/unit/math/opencl/rev/grad_test.cpp b/test/unit/math/opencl/rev/grad_test.cpp
new file mode 100644
index 00000000000..23d9ee330e7
--- /dev/null
+++ b/test/unit/math/opencl/rev/grad_test.cpp
@@ -0,0 +1,36 @@
+#ifdef STAN_OPENCL
+#include <stan/math.hpp>
+#include <test/unit/math/opencl/util.hpp>
+#include <test/unit/util.hpp>
+#include <gtest/gtest.h>
+#include <algorithm>
+
+TEST(OpenCLGradTest, exceptions) {
+  using stan::math::matrix_cl;
+  using stan::math::to_matrix_cl;
+  using stan::math::var;
+  using stan::math::var_value;
+  using varmat_cl = var_value<matrix_cl<double>>;
+  Eigen::VectorXd a(6);
+  a << 1, 2, 3, 4, 5, 6;
+  varmat_cl a_cl(to_matrix_cl(a));
+  varmat_cl b_cl(a_cl.block(0, 0, 3, 1));
+  varmat_cl c_cl(a_cl.block(3, 0, 3, 1));
+  Eigen::VectorXd ret_grads_cl(6);
+  using stan::math::add;
+  var ret = stan::math::sum(add(b_cl, b_cl));
+  stan::math::grad(ret, a_cl, ret_grads_cl);
+  stan::math::recover_memory();
+  Eigen::Matrix<var, -1, 1> a_host = a;
+  Eigen::Matrix<var, -1, 1> b_host = a_host.segment(0, 3);
+  Eigen::Matrix<var, -1, 1> c_host = a_host.segment(3, 3);
+  var ret_host = stan::math::sum(add(b_host, b_host));
+  Eigen::VectorXd ret_grads_host(6);
+  stan::math::grad(ret_host, a_host, ret_grads_host);
+  stan::math::recover_memory();
+  // Assert rather than print: OpenCL and host gradients of `a` must agree
+  ASSERT_EQ(ret_grads_cl.size(), ret_grads_host.size());
+  EXPECT_TRUE(ret_grads_cl.isApprox(ret_grads_host));
+}
+
+#endif