diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 4567b5b414301..54c240e84243a 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -73,6 +73,8 @@
     "ffill",
     "first",
     "head",
+    "idxmax",
+    "idxmin",
     "last",
     "median",
     "nunique",
@@ -588,6 +590,8 @@ class GroupByCythonAgg:
             "prod",
             "min",
             "max",
+            "idxmin",
+            "idxmax",
             "mean",
             "median",
             "var",
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
index 240b262ca6151..4e80be8fb0fc6 100644
--- a/doc/source/user_guide/groupby.rst
+++ b/doc/source/user_guide/groupby.rst
@@ -517,8 +517,8 @@ listed below, those with a ``*`` do *not* have a Cython-optimized implementation
         :meth:`~.DataFrameGroupBy.count`;Compute the number of non-NA values in the groups
         :meth:`~.DataFrameGroupBy.cov` * ;Compute the covariance of the groups
         :meth:`~.DataFrameGroupBy.first`;Compute the first occurring value in each group
-        :meth:`~.DataFrameGroupBy.idxmax` *;Compute the index of the maximum value in each group
-        :meth:`~.DataFrameGroupBy.idxmin` *;Compute the index of the minimum value in each group
+        :meth:`~.DataFrameGroupBy.idxmax`;Compute the index of the maximum value in each group
+        :meth:`~.DataFrameGroupBy.idxmin`;Compute the index of the minimum value in each group
         :meth:`~.DataFrameGroupBy.last`;Compute the last occurring value in each group
         :meth:`~.DataFrameGroupBy.max`;Compute the maximum value in each group
         :meth:`~.DataFrameGroupBy.mean`;Compute the mean of each group
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index b74f93942c411..e844ebf0e8440 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -303,6 +303,7 @@ Performance improvements
 - Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`)
 - Performance improvement in :meth:`Index.difference` (:issue:`55108`)
 - Performance improvement in :meth:`Series.duplicated` for pyarrow dtypes (:issue:`55255`)
+- Performance improvement in :meth:`SeriesGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`DataFrameGroupBy.idxmin` (:issue:`54234`)
 - Performance improvement when indexing with more than 4 keys (:issue:`54550`)
 - Performance improvement when localizing time to UTC (:issue:`55241`)
 
@@ -403,10 +404,11 @@ Plotting
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 - Bug in :class:`.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`)
+- Bug in :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, and :meth:`.SeriesGroupBy.idxmax` would not retain :class:`.Categorical` dtype when the index was a :class:`.CategoricalIndex` that contained NA values (:issue:`54234`)
+- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` when ``observed=False`` and ``f="idxmin"`` or ``f="idxmax"`` would incorrectly raise on unobserved categories (:issue:`54234`)
 - Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
 - Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
 - Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
--
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi
index d165ddd6c8afa..609663c721950 100644
--- a/pandas/_libs/groupby.pyi
+++ b/pandas/_libs/groupby.pyi
@@ -181,6 +181,18 @@ def group_min(
     mask: np.ndarray | None = ...,
     result_mask: np.ndarray | None = ...,
 ) -> None: ...
+def group_idxmin_idxmax(
+    out: npt.NDArray[np.intp],
+    counts: npt.NDArray[np.int64],
+    values: np.ndarray,  # ndarray[groupby_t, ndim=2]
+    labels: npt.NDArray[np.intp],
+    min_count: int = ...,
+    is_datetimelike: bool = ...,
+    mask: np.ndarray | None = ...,
+    name: str = ...,
+    skipna: bool = ...,
+    result_mask: np.ndarray | None = ...,
+) -> None: ...
 def group_cummin(
     out: np.ndarray,  # groupby_t[:, ::1]
     values: np.ndarray,  # ndarray[groupby_t, ndim=2]
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 7635b261d4149..6f9b33b042959 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1794,6 +1794,151 @@ cdef group_min_max(
         )
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def group_idxmin_idxmax(
+    intp_t[:, ::1] out,
+    int64_t[::1] counts,
+    ndarray[numeric_object_t, ndim=2] values,
+    const intp_t[::1] labels,
+    Py_ssize_t min_count=-1,
+    bint is_datetimelike=False,
+    const uint8_t[:, ::1] mask=None,
+    str name="idxmin",
+    bint skipna=True,
+    uint8_t[:, ::1] result_mask=None,
+):
+    """
+    Compute index of minimum/maximum of columns of `values`, in row groups `labels`.
+
+    This function only computes the row number where the minimum/maximum occurs, we'll
+    take the corresponding index value after this function.
+
+    Parameters
+    ----------
+    out : np.ndarray[intp, ndim=2]
+        Array to store result in.
+    counts : np.ndarray[int64]
+        Input as a zeroed array, populated by group sizes during algorithm
+    values : np.ndarray[numeric_object_t, ndim=2]
+        Values to find column-wise min/max of.
+    labels : np.ndarray[np.intp]
+        Labels to group by.
+    min_count : Py_ssize_t, default -1
+        The minimum number of non-NA group elements, NA result if threshold
+        is not met.
+    is_datetimelike : bool
+        True if `values` contains datetime-like entries.
+    name : {"idxmin", "idxmax"}, default "idxmin"
+        Whether to compute idxmin or idxmax.
+    mask : ndarray[bool, ndim=2], optional
+        If not None, indices represent missing values,
+        otherwise the mask will not be used
+    skipna : bool, default True
+        Flag to ignore nan values during truth testing
+    result_mask : ndarray[bool, ndim=2], optional
+        If not None, these specify locations in the output that are NA.
+        Modified in-place.
+
+    Notes
+    -----
+    This method modifies the `out` parameter, rather than returning an object.
+    `counts` is modified to hold group sizes
+    """
+    cdef:
+        Py_ssize_t i, j, N, K, lab
+        numeric_object_t val
+        numeric_object_t[:, ::1] group_min_or_max
+        bint uses_mask = mask is not None
+        bint isna_entry
+        bint compute_max = name == "idxmax"
+
+    assert name == "idxmin" or name == "idxmax"
+
+    # TODO(cython3):
+    # Instead of `labels.shape[0]` use `len(labels)`
+    if not len(values) == labels.shape[0]:
+        raise AssertionError("len(index) != len(labels)")
+
+    N, K = (<object>values).shape
+
+    if numeric_object_t is object:
+        group_min_or_max = np.empty((<object>out).shape, dtype=object)
+    else:
+        group_min_or_max = np.empty_like(out, dtype=values.dtype)
+    if N > 0 and K > 0:
+        # When N or K is zero, we never use group_min_or_max
+        group_min_or_max[:] = _get_min_or_max(
+            values[0, 0], compute_max, is_datetimelike
+        )
+
+    # When using transform, we need a valid value for take in the case
+    # a category is not observed; these values will be dropped
+    out[:] = 0
+
+    # TODO(cython3): De-duplicate once conditional-nogil is available
+    if numeric_object_t is object:
+        for i in range(N):
+            lab = labels[i]
+            if lab < 0:
+                continue
+
+            for j in range(K):
+                if not skipna and out[lab, j] == -1:
+                    # Once we've hit NA there is no going back
+                    continue
+                val = values[i, j]
+
+                if uses_mask:
+                    isna_entry = mask[i, j]
+                else:
+                    # TODO(cython3): use _treat_as_na here
+                    isna_entry = checknull(val)
+
+                if isna_entry:
+                    if not skipna:
+                        out[lab, j] = -1
+                else:
+                    if compute_max:
+                        if val > group_min_or_max[lab, j]:
+                            group_min_or_max[lab, j] = val
+                            out[lab, j] = i
+                    else:
+                        if val < group_min_or_max[lab, j]:
+                            group_min_or_max[lab, j] = val
+                            out[lab, j] = i
+    else:
+        with nogil:
+            for i in range(N):
+                lab = labels[i]
+                if lab < 0:
+                    continue
+
+                for j in range(K):
+                    if not skipna and out[lab, j] == -1:
+                        # Once we've hit NA there is no going back
+                        continue
+                    val = values[i, j]
+
+                    if uses_mask:
+                        isna_entry = mask[i, j]
+                    else:
+                        isna_entry = _treat_as_na(val, is_datetimelike)
+
+                    if isna_entry:
+                        if not skipna:
+                            out[lab, j] = -1
+                    else:
+                        if compute_max:
+                            if val > group_min_or_max[lab, j]:
+                                group_min_or_max[lab, j] = val
+                                out[lab, j] = i
+                        else:
+                            if val < group_min_or_max[lab, j]:
+                                group_min_or_max[lab, j] = val
+                                out[lab, j] = i
+
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_max(
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 9a2fe38fe3ed4..eec833c600177 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2701,12 +2701,22 @@ def _groupby_op(
         dtype = self.dtype
         if how in ["sum", "prod", "cumsum", "cumprod", "skew"]:
             raise TypeError(f"{dtype} type does not support {how} operations")
-        if how in ["min", "max", "rank"] and not dtype.ordered:
+        if how in ["min", "max", "rank", "idxmin", "idxmax"] and not dtype.ordered:
             # raise TypeError instead of NotImplementedError to ensure we
             #  don't go down a group-by-group path, since in the empty-groups
             #  case that would fail to raise
             raise TypeError(f"Cannot perform {how} with non-ordered Categorical")
-        if how not in ["rank", "any", "all", "first", "last", "min", "max"]:
+        if how not in [
+            "rank",
+            "any",
+            "all",
+            "first",
+            "last",
+            "min",
+            "max",
+            "idxmin",
+            "idxmax",
+        ]:
             if kind == "transform":
                 raise TypeError(f"{dtype} type does not support {how} operations")
             raise TypeError(f"{dtype} dtype does not support aggregation '{how}'")
@@ -2716,7 +2726,7 @@ def _groupby_op(
         if how == "rank":
             assert self.ordered  # checked earlier
             npvalues = self._ndarray
-        elif how in ["first", "last", "min", "max"]:
+        elif how in ["first", "last", "min", "max", "idxmin", "idxmax"]:
             npvalues = self._ndarray
             result_mask = np.zeros(ngroups, dtype=bool)
         else:
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 56d3711c7d13b..c8447397c7bfe 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -1506,9 +1506,13 @@ def _groupby_op(
             arity = op._cython_arity.get(op.how, 1)
             result_mask = np.tile(result_mask, (arity, 1)).T
 
-        # res_values should already have the correct dtype, we just need to
-        #  wrap in a MaskedArray
-        return self._maybe_mask_result(res_values, result_mask)
+        if op.how in ["idxmin", "idxmax"]:
+            # Result values are indexes to take, keep as ndarray
+            return res_values
+        else:
+            # res_values should already have the correct dtype, we just need to
+            #  wrap in a MaskedArray
+            return self._maybe_mask_result(res_values, result_mask)
 
 
 def transpose_homogeneous_masked_arrays(
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 838792ceb45e9..2d50adbc93f9d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -68,7 +68,10 @@
     deprecate_nonkeyword_arguments,
     doc,
 )
-from pandas.util._exceptions import find_stack_level
+from pandas.util._exceptions import (
+    find_stack_level,
+    rewrite_warning,
+)
 from pandas.util._validators import (
     validate_ascending,
     validate_bool_kwarg,
@@ -11371,7 +11374,20 @@ def _get_data() -> DataFrame:
                     row_index = np.tile(np.arange(nrows), ncols)
                     col_index = np.repeat(np.arange(ncols), nrows)
                     ser = Series(arr, index=col_index, copy=False)
-                    result = ser.groupby(row_index).agg(name, **kwds)
+                    # GroupBy will raise a warning with SeriesGroupBy as the object,
+                    # likely confusing users
+                    with rewrite_warning(
+                        target_message=(
+                            f"The behavior of SeriesGroupBy.{name} with all-NA values"
+                        ),
+                        target_category=FutureWarning,
+                        new_message=(
+                            f"The behavior of {type(self).__name__}.{name} with all-NA "
+                            "values, or any-NA and skipna=False, is deprecated. In "
+                            "a future version this will raise ValueError"
+                        ),
+                    ):
+                        result = ser.groupby(row_index).agg(name, **kwds)
                     result.index = df.index
                     if not skipna and name not in ("any", "all"):
                         mask = df.isna().to_numpy(dtype=np.bool_).any(axis=1)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index f1b97fc5e88f6..67b5c483fcb97 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -89,6 +89,7 @@ class providing the base-class of operations.
 )
 from pandas.core.dtypes.missing import (
     isna,
+    na_value_for_dtype,
     notna,
 )
 
@@ -1879,13 +1880,15 @@ def _agg_general(
         min_count: int = -1,
         *,
         alias: str,
-        npfunc: Callable,
+        npfunc: Callable | None = None,
+        **kwargs,
     ):
         result = self._cython_agg_general(
             how=alias,
             alt=npfunc,
             numeric_only=numeric_only,
             min_count=min_count,
+            **kwargs,
         )
         return result.__finalize__(self.obj, method="groupby")
 
@@ -1935,7 +1938,7 @@ def _agg_py_fallback(
     def _cython_agg_general(
         self,
         how: str,
-        alt: Callable,
+        alt: Callable | None = None,
         numeric_only: bool = False,
         min_count: int = -1,
         **kwargs,
@@ -1963,16 +1966,19 @@ def array_func(values: ArrayLike) -> ArrayLike:
                 # TODO: avoid special casing SparseArray here
                 if how in ["any", "all"] and isinstance(values, SparseArray):
                     pass
-                elif how in ["any", "all", "std", "sem"]:
+                elif alt is None or how in ["any", "all", "std", "sem"]:
                     raise  # TODO: re-raise as TypeError?  should not be reached
             else:
                 return result
 
+            assert alt is not None
             result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
             return result
 
         new_mgr = data.grouped_reduce(array_func)
         res = self._wrap_agged_manager(new_mgr)
+        if how in ["idxmin", "idxmax"]:
+            res = self._wrap_idxmax_idxmin(res)
         out = self._wrap_aggregated_output(res)
         if self.axis == 1:
             out = out.infer_objects(copy=False)
@@ -5730,12 +5736,12 @@ def _idxmax_idxmin(
         axis: Axis | None | lib.NoDefault = lib.no_default,
         skipna: bool = True,
         numeric_only: bool = False,
-    ):
+    ) -> NDFrameT:
         """Compute idxmax/idxmin.
 
         Parameters
         ----------
-        how: {"idxmin", "idxmax"}
+        how : {'idxmin', 'idxmax'}
             Whether to compute idxmin or idxmax.
         axis : {{0 or 'index', 1 or 'columns'}}, default None
             The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
@@ -5785,18 +5791,24 @@ def _idxmax_idxmin(
                 if numeric_only:
                     data = data._get_numeric_data()
                 raise_err = len(data.columns) > 0
-        else:
-            raise_err = False
-        if raise_err:
-            raise ValueError(
-                f"Can't get {how} of an empty group due to unobserved categories. "
-                "Specify observed=True in groupby instead."
-            )
 
-        try:
-            if self.obj.ndim == 1:
-                result = self._op_via_apply(how, skipna=skipna)
-            else:
+            if raise_err:
+                raise ValueError(
+                    f"Can't get {how} of an empty group due to unobserved categories. "
+                    "Specify observed=True in groupby instead."
+                )
+        elif not skipna:
+            if self._obj_with_exclusions.isna().any(axis=None):
+                warnings.warn(
+                    f"The behavior of {type(self).__name__}.{how} with all-NA "
+                    "values, or any-NA and skipna=False, is deprecated. In a future "
+                    "version this will raise ValueError",
+                    FutureWarning,
+                    stacklevel=find_stack_level(),
+                )
+
+        if axis == 1:
+            try:
 
                 def func(df):
                     method = getattr(df, how)
@@ -5806,28 +5818,49 @@ def func(df):
                 result = self._python_apply_general(
                     func, self._obj_with_exclusions, not_indexed_same=True
                 )
-        except ValueError as err:
-            name = "argmax" if how == "idxmax" else "argmin"
-            if f"attempt to get {name} of an empty sequence" in str(err):
-                raise ValueError(
-                    f"Can't get {how} of an empty group due to unobserved categories. "
-                    "Specify observed=True in groupby instead."
-                ) from None
-            raise
+            except ValueError as err:
+                name = "argmax" if how == "idxmax" else "argmin"
+                if f"attempt to get {name} of an empty sequence" in str(err):
+                    raise ValueError(
+                        f"Can't get {how} of an empty group due to unobserved "
+                        "categories. Specify observed=True in groupby instead."
+                    ) from None
+                raise
+            return result
 
-        result = result.astype(self.obj.index.dtype) if result.empty else result
+        result = self._agg_general(
+            numeric_only=numeric_only,
+            min_count=1,
+            alias=how,
+            skipna=skipna,
+        )
+        return result
 
-        if not skipna:
-            has_na_value = result.isnull().any(axis=None)
-            if has_na_value:
-                warnings.warn(
-                    f"The behavior of {type(self).__name__}.{how} with all-NA "
-                    "values, or any-NA and skipna=False, is deprecated. In a future "
-                    "version this will raise ValueError",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
+    def _wrap_idxmax_idxmin(self, res: NDFrameT) -> NDFrameT:
+        index = self.obj._get_axis(self.axis)
+        if res.size == 0:
+            result = res.astype(index.dtype)
+        else:
+            if isinstance(index, MultiIndex):
+                index = index.to_flat_index()
+            values = res._values
+            assert isinstance(values, np.ndarray)
+            na_value = na_value_for_dtype(index.dtype, compat=False)
+            if isinstance(res, Series):
+                # mypy: expression has type "Series", variable has type "NDFrameT"
+                result = res._constructor(  # type: ignore[assignment]
+                    index.array.take(values, allow_fill=True, fill_value=na_value),
+                    index=res.index,
+                    name=res.name,
                 )
-
+            else:
+                data = {}
+                for k, column_values in enumerate(values.T):
+                    data[k] = index.array.take(
+                        column_values, allow_fill=True, fill_value=na_value
+                    )
+                result = self.obj._constructor(data, index=res.index)
+                result.columns = res.columns
         return result
 
 
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 607059e5183ec..6923defee4d14 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -133,6 +133,8 @@ def __init__(self, kind: str, how: str, has_dropped_na: bool) -> None:
             "all": functools.partial(libgroupby.group_any_all, val_test="all"),
             "sum": "group_sum",
             "prod": "group_prod",
+            "idxmin": functools.partial(libgroupby.group_idxmin_idxmax, name="idxmin"),
+            "idxmax": functools.partial(libgroupby.group_idxmin_idxmax, name="idxmax"),
             "min": "group_min",
             "max": "group_max",
             "mean": "group_mean",
@@ -187,7 +189,7 @@ def _get_cython_function(
                     f"function is not implemented for this dtype: "
                     f"[how->{how},dtype->{dtype_str}]"
                 )
-            elif how in ["std", "sem"]:
+            elif how in ["std", "sem", "idxmin", "idxmax"]:
                 # We have a partial object that does not have __signatures__
                 return f
             elif how == "skew":
@@ -268,6 +270,10 @@ def _get_out_dtype(self, dtype: np.dtype) -> np.dtype:
 
         if how == "rank":
             out_dtype = "float64"
+        elif how in ["idxmin", "idxmax"]:
+            # The Cython implementation only produces the row number; we'll take
+            # from the index using this in post processing
+            out_dtype = "intp"
         else:
             if dtype.kind in "iufcb":
                 out_dtype = f"{dtype.kind}{dtype.itemsize}"
@@ -399,7 +405,16 @@ def _call_cython_op(
         result = maybe_fill(np.empty(out_shape, dtype=out_dtype))
         if self.kind == "aggregate":
             counts = np.zeros(ngroups, dtype=np.int64)
-            if self.how in ["min", "max", "mean", "last", "first", "sum"]:
+            if self.how in [
+                "idxmin",
+                "idxmax",
+                "min",
+                "max",
+                "mean",
+                "last",
+                "first",
+                "sum",
+            ]:
                 func(
                     out=result,
                     counts=counts,
@@ -463,10 +478,10 @@ def _call_cython_op(
                 **kwargs,
             )
 
-        if self.kind == "aggregate":
+        if self.kind == "aggregate" and self.how not in ["idxmin", "idxmax"]:
             # i.e. counts is defined.  Locations where count<min_count
             # need to have the result set to np.nan, which may require casting,
-            # see GH#40767
+            # see GH#40767. For idxmin/idxmax is handled specially via post-processing
             if result.dtype.kind in "iu" and not is_datetimelike:
                 # if the op keeps the int dtypes, we have to use 0
                 cutoff = max(0 if self.how in ["sum", "prod"] else 1, min_count)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 5992731643d31..de72afccbf1c0 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2064,7 +2064,7 @@ def get_categorical_invalid_expected():
         with pytest.raises(klass, match=msg):
             get_result()
 
-        if op in ["min", "max"] and isinstance(columns, list):
+        if op in ["min", "max", "idxmin", "idxmax"] and isinstance(columns, list):
             # i.e. DataframeGroupBy, not SeriesGroupBy
             result = get_result(numeric_only=True)
             expected = get_categorical_invalid_expected()
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index aaac59515259b..b40c8f45b6d19 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -564,9 +564,10 @@ def test_categorical_reducers(reduction_func, observed, sort, as_index, index_ki
         values = expected["y"].values.tolist()
         if index_kind == "single":
             values = [np.nan if e == 4 else e for e in values]
+            expected["y"] = pd.Categorical(values, categories=[1, 2, 3])
         else:
             values = [(np.nan, np.nan) if e == (4, 4) else e for e in values]
-        expected["y"] = values
+            expected["y"] = values
     if reduction_func == "size":
         # size, unlike other methods, has the desired behavior in GH#49519
         expected = expected.rename(columns={0: "size"})
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index 46bf324fad1d7..0f4a73e4e2a38 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -568,7 +568,7 @@ def test_groupby_raises_category_on_category(
             assert not hasattr(gb, "corrwith")
             return
 
-    empty_groups = any(group.empty for group in gb.groups.values())
+    empty_groups = not observed and any(group.empty for group in gb.groups.values())
     if (
         not observed
         and how != "transform"
@@ -579,6 +579,9 @@ def test_groupby_raises_category_on_category(
         assert not empty_groups
         # TODO: empty_groups should be true due to unobserved categorical combinations
         empty_groups = True
+    if how == "transform":
+        # empty groups will be ignored
+        empty_groups = False
 
     klass, msg = {
         "all": (None, ""),
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index add3c94dcd36a..e9ad41ddad4f7 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -1649,7 +1649,7 @@ def test_idxmin_idxmax_transform_args(how, skipna, numeric_only):
     with tm.assert_produces_warning(FutureWarning, match=msg):
         result = gb.transform(how, 0, skipna, numeric_only)
     warn = None if skipna else FutureWarning
-    msg = f"The behavior of DataFrame.{how} with .* any-NA and skipna=False"
+    msg = f"The behavior of DataFrameGroupBy.{how} with .* any-NA and skipna=False"
     with tm.assert_produces_warning(warn, match=msg):
         expected = gb.transform(how, skipna=skipna, numeric_only=numeric_only)
     tm.assert_frame_equal(result, expected)