diff --git a/python/ray/data/_internal/cluster_autoscaler/default_cluster_autoscaler_v2.py b/python/ray/data/_internal/cluster_autoscaler/default_cluster_autoscaler_v2.py
index 31eedb3bee35..f36b8bd0cebd 100644
--- a/python/ray/data/_internal/cluster_autoscaler/default_cluster_autoscaler_v2.py
+++ b/python/ray/data/_internal/cluster_autoscaler/default_cluster_autoscaler_v2.py
@@ -4,13 +4,16 @@
 from collections import defaultdict
 from dataclasses import dataclass
 from logging import getLogger
-from typing import TYPE_CHECKING, Dict, Optional, Tuple
+from typing import TYPE_CHECKING, Dict, Optional
 
 import ray
 from .default_autoscaling_coordinator import (
     DefaultAutoscalingCoordinator,
 )
-from ray.data._internal.average_calculator import TimeWindowAverageCalculator
+from .resource_utilization_gauge import (
+    ResourceUtilizationGauge,
+    RollingLogicalUtilizationGauge,
+)
 from ray.data._internal.cluster_autoscaler import ClusterAutoscaler
 from ray.data._internal.execution.interfaces.execution_options import ExecutionResources
 
@@ -25,22 +28,26 @@
 class _NodeResourceSpec:
 
     cpu: int
+    gpu: int
     mem: int
 
     def __post_init__(self):
         assert isinstance(self.cpu, int)
         assert self.cpu >= 0
+        assert isinstance(self.gpu, int)
+        assert self.gpu >= 0
         assert isinstance(self.mem, int)
         assert self.mem >= 0
 
     @classmethod
-    def of(cls, cpu, mem):
+    def of(cls, *, cpu=0, gpu=0, mem=0):
         cpu = math.floor(cpu)
+        gpu = math.floor(gpu)
         mem = math.floor(mem)
-        return cls(cpu, mem)
+        return cls(cpu=cpu, gpu=gpu, mem=mem)
 
     def to_bundle(self):
-        return {"CPU": self.cpu, "memory": self.mem}
+        return {"CPU": self.cpu, "GPU": self.gpu, "memory": self.mem}
 
 
 class DefaultClusterAutoscalerV2(ClusterAutoscaler):
@@ -58,9 +65,6 @@ class DefaultClusterAutoscalerV2(ClusterAutoscaler):
         termination.
 
     Notes:
-      * For now, we assume GPUs are only used by actor pools. So cluster autoscaling
-        doesn't need to consider GPU nodes. GPU nodes will scale up as the actor
-        pools that require GPUs scale up.
       * It doesn't consider multiple concurrent Datasets for now, as the cluster
         utilization is calculated by "dataset_usage / global_resources".
     """
@@ -86,25 +90,24 @@ def __init__(
         topology: "Topology",
         resource_manager: "ResourceManager",
         execution_id: str,
+        resource_utilization_calculator: Optional[ResourceUtilizationGauge] = None,
         cluster_scaling_up_util_threshold: float = DEFAULT_CLUSTER_SCALING_UP_UTIL_THRESHOLD,  # noqa: E501
         cluster_scaling_up_delta: float = DEFAULT_CLUSTER_SCALING_UP_DELTA,
         cluster_util_avg_window_s: float = DEFAULT_CLUSTER_UTIL_AVG_WINDOW_S,
         cluster_util_check_interval_s: float = DEFAULT_CLUSTER_UTIL_CHECK_INTERVAL_S,
     ):
+        if resource_utilization_calculator is None:
+            assert cluster_util_check_interval_s >= 0, cluster_util_check_interval_s
+            resource_utilization_calculator = RollingLogicalUtilizationGauge(
+                resource_manager, cluster_util_avg_window_s=cluster_util_avg_window_s
+            )
+
+        self._resource_utilization_calculator = resource_utilization_calculator
         # Threshold of cluster utilization to trigger scaling up.
         self._cluster_scaling_up_util_threshold = cluster_scaling_up_util_threshold
         assert cluster_scaling_up_delta > 0
         self._cluster_scaling_up_delta = cluster_scaling_up_delta
         assert cluster_util_avg_window_s > 0
-        # Calculator to calculate the average of cluster CPU utilization.
-        self._cluster_cpu_util_calculator = TimeWindowAverageCalculator(
-            window_s=cluster_util_avg_window_s,
-        )
-        # Calculator to calculate the average of cluster memory utilization.
-        self._cluster_mem_util_calculator = TimeWindowAverageCalculator(
-            window_s=cluster_util_avg_window_s,
-        )
-        assert cluster_util_check_interval_s >= 0
         self._cluster_util_check_interval_s = cluster_util_check_interval_s
         # Last time when the cluster utilization was checked.
         self._last_cluster_util_check_time = 0
@@ -118,37 +121,26 @@ def __init__(
         super().__init__(topology, resource_manager, execution_id)
 
     def _get_node_resource_spec_and_count(self) -> Dict[_NodeResourceSpec, int]:
-        """Get the unique node resource specs and their count in the cluster.
-
-        Similar to `_get_cluster_cpu_and_mem_util`, we only consider CPU and memory
-        resources.
-        """
-        # Filter out the head node and GPU nodes.
+        """Get the unique node resource specs and their count in the cluster."""
+        # Filter out the head node.
         node_resources = [
             node["Resources"]
             for node in ray.nodes()
-            if node["Alive"]
-            and "node:__internal_head__" not in node["Resources"]
-            and "GPU" not in node["Resources"]
+            if node["Alive"] and "node:__internal_head__" not in node["Resources"]
         ]
 
         nodes_resource_spec_count = defaultdict(int)
         for r in node_resources:
-            node_resource_spec = _NodeResourceSpec.of(r["CPU"], r["memory"])
+            node_resource_spec = _NodeResourceSpec.of(
+                cpu=r["CPU"], gpu=r.get("GPU", 0), mem=r["memory"]
+            )
             nodes_resource_spec_count[node_resource_spec] += 1
 
         return nodes_resource_spec_count
 
-    def _get_cluster_cpu_and_mem_util(self) -> Tuple[Optional[float], Optional[float]]:
-        """Return CPU and memory utilization of the cluster, or None if
-        no data was reported in the last `cluster_util_avg_window_s` seconds or
-        `_cluster_util_check_interval_s` seconds have not yet passed since the
-        last check.
-
-        We only consider CPU and memory utilization. Because for now we assume GPUs are
-        only used by actor pools. GPU node scaling will be handled by
-        `try_scale_up_or_down_actor_pool`.
-        """
+    def try_trigger_scaling(self):
+        # Note: utilization must be observed before checking `_last_request_time`,
+        # so the average cluster utilization keeps updating between requests.
         now = time.time()
         if (
             now - self._last_cluster_util_check_time
@@ -157,48 +149,21 @@ def _get_cluster_cpu_and_mem_util(self) -> Tuple[Optional[float], Optional[float
             # Update observed resource utilization
             self._last_cluster_util_check_time = now
 
-            cur_resource_usage = self._resource_manager.get_global_usage()
-            global_limits = self._resource_manager.get_global_limits()
-
-            if global_limits.cpu:
-                cpu_util = cur_resource_usage.cpu / global_limits.cpu
-            else:
-                cpu_util = 0
-            if global_limits.object_store_memory:
-                mem_util = (
-                    cur_resource_usage.object_store_memory
-                    / global_limits.object_store_memory
-                )
-            else:
-                mem_util = 0
-
-            self._cluster_cpu_util_calculator.report(cpu_util)
-            self._cluster_mem_util_calculator.report(mem_util)
-
-        avg_cpu_util = self._cluster_cpu_util_calculator.get_average()
-        avg_mem_util = self._cluster_mem_util_calculator.get_average()
-
-        return avg_cpu_util, avg_mem_util
-
-    def try_trigger_scaling(self):
-        # Note, should call this method before checking `_last_request_time`,
-        # in order to update the average cluster utilization.
-        cpu_util, mem_util = self._get_cluster_cpu_and_mem_util()
+            self._resource_utilization_calculator.observe()
 
         # Limit the frequency of autoscaling requests.
-        now = time.time()
         if now - self._last_request_time < self.MIN_GAP_BETWEEN_AUTOSCALING_REQUESTS:
             return
 
-        cpu_util = cpu_util or 0
-        mem_util = mem_util or 0
+        util = self._resource_utilization_calculator.get()
         if (
-            cpu_util < self._cluster_scaling_up_util_threshold
-            and mem_util < self._cluster_scaling_up_util_threshold
+            util.cpu < self._cluster_scaling_up_util_threshold
+            and util.gpu < self._cluster_scaling_up_util_threshold
+            and util.object_store_memory < self._cluster_scaling_up_util_threshold
         ):
             logger.debug(
                 "Cluster utilization is below threshold: "
-                f"CPU={cpu_util:.2f}, memory={mem_util:.2f}."
+                f"CPU={util.cpu:.2f}, GPU={util.gpu:.2f}, memory={util.object_store_memory:.2f}."
             )
             # Still send an empty request when upscaling is not needed,
             # to renew our registration on AutoscalingCoordinator.
@@ -211,10 +176,10 @@ def try_trigger_scaling(self):
         if logger.isEnabledFor(logging.DEBUG):
             debug_msg = (
                 "Scaling up cluster. Current utilization: "
-                f"CPU={cpu_util:.2f}, memory={mem_util:.2f}."
+                f"CPU={util.cpu:.2f}, GPU={util.gpu:.2f}, object_store_memory={util.object_store_memory:.2f}."
                 " Requesting resources:"
             )
-        # TODO(hchen): We scale up all CPU nodes by the same delta for now.
+        # TODO(hchen): We scale up all nodes by the same delta for now.
         # We may want to distinguish different node types based on their individual
         # utilization.
         for node_resource_spec, count in node_resource_spec_count.items():
diff --git a/python/ray/data/_internal/cluster_autoscaler/resource_utilization_gauge.py b/python/ray/data/_internal/cluster_autoscaler/resource_utilization_gauge.py
new file mode 100644
index 000000000000..0c4ae2698e9f
--- /dev/null
+++ b/python/ray/data/_internal/cluster_autoscaler/resource_utilization_gauge.py
@@ -0,0 +1,73 @@
+import abc
+
+from ray.data._internal.average_calculator import TimeWindowAverageCalculator
+from ray.data._internal.execution.interfaces import ExecutionResources
+from ray.data._internal.execution.resource_manager import ResourceManager
+
+ClusterUtil = ExecutionResources
+
+
+class ResourceUtilizationGauge(abc.ABC):
+    @abc.abstractmethod
+    def observe(self):
+        """Observe the cluster utilization."""
+        ...
+
+    @abc.abstractmethod
+    def get(self) -> ClusterUtil:
+        """Get the cluster resource utilization."""
+        ...
+
+
+class RollingLogicalUtilizationGauge(ResourceUtilizationGauge):
+
+    # Default time window in seconds to calculate the average of cluster utilization.
+    DEFAULT_CLUSTER_UTIL_AVG_WINDOW_S: int = 10
+
+    def __init__(
+        self,
+        resource_manager: ResourceManager,
+        *,
+        cluster_util_avg_window_s: float = DEFAULT_CLUSTER_UTIL_AVG_WINDOW_S,
+    ):
+        self._resource_manager = resource_manager
+
+        self._cluster_cpu_util_calculator = TimeWindowAverageCalculator(
+            cluster_util_avg_window_s
+        )
+        self._cluster_gpu_util_calculator = TimeWindowAverageCalculator(
+            cluster_util_avg_window_s
+        )
+        self._cluster_obj_mem_util_calculator = TimeWindowAverageCalculator(
+            cluster_util_avg_window_s
+        )
+
+    def observe(self):
+        """Report the cluster utilization based on global usage / global limits."""
+
+        def save_div(numerator, denominator):
+            if not denominator:
+                return 0
+            else:
+                return numerator / denominator
+
+        global_usage = self._resource_manager.get_global_usage()
+        global_limits = self._resource_manager.get_global_limits()
+
+        cpu_util = save_div(global_usage.cpu, global_limits.cpu)
+        gpu_util = save_div(global_usage.gpu, global_limits.gpu)
+        obj_store_mem_util = save_div(
+            global_usage.object_store_memory, global_limits.object_store_memory
+        )
+
+        self._cluster_cpu_util_calculator.report(cpu_util)
+        self._cluster_gpu_util_calculator.report(gpu_util)
+        self._cluster_obj_mem_util_calculator.report(obj_store_mem_util)
+
+    def get(self) -> ExecutionResources:
+        """Get the average cluster utilization based on global usage / global limits."""
+        return ExecutionResources(
+            cpu=self._cluster_cpu_util_calculator.get_average(),
+            gpu=self._cluster_gpu_util_calculator.get_average(),
+            object_store_memory=self._cluster_obj_mem_util_calculator.get_average(),
+        )
diff --git a/python/ray/data/tests/test_default_cluster_autoscaler_v2.py b/python/ray/data/tests/test_default_cluster_autoscaler_v2.py
index 0b93a832629f..b787f467f6ef 100644
--- a/python/ray/data/tests/test_default_cluster_autoscaler_v2.py
+++ b/python/ray/data/tests/test_default_cluster_autoscaler_v2.py
@@ -1,4 +1,3 @@
-import unittest
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -8,6 +7,21 @@
     DefaultClusterAutoscalerV2,
     _NodeResourceSpec,
 )
+from ray.data._internal.cluster_autoscaler.resource_utilization_gauge import (
+    ResourceUtilizationGauge,
+)
+from ray.data._internal.execution.interfaces.execution_options import ExecutionResources
+
+
+class StubUtilizationGauge(ResourceUtilizationGauge):
+    def __init__(self, utilization: ExecutionResources):
+        self._utilization = utilization
+
+    def observe(self):
+        pass
+
+    def get(self):
+        return self._utilization
 
 
 @pytest.fixture(autouse=True)
@@ -26,7 +40,7 @@ def patch_autoscaling_coordinator():
             yield
 
 
-class TestClusterAutoscaling(unittest.TestCase):
+class TestClusterAutoscaling:
     """Tests for cluster autoscaling functions in DefaultClusterAutoscalerV2."""
 
     def setup_class(self):
@@ -94,36 +108,54 @@ def test_get_node_resource_spec_and_count(self):
 
         expected = {
             _NodeResourceSpec.of(
-                self._node_type1["CPU"], self._node_type1["memory"]
+                cpu=self._node_type1["CPU"],
+                gpu=self._node_type1.get("GPU", 0),
+                mem=self._node_type1["memory"],
             ): 2,
             _NodeResourceSpec.of(
-                self._node_type2["CPU"], self._node_type2["memory"]
+                cpu=self._node_type2["CPU"],
+                gpu=self._node_type2.get("GPU", 0),
+                mem=self._node_type2["memory"],
+            ): 1,
+            _NodeResourceSpec.of(
+                cpu=self._node_type3["CPU"],
+                gpu=self._node_type3.get("GPU", 0),
+                mem=self._node_type3["memory"],
             ): 1,
         }
 
         with patch("ray.nodes", return_value=node_table):
             assert autoscaler._get_node_resource_spec_and_count() == expected
 
+    @pytest.mark.parametrize("cpu_util", [0.5, 0.75])
+    @pytest.mark.parametrize("gpu_util", [0.5, 0.75])
+    @pytest.mark.parametrize("mem_util", [0.5, 0.75])
     @patch(
-        "ray.data._internal.cluster_autoscaler.default_cluster_autoscaler_v2.DefaultClusterAutoscalerV2._send_resource_request",  # noqa: E501
-    )
-    def test_try_scale_up_cluster(self, _send_resource_request):
+        "ray.data._internal.cluster_autoscaler.default_cluster_autoscaler_v2.DefaultClusterAutoscalerV2._send_resource_request"  # noqa: E501
+    )
+    def test_try_scale_up_cluster(
+        self, _send_resource_request, cpu_util, gpu_util, mem_util
+    ):
+
         # Test _try_scale_up_cluster
+        scale_up_threshold = 0.75
         scale_up_delta = 1
+        utilization = ExecutionResources(
+            cpu=cpu_util, gpu=gpu_util, object_store_memory=mem_util
+        )
+
         autoscaler = DefaultClusterAutoscalerV2(
             topology=MagicMock(),
             resource_manager=MagicMock(),
             execution_id="test_execution_id",
             cluster_scaling_up_delta=scale_up_delta,
+            resource_utilization_calculator=StubUtilizationGauge(utilization),
+            cluster_scaling_up_util_threshold=scale_up_threshold,
         )
         _send_resource_request.assert_called_with([])
 
-        resource_spec1 = _NodeResourceSpec.of(
-            self._node_type1["CPU"], self._node_type1["memory"]
-        )
-        resource_spec2 = _NodeResourceSpec.of(
-            self._node_type2["CPU"], self._node_type2["memory"]
-        )
+        resource_spec1 = _NodeResourceSpec.of(cpu=4, gpu=0, mem=1000)
+        resource_spec2 = _NodeResourceSpec.of(cpu=8, gpu=1, mem=1000)
         autoscaler._get_node_resource_spec_and_count = MagicMock(
             return_value={
                 resource_spec1: 2,
@@ -131,40 +163,39 @@ def test_try_scale_up_cluster(self, _send_resource_request):
             },
         )
 
-        # Test different CPU/memory utilization combinations.
-        scale_up_threshold = (
-            DefaultClusterAutoscalerV2.DEFAULT_CLUSTER_SCALING_UP_UTIL_THRESHOLD
+        autoscaler.try_trigger_scaling()
+
+        # Should scale up if any resource is at or above the threshold.
+        should_scale_up = (
+            cpu_util >= scale_up_threshold
+            or gpu_util >= scale_up_threshold
+            or mem_util >= scale_up_threshold
         )
-        for cpu_util in [scale_up_threshold / 2, scale_up_threshold]:
-            for mem_util in [scale_up_threshold / 2, scale_up_threshold]:
-                # Should scale up if either CPU or memory utilization is above
-                # the threshold.
-                should_scale_up = (
-                    cpu_util >= scale_up_threshold or mem_util >= scale_up_threshold
-                )
-                autoscaler._get_cluster_cpu_and_mem_util = MagicMock(
-                    return_value=(cpu_util, mem_util),
-                )
-                autoscaler.try_trigger_scaling()
-                if not should_scale_up:
-                    _send_resource_request.assert_called_with([])
-                else:
-                    expected_resource_request = [
-                        {
-                            "CPU": self._node_type1["CPU"],
-                            "memory": self._node_type1["memory"],
-                        }
-                    ] * (2 + 1)
-                    expected_resource_request.extend(
-                        [
-                            {
-                                "CPU": self._node_type2["CPU"],
-                                "memory": self._node_type2["memory"],
-                            }
-                        ]
-                        * (1 + 1)
-                    )
-                    _send_resource_request.assert_called_with(expected_resource_request)
+        if not should_scale_up:
+            _send_resource_request.assert_called_with([])
+        else:
+            expected_num_resource_spec1_requested = 2 + scale_up_delta
+            expected_resource_request = [
+                {
+                    "CPU": resource_spec1.cpu,
+                    "GPU": resource_spec1.gpu,
+                    "memory": resource_spec1.mem,
+                }
+            ] * expected_num_resource_spec1_requested
+
+            expected_num_resource_spec2_requested = 1 + scale_up_delta
+            expected_resource_request.extend(
+                [
+                    {
+                        "CPU": resource_spec2.cpu,
+                        "GPU": resource_spec2.gpu,
+                        "memory": resource_spec2.mem,
+                    }
+                ]
+                * expected_num_resource_spec2_requested
+            )
+
+            _send_resource_request.assert_called_with(expected_resource_request)
 
 
 if __name__ == "__main__":