import abc
from pathlib import Path
from typing import Any, Callable, List, Optional, Set, Tuple, Union
import numpy as np
import torch
from torch.nn import Module
from torch.utils.data.dataloader import DataLoader
from ._logging import PathLike
from .approxapp import ApproxKnob, KnobsT
from .modeledapp import (
ICostModel,
IQoSModel,
LinearCostModel,
ModeledApp,
QoSModelP1,
QoSModelP2,
)
from .torchutil import ModuleIndexer, get_summary, move_to_device_recursively
class TorchApproxKnob(ApproxKnob):
    """Defines an approximation knob that knows
    its own expected speedup ratio and what Modules it can apply to,
    and can be applied to a torch.nn.Module to return an approximated Module.

    This is an abstract interface: concrete knobs must implement every
    property and method declared below.
    """

    @property
    @abc.abstractmethod
    def deterministic(self) -> bool:
        """Returns true if approx knob does not contain randomness."""

    @property
    @abc.abstractmethod
    def expected_speedup(self) -> float:
        """The speedup this knob is expected to provide. Used for cost prediction."""

    @abc.abstractmethod
    def is_applicable(self, op: Module) -> bool:
        """Returns True if this knob can be applied to this Module.

        :param op: the module to check availability for.
        :type op: torch.nn.Module
        :rtype: bool
        """

    @abc.abstractmethod
    def apply(self, op: Module) -> Module:
        """Applies knob to a Module and returns an approximated Module.

        :param op: the module to apply approximation on.
        :type op: torch.nn.Module
        :rtype: torch.nn.Module
        """
_default_device = f"cuda" if torch.cuda.is_available() else "cpu"
class TorchApp(ModeledApp, abc.ABC):
    r"""Adaptor for approximable PyTorch Modules with tensor output.

    A TorchApp stores the PyTorch Module, datasets for tuning and calibration,
    set of available `TorchApproxKnob` each of which may be applied to some layer in the Module,
    and the quality of service (QoS) metric of application (e.g., accuracy).
    It provides empirical tuning and predictive tuning capability,
    automatically supporting `.modeledapp.LinearCostModel`,
    `.modeledapp.QoSModelP1`, and `.modeledapp.QoSModelP2`.

    In contrast to `.approxapp.ApproxApp` and `.modeledapp.ModeledApp`,
    there should be no need to inherit from `TorchApp` in most use cases.

    :param app_name: Name of the application, which is used as an identifier in tuning sessions, etc.
    :param module: The PyTorch module to tune.
    :param tune_dataloader: A `torch.utils.data.DataLoader` yielding ``(inputs, targets)``
        batches to use as inputs to module during tuning.
    :param test_dataloader: A `torch.utils.data.DataLoader` yielding ``(inputs, targets)``
        batches used for QoS testing
        (see `test_configs` parameter of `ApproxModeledTuner.tune`).
    :param knobs: A set of `TorchApproxKnob` to be considered. Each knob has an `is_applicable()` method
        which is used to determine which layer it can apply to.
        `.approxes.get_knobs_from_file` returns a set of builtin knobs that will exactly fit here.
        The set passed in is not modified.
    :param tensor_to_qos: QoS metric function which computes QoS from the module's output.
        `.torchutil.accuracy` computes the classification accuracy which can be applied here.
    :param combine_qos: A function to combine each batch's QoS into one value.
        When QoS is Classification Accuracy, this will most likely be `numpy.mean`
        (which is the default value).
    :param target_device: The target device that this application should be tuned on.
        When falsy (the default), knobs are not filtered by device.
    :param torch_device: The PyTorch device where the model inference is run on.
        This device should be able to run the implementations of the knobs
        available for this app on `target_device`.
    :param model_storage_folder: A folder to store the serialized QoS models into.
        `QoSModelP1` will be serialized into ``model_storage_folder / "p1.pkl"``,
        and `QoSModelP2` into ``model_storage_folder / "p2.json"``.
    """

    def __init__(
        self,
        app_name: str,
        module: Module,
        tune_dataloader: DataLoader,
        test_dataloader: DataLoader,
        knobs: Set[TorchApproxKnob],
        tensor_to_qos: Callable[[torch.Tensor, Any], float],
        combine_qos: Callable[[np.ndarray], float] = np.mean,
        target_device: Optional[str] = None,
        torch_device: Union[torch.device, str] = _default_device,
        model_storage_folder: Optional[PathLike] = None,
    ) -> None:
        self.app_name = app_name
        self.module = module
        self.tune_loader = tune_dataloader
        self.test_loader = test_dataloader
        # Work on a private copy so the baseline knob added here never leaks
        # into the set object owned by the caller.
        all_knobs = set(knobs)
        all_knobs.add(ApproxKnob.unique_baseline(all_knobs))
        self.name_to_knob = {
            k.name: k for k in self._check_and_filter_knob(all_knobs, target_device)
        }
        self.tensor_to_qos = tensor_to_qos
        self.combine_qos = combine_qos
        self.device = torch_device
        self.model_storage = (
            Path(model_storage_folder) if model_storage_folder else None
        )
        self.module = self.module.to(torch_device)
        self.midx = ModuleIndexer(self.module)
        self._op_costs = {}
        op_knobs = {}
        # Speedups are recorded for every given knob (plus the baseline),
        # including knobs that were filtered out for `target_device`.
        self._knob_speedups = {k.name: k.expected_speedup for k in all_knobs}
        modules = self.midx.name_to_module
        # The per-operator summary is computed from a single sample input.
        summary = get_summary(self.module, (self._sample_input(),))
        for op_name, op in modules.items():
            this_knobs = [
                knob for knob in self.name_to_knob.values() if knob.is_applicable(op)
            ]
            assert this_knobs, f"No knob is applicable to operator {op_name!r}"
            op_knobs[op_name] = this_knobs
            self._op_costs[op_name] = summary.loc[op_name, "flops"]
        # Init parent class last
        super().__init__(op_knobs, target_device)

    @property
    def name(self) -> str:
        """Returns the name of application."""
        return self.app_name

    def get_models(self) -> List[Union[ICostModel, IQoSModel]]:
        """Returns a list of predictive tuning models.

        TorchApp in particular derives 1 performance model (LinearCostModel)
        and 2 QoS models (QoSModelP1, QoSModelP2) automatically.
        """

        def batched_valset_qos(tensor_output: torch.Tensor):
            # `tensor_output` holds the outputs for the whole tuning dataset;
            # it is sliced back into per-batch pieces that line up with the
            # targets yielded by the tuning dataloader.
            dataset_len = len(self.tune_loader.dataset)
            assert len(tensor_output) == dataset_len
            begin = 0
            qoses = []
            for _, target in self.tune_loader:
                end = begin + len(target)
                target = move_to_device_recursively(target, self.device)
                qos = self.tensor_to_qos(tensor_output[begin:end], target)
                qoses.append(qos)
                begin = end
            return self.combine_qos(np.array(qoses))

        # QoS models are only persisted when a storage folder was configured.
        p1_storage = self.model_storage / "p1.pkl" if self.model_storage else None
        p2_storage = self.model_storage / "p2.json" if self.model_storage else None
        return [
            LinearCostModel(self, self._op_costs, self._knob_speedups),
            QoSModelP1(
                self, self._get_raw_output_valset, batched_valset_qos, p1_storage
            ),
            QoSModelP2(self, p2_storage),
        ]

    @torch.no_grad()
    def empirical_measure_qos_cost(
        self, with_approxes: KnobsT, is_test: bool, progress: bool = False
    ) -> Tuple[float, float]:
        """Measure the QoS and performance of Module with given approximation
        empirically (i.e., by running the Module on the dataset).

        :param with_approxes: mapping from operator name to the name of the knob
            to apply on that operator.
        :param is_test: use the test dataloader if True, the tuning dataloader otherwise.
        :param progress: wrap the dataloader in a `tqdm` progress bar.
        :return: a tuple ``(qos, elapsed_seconds)``; the elapsed time covers the
            inference loop only, not the application of the knobs.
        """
        # A monotonic clock is used so system clock adjustments cannot skew
        # the measured duration.
        from time import perf_counter

        from tqdm import tqdm

        dataloader = self.test_loader if is_test else self.tune_loader
        if progress:
            dataloader = tqdm(dataloader)
        approxed = self._apply_knobs(with_approxes)
        qoses = []

        time_begin = perf_counter()
        for inputs, targets in dataloader:
            inputs = move_to_device_recursively(inputs, self.device)
            targets = move_to_device_recursively(targets, self.device)
            outputs = approxed(inputs)
            qoses.append(self.tensor_to_qos(outputs, targets))
        time_end = perf_counter()
        qos = float(self.combine_qos(np.array(qoses)))
        return qos, time_end - time_begin

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        module_class_name = type(self.module).__name__
        return (
            f'{class_name}"{self.name}"(module={module_class_name}, '
            f"num_op={len(self.op_knobs)}, num_knob={len(self.name_to_knob)})"
        )

    @torch.no_grad()
    def _get_raw_output_valset(self, with_approxes: KnobsT):
        """Run the approximated module over the whole tuning dataloader and
        return all batch outputs concatenated along dimension 0."""
        approxed = self._apply_knobs(with_approxes)
        all_outputs = []
        for inputs, _ in self.tune_loader:
            inputs = move_to_device_recursively(inputs, self.device)
            outputs = approxed(inputs)
            all_outputs.append(outputs)
        return torch.cat(all_outputs, dim=0)

    @staticmethod
    def _check_and_filter_knob(
        knobs: Set[TorchApproxKnob], device: Optional[str]
    ) -> Set[TorchApproxKnob]:
        """Return a new set holding `knobs` plus the baseline knob,
        restricted to knobs that exist on `device` when one is given.

        The input set is left untouched.
        """
        candidates = set(knobs)
        candidates.add(ApproxKnob.unique_baseline(candidates))
        if not device:
            return candidates
        return {knob for knob in candidates if knob.exists_on_device(device)}

    def _apply_knobs(self, knobs: KnobsT) -> Module:
        """Return a copy of the module with each named knob applied to its operator."""
        import copy

        # Deep-copy the indexer so the original module stays unapproximated.
        module_indexer = copy.deepcopy(self.midx)
        for op_name, knob_name in knobs.items():
            knob = self.name_to_knob[knob_name]
            module_indexer[op_name] = knob.apply(module_indexer[op_name])
        return module_indexer.module

    def _sample_input(self):
        """Return one input sample (batch size 1) from the tuning dataset,
        moved to the inference device."""
        inputs, _ = next(iter(DataLoader(self.tune_loader.dataset, batch_size=1)))
        # Same transfer helper as the inference loops, for consistency.
        return move_to_device_recursively(inputs, self.device)