Quantization Backend Configuration¶
FX Graph Mode Quantization allows the user to configure various quantization behaviors of an op in order to match the expectation of their backend.
In the future, this document will contain a detailed spec of these configurations.
Default values for native configurations¶
Below is the output of the configuration for quantization of ops in fbgemm and qnnpack (PyTorch’s default quantized backends).
Results:
{
'pattern': <class 'torch.nn.modules.pooling.AdaptiveAvgPool1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method adaptive_avg_pool1d of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function adaptive_avg_pool2d at 0x7f8679535e60>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.AdaptiveAvgPool3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function adaptive_avg_pool3d at 0x7f8679535ef0>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in function add>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <built-in function add>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<built-in method relu of type object at 0x7f870fa24980>, <built-in function add>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <built-in function add>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in method add of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <built-in method add of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<built-in method relu of type object at 0x7f870fa24980>, <built-in method add of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <built-in method add of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.AvgPool1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method avg_pool1d of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.AvgPool2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in function avg_pool2d>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.AvgPool3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in function avg_pool3d>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.batchnorm.BatchNorm2d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.BNReLU2d'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8677f9b5f0>,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <class 'torch.nn.modules.batchnorm.BatchNorm2d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.BNReLU2d'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8677f9b680>,
},
{
'pattern': <class 'torch.nn.modules.batchnorm.BatchNorm2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.batchnorm.BatchNorm3d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.BNReLU3d'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8677f9b710>,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <class 'torch.nn.modules.batchnorm.BatchNorm3d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.BNReLU3d'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8677f9b7a0>,
},
{
'pattern': <class 'torch.nn.modules.batchnorm.BatchNorm3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.BNReLU2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.BNReLU3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <built-in method cat of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method clamp of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': clamp,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': contiguous,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.conv.Conv1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'qat_module': <class 'torch.ao.nn.qat.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv1d'>,
},
{
'pattern': <class 'torch.ao.nn.qat.modules.conv.Conv1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv1d'>,
},
{
'pattern': <built-in method conv1d of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'input_type_to_index': {'weight': 1, 'bias': 2},
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.conv.Conv1d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvReLU1d'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8716fa6d40>,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <class 'torch.nn.modules.conv.Conv1d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvReLU1d'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8677f8f440>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in method conv1d of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <built-in method conv1d of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm1d'>, <class 'torch.nn.modules.conv.Conv1d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBn1d'>,
'fuser_method': <function reverse2.<locals>.reversed at 0x7f8677f8fb00>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, (<class 'torch.nn.modules.batchnorm.BatchNorm1d'>, <class 'torch.nn.modules.conv.Conv1d'>)),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBnReLU1d'>,
'fuser_method': <function reverse3.<locals>.reversed at 0x7f8677f8fb90>,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, (<class 'torch.nn.modules.batchnorm.BatchNorm1d'>, <class 'torch.nn.modules.conv.Conv1d'>)),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBnReLU1d'>,
'fuser_method': <function reverse3.<locals>.reversed at 0x7f8677f8fc20>,
},
{
'pattern': <class 'torch.nn.modules.conv.Conv2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'qat_module': <class 'torch.ao.nn.qat.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv2d'>,
},
{
'pattern': <class 'torch.ao.nn.qat.modules.conv.Conv2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv2d'>,
},
{
'pattern': <built-in method conv2d of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'input_type_to_index': {'weight': 1, 'bias': 2},
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.conv.Conv2d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvReLU2d'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8677f8fd40>,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <class 'torch.nn.modules.conv.Conv2d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvReLU2d'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8677f8fdd0>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in method conv2d of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <built-in method conv2d of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm2d'>, <class 'torch.nn.modules.conv.Conv2d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBn2d'>,
'fuser_method': <function reverse2.<locals>.reversed at 0x7f8677f8fe60>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, (<class 'torch.nn.modules.batchnorm.BatchNorm2d'>, <class 'torch.nn.modules.conv.Conv2d'>)),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBnReLU2d'>,
'fuser_method': <function reverse3.<locals>.reversed at 0x7f8677f8fef0>,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, (<class 'torch.nn.modules.batchnorm.BatchNorm2d'>, <class 'torch.nn.modules.conv.Conv2d'>)),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBnReLU2d'>,
'fuser_method': <function reverse3.<locals>.reversed at 0x7f8677f8ff80>,
},
{
'pattern': <class 'torch.nn.modules.conv.Conv3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'qat_module': <class 'torch.ao.nn.qat.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv3d'>,
},
{
'pattern': <class 'torch.ao.nn.qat.modules.conv.Conv3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv3d'>,
},
{
'pattern': <built-in method conv3d of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'input_type_to_index': {'weight': 1, 'bias': 2},
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.conv.Conv3d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvReLU3d'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8677f9b0e0>,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <class 'torch.nn.modules.conv.Conv3d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvReLU3d'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8677f9b170>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in method conv3d of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <built-in method conv3d of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm3d'>, <class 'torch.nn.modules.conv.Conv3d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBn3d'>,
'fuser_method': <function reverse2.<locals>.reversed at 0x7f8677f9b200>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, (<class 'torch.nn.modules.batchnorm.BatchNorm3d'>, <class 'torch.nn.modules.conv.Conv3d'>)),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBnReLU3d'>,
'fuser_method': <function reverse3.<locals>.reversed at 0x7f8677f9b290>,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, (<class 'torch.nn.modules.batchnorm.BatchNorm3d'>, <class 'torch.nn.modules.conv.Conv3d'>)),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBnReLU3d'>,
'fuser_method': <function reverse3.<locals>.reversed at 0x7f8677f9b320>,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBn1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv1d'>,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBn2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn2d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv2d'>,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBn3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn3d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBn3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv3d'>,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBnReLU1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv1d'>,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBnReLU2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU2d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv2d'>,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.ConvBnReLU3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU3d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvBnReLU3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv3d'>,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.ConvReLU1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU1d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv1d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv1d'>,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.ConvReLU2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU2d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv2d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv2d'>,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.ConvReLU3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU3d'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.conv_fused.ConvReLU3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.Conv3d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.Conv3d'>,
},
{
'pattern': <class 'torch.nn.modules.conv.ConvTranspose1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.ConvTranspose1d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.ConvTranspose1d'>,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm1d'>, <class 'torch.nn.modules.conv.ConvTranspose1d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.ConvTranspose1d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.ConvTranspose1d'>,
'fuser_method': <function reverse2.<locals>.reversed at 0x7f8677f8fcb0>,
},
{
'pattern': <class 'torch.nn.modules.conv.ConvTranspose2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.ConvTranspose2d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.ConvTranspose2d'>,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm2d'>, <class 'torch.nn.modules.conv.ConvTranspose2d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.ConvTranspose2d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.ConvTranspose2d'>,
'fuser_method': <function reverse2.<locals>.reversed at 0x7f8677f9b050>,
},
{
'pattern': <class 'torch.nn.modules.conv.ConvTranspose3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.ConvTranspose3d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.ConvTranspose3d'>,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm3d'>, <class 'torch.nn.modules.conv.ConvTranspose3d'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.conv.ConvTranspose3d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.conv.ConvTranspose3d'>,
'fuser_method': <function reverse2.<locals>.reversed at 0x7f8677f9b3b0>,
},
{
'pattern': detach,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': detach_,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.dropout.Dropout'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function dropout at 0x7f8679535f80>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.ELU'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function elu at 0x7f867953a5f0>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.sparse.Embedding'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.quint4x2, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.sparse.Embedding'>,
'qat_module': <class 'torch.ao.nn.qat.modules.embedding_ops.Embedding'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.sparse.Embedding'>,
'input_output_observed': False,
},
{
'pattern': <class 'torch.ao.nn.qat.modules.embedding_ops.Embedding'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.quint4x2, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.sparse.Embedding'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.sparse.Embedding'>,
'input_output_observed': False,
},
{
'pattern': <class 'torch.nn.modules.sparse.EmbeddingBag'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.quint4x2, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.sparse.EmbeddingBag'>,
'qat_module': <class 'torch.ao.nn.qat.modules.embedding_ops.EmbeddingBag'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.sparse.EmbeddingBag'>,
'input_output_observed': False,
},
{
'pattern': <class 'torch.ao.nn.qat.modules.embedding_ops.EmbeddingBag'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.quint4x2, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.sparse.EmbeddingBag'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.sparse.EmbeddingBag'>,
'input_output_observed': False,
},
{
'pattern': <built-in method flatten of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in function floordiv>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function group_norm at 0x7f867953d4d0>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'input_type_to_index': {'weight': 2, 'bias': 3},
},
{
'pattern': <class 'torch.nn.modules.rnn.GRUCell'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.rnn.GRUCell'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.rnn.GRUCell'>,
},
{
'pattern': <class 'torch.nn.modules.activation.Hardsigmoid'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <function hardsigmoid at 0x7f867953add0>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': hardsigmoid,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': hardsigmoid_,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <class 'torch.nn.modules.activation.Hardswish'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function hardswish at 0x7f867953af80>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.Hardtanh'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function hardtanh at 0x7f867953a4d0>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in function hardtanh_>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.linear.Identity'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function instance_norm at 0x7f867953d3b0>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'input_type_to_index': {'weight': 3, 'bias': 4},
},
{
'pattern': <class 'torch.nn.modules.instancenorm.InstanceNorm1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.instancenorm.InstanceNorm2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.instancenorm.InstanceNorm3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function interpolate at 0x7f86794b53b0>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.normalization.LayerNorm'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function layer_norm at 0x7f867953d440>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'input_type_to_index': {'weight': 2, 'bias': 3},
},
{
'pattern': <class 'torch.nn.modules.activation.LeakyReLU'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <function leaky_relu at 0x7f867953a7a0>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.linear.Linear'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'qat_module': <class 'torch.ao.nn.qat.modules.linear.Linear'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.linear.Linear'>,
},
{
'pattern': <class 'torch.ao.nn.qat.modules.linear.Linear'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.linear.Linear'>,
},
{
'pattern': <built-in function linear>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'input_type_to_index': {'weight': 1, 'bias': 2},
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.linear.Linear'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.LinearReLU'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8677f9b440>,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <class 'torch.nn.modules.linear.Linear'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.LinearReLU'>,
'fuser_method': <function reverse_sequential_wrapper2.<locals>.fuser_method at 0x7f8677f9b4d0>,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in function linear>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <built-in function linear>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.batchnorm.BatchNorm1d'>, <class 'torch.nn.modules.linear.Linear'>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'fused_module': <class 'torch.ao.nn.intrinsic.modules.fused.LinearBn1d'>,
'fuser_method': <function reverse2.<locals>.reversed at 0x7f8677f9b560>,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.LinearBn1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.linear_fused.LinearBn1d'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.linear.Linear'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.linear_fused.LinearBn1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.linear.Linear'>,
},
{
'pattern': <class 'torch.ao.nn.intrinsic.modules.fused.LinearReLU'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'qat_module': <class 'torch.nn.intrinsic.qat.modules.linear_relu.LinearReLU'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.linear.Linear'>,
},
{
'pattern': <class 'torch.nn.intrinsic.qat.modules.linear_relu.LinearReLU'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.linear.Linear'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.linear.Linear'>,
},
{
'pattern': <class 'torch.nn.modules.rnn.LSTM'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.rnn.LSTM'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.rnn.LSTM'>,
},
{
'pattern': <class 'torch.nn.modules.rnn.LSTMCell'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.rnn.LSTMCell'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.rnn.LSTMCell'>,
},
{
'pattern': <built-in method matmul of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.MaxPool1d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': torch.nn.functional.max_pool1d,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.MaxPool2d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': torch.nn.functional.max_pool2d,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.pooling.MaxPool3d'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': torch.nn.functional.max_pool3d,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method mean of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': mean,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in function mul>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <built-in function mul>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<built-in method relu of type object at 0x7f870fa24980>, <built-in function mul>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <built-in function mul>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<class 'torch.nn.modules.activation.ReLU'>, <built-in method mul of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<function relu at 0x7f867953a3b0>, <built-in method mul of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': (<built-in method relu of type object at 0x7f870fa24980>, <built-in method mul of type object at 0x7f870fa24980>),
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <built-in method mul of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'num_tensor_args_to_observation_type': {
0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': permute,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.PReLU'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.ReLU'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function relu at 0x7f867953a3b0>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': relu,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': relu_,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.ReLU6'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <function relu6 at 0x7f867953a560>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': repeat,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method repeat_interleave of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': repeat_interleave,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': reshape,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': resize_,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.rnn.RNNCell'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
{
'input_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.float32, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.float16, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
'is_dynamic': True,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'root_module': <class 'torch.nn.modules.rnn.RNNCell'>,
'reference_quantized_module_for_root': <class 'torch.ao.nn.quantized.reference.modules.rnn.RNNCell'>,
},
{
'pattern': shape,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.Sigmoid'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <built-in method sigmoid of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': sigmoid,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': sigmoid_,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': size,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.Softmax'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.00390625, zero_point=0, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <built-in method squeeze of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': squeeze,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': squeeze_,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method stack of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <class 'torch.nn.modules.activation.Tanh'>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <built-in method tanh of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': tanh,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': tanh_,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'weight_dtype': DTypeWithConstraints(dtype=torch.qint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'bias_dtype': torch.float32,
},
],
'observation_type': ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
'overwrite_output_fake_quantize': functools.partial(<class 'torch.ao.quantization.fake_quantize.FixedQParamsFakeQuantize'>, observer=functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){}){},
'overwrite_output_observer': functools.partial(<class 'torch.ao.quantization.observer.FixedQParamsObserver'>, scale=0.0078125, zero_point=128, dtype=torch.quint8, quant_min=0, quant_max=255){},
},
{
'pattern': <built-in method transpose of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': transpose,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': <built-in method unsqueeze of type object at 0x7f870fa24980>,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': unsqueeze,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': unsqueeze_,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
},
{
'pattern': view,
'dtype_configs': [
{
'input_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
'output_dtype': DTypeWithConstraints(dtype=torch.quint8, quant_min_lower_bound=None, quant_max_upper_bound=None, scale_min_lower_bound=None, scale_max_upper_bound=None),
},
],
'observation_type': ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
}