@@ -13,6 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import gc
-import unittest
+import inspect
 
 import torch
@@ -24,6 +24,6 @@
 @require_torch_gpu
 @slow
-class QuantCompileTests(unittest.TestCase):
+class QuantCompileTests:
     @property
     def quantization_config(self):
         raise NotImplementedError(
@@ -50,30 +50,26 @@ def _init_pipeline(self, quantization_config, torch_dtype):
         )
         return pipe
 
-    def _test_torch_compile(self, quantization_config, torch_dtype=torch.bfloat16):
-        pipe = self._init_pipeline(quantization_config, torch_dtype).to("cuda")
-        # import to ensure fullgraph True
+    def _test_torch_compile(self, torch_dtype=torch.bfloat16):
+        pipe = self._init_pipeline(self.quantization_config, torch_dtype).to("cuda")
+        # `fullgraph=True` ensures no graph breaks
         pipe.transformer.compile(fullgraph=True)
 
-        for _ in range(2):
-            # small resolutions to ensure speedy execution.
-            pipe("a dog", num_inference_steps=3, max_sequence_length=16, height=256, width=256)
+        # small resolutions to ensure speedy execution.
+        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
 
-    def _test_torch_compile_with_cpu_offload(self, quantization_config, torch_dtype=torch.bfloat16):
-        pipe = self._init_pipeline(quantization_config, torch_dtype)
+    def _test_torch_compile_with_cpu_offload(self, torch_dtype=torch.bfloat16):
+        pipe = self._init_pipeline(self.quantization_config, torch_dtype)
         pipe.enable_model_cpu_offload()
         pipe.transformer.compile()
 
-        for _ in range(2):
-            # small resolutions to ensure speedy execution.
-            pipe("a dog", num_inference_steps=3, max_sequence_length=16, height=256, width=256)
+        # small resolutions to ensure speedy execution.
+        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
 
-    def _test_torch_compile_with_group_offload_leaf(
-        self, quantization_config, torch_dtype=torch.bfloat16, *, use_stream: bool = False
-    ):
-        torch._dynamo.config.cache_size_limit = 10000
+    def _test_torch_compile_with_group_offload_leaf(self, torch_dtype=torch.bfloat16, *, use_stream: bool = False):
+        torch._dynamo.config.cache_size_limit = 1000
 
-        pipe = self._init_pipeline(quantization_config, torch_dtype)
+        pipe = self._init_pipeline(self.quantization_config, torch_dtype)
         group_offload_kwargs = {
             "onload_device": torch.device("cuda"),
             "offload_device": torch.device("cpu"),
@@ -87,6 +83,17 @@ def _test_torch_compile_with_group_offload_leaf(
             if torch.device(component.device).type == "cpu":
                 component.to("cuda")
 
-        for _ in range(2):
-            # small resolutions to ensure speedy execution.
-            pipe("a dog", num_inference_steps=3, max_sequence_length=16, height=256, width=256)
+        # small resolutions to ensure speedy execution.
+        pipe("a dog", num_inference_steps=2, max_sequence_length=16, height=256, width=256)
+
+    def test_torch_compile(self):
+        self._test_torch_compile()
+
+    def test_torch_compile_with_cpu_offload(self):
+        self._test_torch_compile_with_cpu_offload()
+
+    def test_torch_compile_with_group_offload_leaf(self, use_stream=False):
+        for cls in inspect.getmro(self.__class__):
+            if "test_torch_compile_with_group_offload_leaf" in cls.__dict__ and cls is not QuantCompileTests:
+                return
+        self._test_torch_compile_with_group_offload_leaf(use_stream=use_stream)
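With `unittest.TestCase` dropped, `QuantCompileTests` becomes a pure mixin: test runners no longer collect the abstract base on its own, and each backend supplies the `quantization_config` property itself. A minimal sketch of how a concrete subclass might plug in (the class name and config below are illustrative, not part of this diff):

```python
import unittest

import torch
from diffusers import BitsAndBytesConfig


class BnB4BitCompileTests(QuantCompileTests, unittest.TestCase):
    # Hypothetical backend-specific subclass, for illustration only.
    @property
    def quantization_config(self):
        # Illustrative config; real tests may wrap this in a
        # pipeline-level quantization config object instead.
        return BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

    # Opting in to the streamed group-offload variant reuses the shared helper.
    def test_torch_compile_with_group_offload_leaf_stream(self):
        self._test_torch_compile_with_group_offload_leaf(use_stream=True)
```

The `inspect.getmro` check in the base `test_torch_compile_with_group_offload_leaf` appears intended to skip the default run whenever a subclass ships its own version of that test under the same name.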