File tree Expand file tree Collapse file tree 1 file changed +8
-1
lines changed Expand file tree Collapse file tree 1 file changed +8
-1
lines changed Original file line number Diff line number Diff line change @@ -15,7 +15,14 @@ def cuda_version_and_device_count() -> Tuple[str, int]:
15
15
except Exception as e :
16
16
_logger .warning ("Error checking CUDA version with nvidia-smi: %s" , e )
17
17
return ("" , 0 )
18
- dm = xml .dom .minidom .parseString (out ) # nosec
18
+
19
+ # Apparently nvidia-smi is not safe to call concurrently.
20
+ # With --parallel, sometimes the returned XML will contain
21
+ # <process_name>\xff...\xff</process_name>
22
+ # and xml.dom.minidom.parseString will raise
23
+ # "xml.parsers.expat.ExpatError: not well-formed (invalid token)"
24
+ out_no_xff = out .replace (b'\xff ' , b'' )
25
+ dm = xml .dom .minidom .parseString (out_no_xff ) # nosec
19
26
20
27
ag = dm .getElementsByTagName ("attached_gpus" )
21
28
if len (ag ) < 1 or ag [0 ].firstChild is None :
You can’t perform that action at this time.
0 commit comments