@@ -1734,6 +1734,31 @@ func (v *VGPUDevices) validate() error {
17341734}
17351735
17361736func (v * VGPUDevices ) runValidation () error {
1737+ nvpci := nvpci .New ()
1738+ GPUDevices , err := nvpci .GetGPUs ()
1739+ if err != nil {
1740+ return fmt .Errorf ("error checking for GPU devices on the host: %w" , err )
1741+ }
1742+
1743+ mdevBusPath := "/sys/class/mdev_bus"
1744+ entries , err := os .ReadDir (mdevBusPath )
1745+ if err != nil {
1746+ return fmt .Errorf ("unable to read mdev_bus directory: %v" , err )
1747+ }
1748+
1749+ if len (entries ) == 0 {
1750+ for _ , device := range GPUDevices {
1751+ if device .SriovInfo .PhysicalFunction == nil {
1752+ continue
1753+ }
1754+ totalVF := int (device .SriovInfo .PhysicalFunction .TotalVFs )
1755+ if totalVF > 0 {
1756+ log .Infof ("Found GPU device with SR-IOV VFs: %s (TotalVFs: %d)" , device .Address , totalVF )
1757+ return nil
1758+ }
1759+ }
1760+ }
1761+
17371762 nvmdev := nvmdev .New ()
17381763 vGPUDevices , err := nvmdev .GetAllDevices ()
17391764 if err != nil {
@@ -1746,14 +1771,14 @@ func (v *VGPUDevices) runValidation() error {
17461771 return fmt .Errorf ("no vGPU devices found" )
17471772 }
17481773
1749- log .Infof ("Found %d vGPU devices" , numDevices )
1774+ log .Infof ("Found %d MDEV vGPU devices" , numDevices )
17501775 return nil
17511776 }
17521777
17531778 for {
17541779 numDevices := len (vGPUDevices )
17551780 if numDevices > 0 {
1756- log .Infof ("Found %d vGPU devices" , numDevices )
1781+ log .Infof ("Found %d MDEV vGPU devices" , numDevices )
17571782 return nil
17581783 }
17591784 log .Infof ("No vGPU devices found, retrying after %d seconds" , sleepIntervalSecondsFlag )
0 commit comments