Skip to content

Exporting video frame to Pytorch tensor

Roman Arzumanyan edited this page May 6, 2020 · 2 revisions

VPF supports exporting decoded video frames to PyTorch tensors entirely on the GPU, without copying through host memory:

    # NOTE(review): torch is listed before PytorchNvCodec on the assumption that
    # the extension needs PyTorch's libraries loaded first - confirm.
    import torch
    import PyNvCodec as nvc
    import PytorchNvCodec as pnvc

    gpuID = 0

    # Decoder for the input file, running on the selected GPU.
    nvDec = nvc.PyNvDecoder('path_to_video_file', gpuID)

    # No resize step is performed in this sample, so the tensor dimensions
    # are simply the decoder's frame size.
    target_w = nvDec.Width()
    target_h = nvDec.Height()

    # Color conversion chain: NV12 -> RGB interleaved -> RGB planar.
    to_rgb = nvc.PySurfaceConverter(
        target_w, target_h, nvc.PixelFormat.NV12, nvc.PixelFormat.RGB, gpuID)
    to_planar = nvc.PySurfaceConverter(
        target_w, target_h, nvc.PixelFormat.RGB, nvc.PixelFormat.RGB_PLANAR, gpuID)

    # Normalization coefficients (optional, depends on what your NN expects).
    # Originally it's
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    # but the input here is in [0;255], so every coefficient is multiplied
    # by 255 (e.g. 0.225 * 255 = 57.375).
    # They are constant, so they are created once instead of once per frame.
    mean = torch.tensor([123.675, 116.28, 103.53], dtype=torch.float32, device='cuda')
    std = torch.tensor([58.395, 57.12, 57.375], dtype=torch.float32, device='cuda')

    while True:
        # Obtain NV12 decoded surface from decoder;
        # an empty surface ends the loop.
        rawSurface = nvDec.DecodeSingleSurface()
        if rawSurface.Empty():
            break

        # Convert to RGB interleaved;
        rgb_byte = to_rgb.Execute(rawSurface)

        # Convert to RGB planar because that's what to_tensor + normalize are doing;
        rgb_planar = to_planar.Execute(rgb_byte)

        # Create torch tensor from it and reshape because
        # pnvc.makefromDevicePtrUint8 creates just a chunk of CUDA memory
        # and then copies data from plane pointer to allocated chunk;
        surfPlane = rgb_planar.PlanePtr()
        surface_tensor = pnvc.makefromDevicePtrUint8(
            surfPlane.GpuMem(),
            surfPlane.Width(),
            surfPlane.Height(),
            surfPlane.Pitch(),
            surfPlane.ElemSize())
        # View the flat chunk as (channels, height, width).
        surface_tensor.resize_(3, target_h, target_w)

        # This is optional and depends on what your NN expects to take as input:
        # convert the uint8 data to float so it can be normalized to the
        # range desired by the NN using the mean / std defined above.
        surface_tensor = surface_tensor.type(dtype=torch.cuda.FloatTensor)