UWP MediaCapture: Option for NV-12 encoded images or BGRA images (#274)
* refactor: specify the MediaEncodingSubtype in the frame reader so that no software bitmap conversion of the pixel data is needed. The OutputEncodedImage and OutputImage bool flags are currently mutually exclusive, meaning only one of the emitters can output at a time; the EncodedImage emitter takes priority if both flags are true.

* fix: either the encoded image or the encoded image camera view setting can be the one that selects NV12 as the frame reader format.

* fix: build with code analysis enabled.

* feat: throw a NotSupportedException when both encoded and regular image emitters are specified.
austinbhale committed Mar 23, 2023
1 parent 9db656a commit 4b525f1
Showing 3 changed files with 110 additions and 4 deletions.
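
For orientation, a minimal usage sketch of the new option (not part of this commit; it assumes a pipeline created via Pipeline.Create() and that the configuration properties shown are settable via object initializers):

using var pipeline = Pipeline.Create();

var config = new PhotoVideoCameraConfiguration
{
    VideoStreamSettings = new PhotoVideoCameraConfiguration.StreamSettings
    {
        ImageWidth = 1280,
        ImageHeight = 720,
        FrameRate = 30,
        OutputEncodedImage = true, // NV12 path: the frame reader is created with MediaEncodingSubtypes.Nv12
        OutputImage = false,       // the BGRA path would instead select MediaEncodingSubtypes.Bgra8
    },
};

var camera = new PhotoVideoCamera(pipeline, config);
camera.VideoEncodedImage.Do(encoded => { /* consume the Shared<EncodedImage> NV12 frames */ });
pipeline.RunAsync();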
12 changes: 12 additions & 0 deletions Sources/Imaging/Microsoft.Psi.Imaging/Image.cs
@@ -185,6 +185,18 @@ public void CopyFrom(BitmapData bitmapData)
this.UnmanagedBuffer.CopyFrom(bitmapData.Scan0, numBytes);
}

/// <summary>
/// Copies the image contents from a memory pointer.
/// </summary>
/// <param name="source">Memory pointer from which to copy data.</param>
/// <param name="size">The maximum number of bytes to copy.</param>
/// <remarks><para>The method copies data from the memory pointer into the image.
/// The image must be allocated and must have the same size.</para></remarks>
public void CopyFrom(IntPtr source, int size)
{
this.UnmanagedBuffer.CopyFrom(source, size);
}

/// <summary>
/// Copies the image contents from a specified bitmap.
/// </summary>
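The new overload above copies raw bytes straight from unmanaged memory. A quick isolated sketch (illustrative only; it assumes the Image(width, height, pixelFormat) constructor and Stride property from Microsoft.Psi.Imaging, plus System.Runtime.InteropServices.Marshal for the native buffer):

int width = 640, height = 480;
var image = new Image(width, height, PixelFormat.BGRA_32bpp);
int size = image.Stride * height;

IntPtr buffer = Marshal.AllocHGlobal(size);
try
{
    // ... fill 'buffer' with BGRA pixel data from a native source ...
    image.CopyFrom(buffer, size); // copies at most 'size' bytes into the image's unmanaged buffer
}
finally
{
    Marshal.FreeHGlobal(buffer);
}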
@@ -19,6 +19,7 @@ namespace Microsoft.Psi.MixedReality.MediaCapture
using Windows.Graphics.Imaging;
using Windows.Media.Capture;
using Windows.Media.Capture.Frames;
using Windows.Media.MediaProperties;

/// <summary>
/// Photo/video (PV) camera source component.
@@ -48,13 +49,32 @@ public PhotoVideoCamera(Pipeline pipeline, PhotoVideoCameraConfiguration configu
this.pipeline = pipeline;
this.configuration = configuration ?? new PhotoVideoCameraConfiguration();

static void ValidateImageOutput(PhotoVideoCameraConfiguration.StreamSettings streamSettings)
{
if (streamSettings is not null
&& (streamSettings.OutputEncodedImage || streamSettings.OutputEncodedImageCameraView)
&& (streamSettings.OutputImage || streamSettings.OutputImageCameraView))
{
throw new NotSupportedException(
"Emitting both encoded and regular images simultaneously is not supported. " +
"Please specify whether or not the images should be encoded.");
}
}

ValidateImageOutput(this.configuration.PreviewStreamSettings);
ValidateImageOutput(this.configuration.VideoStreamSettings);

this.VideoImage = pipeline.CreateEmitter<Shared<Image>>(this, nameof(this.VideoImage));
this.VideoEncodedImage = pipeline.CreateEmitter<Shared<EncodedImage>>(this, nameof(this.VideoEncodedImage));
this.VideoIntrinsics = pipeline.CreateEmitter<ICameraIntrinsics>(this, nameof(this.VideoIntrinsics));
this.VideoPose = pipeline.CreateEmitter<CoordinateSystem>(this, nameof(this.VideoPose));
this.VideoImageCameraView = pipeline.CreateEmitter<ImageCameraView>(this, nameof(this.VideoImageCameraView));
this.VideoEncodedImageCameraView = pipeline.CreateEmitter<EncodedImageCameraView>(this, nameof(this.VideoEncodedImageCameraView));
this.PreviewImage = pipeline.CreateEmitter<Shared<Image>>(this, nameof(this.PreviewImage));
this.PreviewEncodedImage = pipeline.CreateEmitter<Shared<EncodedImage>>(this, nameof(this.PreviewEncodedImage));
this.PreviewIntrinsics = pipeline.CreateEmitter<ICameraIntrinsics>(this, nameof(this.PreviewIntrinsics));
this.PreviewPose = pipeline.CreateEmitter<CoordinateSystem>(this, nameof(this.PreviewPose));
this.PreviewImageCameraView = pipeline.CreateEmitter<ImageCameraView>(this, nameof(this.PreviewImageCameraView));
this.PreviewEncodedImageCameraView = pipeline.CreateEmitter<EncodedImageCameraView>(this, nameof(this.PreviewEncodedImageCameraView));

// Call this here (rather than in the Start() method, which is executed on the thread pool) to
@@ -65,6 +85,11 @@ public PhotoVideoCamera(Pipeline pipeline, PhotoVideoCameraConfiguration configu
this.initMediaCaptureTask = this.InitializeMediaCaptureAsync();
}

/// <summary>
/// Gets the BGRA-converted video image stream.
/// </summary>
public Emitter<Shared<Image>> VideoImage { get; }

/// <summary>
/// Gets the original video NV12-encoded image stream.
/// </summary>
@@ -80,11 +105,21 @@ public PhotoVideoCamera(Pipeline pipeline, PhotoVideoCameraConfiguration configu
/// </summary>
public Emitter<ICameraIntrinsics> VideoIntrinsics { get; }

/// <summary>
/// Gets the BGRA-converted video image camera view.
/// </summary>
public Emitter<ImageCameraView> VideoImageCameraView { get; }

/// <summary>
/// Gets the original video NV12-encoded image camera view.
/// </summary>
public Emitter<EncodedImageCameraView> VideoEncodedImageCameraView { get; }

/// <summary>
/// Gets the BGRA-converted preview image stream.
/// </summary>
public Emitter<Shared<Image>> PreviewImage { get; }

/// <summary>
/// Gets the original preview NV12-encoded image stream.
/// </summary>
@@ -100,6 +135,11 @@ public PhotoVideoCamera(Pipeline pipeline, PhotoVideoCameraConfiguration configu
/// </summary>
public Emitter<ICameraIntrinsics> PreviewIntrinsics { get; }

/// <summary>
/// Gets the BGRA-converted preview image camera view.
/// </summary>
public Emitter<ImageCameraView> PreviewImageCameraView { get; }

/// <summary>
/// Gets the original preview NV12-encoded image camera view.
/// </summary>
@@ -149,9 +189,11 @@ public async void Start(Action<DateTime> notifyCompletionTime)
// (if configured) are then posted on the respective output emitters.
this.videoFrameHandler = this.CreateMediaFrameHandler(
this.configuration.VideoStreamSettings,
this.VideoImage,
this.VideoEncodedImage,
this.VideoIntrinsics,
this.VideoPose,
this.VideoImageCameraView,
this.VideoEncodedImageCameraView);

this.videoFrameReader.FrameArrived += this.videoFrameHandler;
@@ -179,9 +221,11 @@ public async void Start(Action<DateTime> notifyCompletionTime)
// (if configured) are then posted on the respective output emitters.
this.previewFrameHandler = this.CreateMediaFrameHandler(
this.configuration.PreviewStreamSettings,
this.PreviewImage,
this.PreviewEncodedImage,
this.PreviewIntrinsics,
this.PreviewPose,
this.PreviewImageCameraView,
this.PreviewEncodedImageCameraView);

this.previewFrameReader.FrameArrived += this.previewFrameHandler;
@@ -266,6 +310,7 @@ private async Task InitializeMediaCaptureAsync()
this.configuration.VideoStreamSettings.ImageWidth,
this.configuration.VideoStreamSettings.ImageHeight,
this.configuration.VideoStreamSettings.FrameRate,
this.configuration.VideoStreamSettings.OutputEncodedImage || this.configuration.VideoStreamSettings.OutputEncodedImageCameraView ? MediaEncodingSubtypes.Nv12 : MediaEncodingSubtypes.Bgra8,
MediaStreamType.VideoRecord);

if (this.videoFrameReader == null)
@@ -282,6 +327,7 @@ private async Task InitializeMediaCaptureAsync()
this.configuration.PreviewStreamSettings.ImageWidth,
this.configuration.PreviewStreamSettings.ImageHeight,
this.configuration.PreviewStreamSettings.FrameRate,
this.configuration.PreviewStreamSettings.OutputEncodedImage || this.configuration.PreviewStreamSettings.OutputEncodedImageCameraView ? MediaEncodingSubtypes.Nv12 : MediaEncodingSubtypes.Bgra8,
MediaStreamType.VideoPreview);

if (this.previewFrameReader == null)
@@ -439,9 +485,10 @@ private async Task<MediaCaptureInitializationSettings> CreateMediaCaptureSetting
/// <param name="targetWidth">The requested capture frame width.</param>
/// <param name="targetHeight">The requested capture frame height.</param>
/// <param name="targetFrameRate">The requested capture frame rate.</param>
/// <param name="targetSubtype">The requested media encoding subtype.</param>
/// <param name="targetStreamType">The requested capture stream type.</param>
/// <returns>A task representing the asynchronous operation.</returns>
-private async Task<MediaFrameReader> CreateMediaFrameReaderAsync(MediaFrameSourceGroup sourceGroup, int targetWidth, int targetHeight, int targetFrameRate, MediaStreamType targetStreamType)
+private async Task<MediaFrameReader> CreateMediaFrameReaderAsync(MediaFrameSourceGroup sourceGroup, int targetWidth, int targetHeight, int targetFrameRate, string targetSubtype, MediaStreamType targetStreamType)
{
// Search all color frame sources of the requested stream type (Video or Preview)
foreach (var sourceInfo in sourceGroup.SourceInfos
@@ -459,7 +506,7 @@ private async Task<MediaFrameReader> CreateMediaFrameReaderAsync(MediaFrameSourc
{
// Found a frame source for the requested format - create the frame reader
await frameSource.SetFormatAsync(format);
-return await this.mediaCapture.CreateFrameReaderAsync(frameSource);
+return await this.mediaCapture.CreateFrameReaderAsync(frameSource, targetSubtype);
}
}
}
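
For reference, the two-argument overload used here is the stock WinRT API, which lets the platform deliver frames already converted to the requested subtype (call shape only, assuming a frame source has already been selected):

// MediaCapture.CreateFrameReaderAsync(MediaFrameSource inputSource, string outputSubtype)
var reader = await mediaCapture.CreateFrameReaderAsync(frameSource, MediaEncodingSubtypes.Nv12);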
@@ -472,16 +519,20 @@ private async Task<MediaFrameReader> CreateMediaFrameReaderAsync(MediaFrameSourc
/// Creates an event handler that handles the FrameArrived event of the MediaFrameReader.
/// </summary>
/// <param name="streamSettings">The stream settings.</param>
/// <param name="imageStream">The stream on which to post the output image.</param>
/// <param name="encodedImageStream">The stream on which to post the output encoded image.</param>
/// <param name="intrinsicsStream">The stream on which to post the camera intrinsics.</param>
/// <param name="poseStream">The stream on which to post the camera pose.</param>
/// <param name="imageCameraViewStream">The stream on which to post the image camera view.</param>
/// <param name="encodedImageCameraViewStream">The stream on which to post the encoded image camera view.</param>
/// <returns>The event handler.</returns>
private TypedEventHandler<MediaFrameReader, MediaFrameArrivedEventArgs> CreateMediaFrameHandler(
PhotoVideoCameraConfiguration.StreamSettings streamSettings,
Emitter<Shared<Image>> imageStream,
Emitter<Shared<EncodedImage>> encodedImageStream,
Emitter<ICameraIntrinsics> intrinsicsStream,
Emitter<CoordinateSystem> poseStream,
Emitter<ImageCameraView> imageCameraViewStream,
Emitter<EncodedImageCameraView> encodedImageCameraViewStream)
{
return (sender, args) =>
@@ -495,7 +546,7 @@ private async Task<MediaFrameReader> CreateMediaFrameReaderAsync(MediaFrameSourc
// Compute the camera intrinsics if needed
var cameraIntrinsics = default(ICameraIntrinsics);
-if (streamSettings.OutputCameraIntrinsics || streamSettings.OutputEncodedImageCameraView)
+if (streamSettings.OutputCameraIntrinsics || streamSettings.OutputImageCameraView || streamSettings.OutputEncodedImageCameraView)
{
cameraIntrinsics = this.GetCameraIntrinsics(frame);
}
@@ -508,7 +559,7 @@ private async Task<MediaFrameReader> CreateMediaFrameReaderAsync(MediaFrameSourc
// Compute the camera pose if needed
var cameraPose = default(CoordinateSystem);
-if (streamSettings.OutputPose || streamSettings.OutputEncodedImageCameraView)
+if (streamSettings.OutputPose || streamSettings.OutputImageCameraView || streamSettings.OutputEncodedImageCameraView)
{
// Convert the frame coordinate system to world pose in psi basis
cameraPose = frame.CoordinateSystem?.TryConvertSpatialCoordinateSystemToPsiCoordinateSystem();
@@ -559,6 +610,39 @@ private async Task<MediaFrameReader> CreateMediaFrameReaderAsync(MediaFrameSourc
encodedImageCameraViewStream.Post(encodedImageCameraView, originatingTime);
}
}

if (streamSettings.OutputImage || streamSettings.OutputImageCameraView)
{
// Accessing the VideoMediaFrame.SoftwareBitmap property creates a strong reference
// which needs to be Disposed, per the remarks here:
// https://docs.microsoft.com/en-us/uwp/api/windows.media.capture.frames.mediaframereference?view=winrt-19041#remarks
using var frameBitmap = frame.VideoMediaFrame.SoftwareBitmap;
using var sharedImage = ImagePool.GetOrCreate(frameBitmap.PixelWidth, frameBitmap.PixelHeight, PixelFormat.BGRA_32bpp);

// Copy bitmap data into the shared image
unsafe
{
using var input = frameBitmap.LockBuffer(BitmapBufferAccessMode.Read);
using var inputReference = input.CreateReference();
((UnsafeNative.IMemoryBufferByteAccess)inputReference).GetBuffer(out byte* imageData, out uint size);

// Copy BGRA bytes directly
sharedImage.Resource.CopyFrom((IntPtr)imageData, (int)size);
}

// Post image stream
if (streamSettings.OutputImage)
{
imageStream.Post(sharedImage, originatingTime);
}

// Post the image camera view stream if requested
if (streamSettings.OutputImageCameraView)
{
using var imageCameraView = new ImageCameraView(sharedImage, cameraIntrinsics, cameraPose);
imageCameraViewStream.Post(imageCameraView, originatingTime);
}
}
}
};
}
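As a rough sense of why selecting the subtype at the frame reader matters (general NV12/BGRA format arithmetic, not taken from this diff):

int width = 1280, height = 720;
int nv12Bytes = width * height * 3 / 2; // NV12 is 4:2:0, 12 bits per pixel => 1,382,400 bytes
int bgraBytes = width * height * 4;     // BGRA8 is 32 bits per pixel       => 3,686,400 bytes

// Requesting Nv12 hands back the sensor's native layout with no SoftwareBitmap
// conversion, and the payload is roughly 2.7x smaller than the BGRA equivalent.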
@@ -101,6 +101,11 @@ public StreamSettings()
/// </summary>
public int ImageHeight { get; set; } = 720;

/// <summary>
/// Gets or sets a value indicating whether the BGRA-converted image is emitted.
/// </summary>
public bool OutputImage { get; set; } = false;

/// <summary>
/// Gets or sets a value indicating whether the original NV12-encoded image is emitted.
/// </summary>
@@ -116,6 +121,11 @@ public StreamSettings()
/// </summary>
public bool OutputPose { get; set; } = true;

/// <summary>
/// Gets or sets a value indicating whether the BGRA-converted image camera view is emitted.
/// </summary>
public bool OutputImageCameraView { get; set; } = false;

/// <summary>
/// Gets or sets a value indicating whether the original NV12-encoded camera view is emitted.
/// </summary>
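Finally, a sketch of the new validation in action (hypothetical values; assumes the settings are assignable as shown):

var settings = new PhotoVideoCameraConfiguration.StreamSettings
{
    OutputEncodedImage = true,    // NV12 family
    OutputImageCameraView = true, // BGRA family -- conflicts with the flag above
};

// The PhotoVideoCamera constructor now throws NotSupportedException:
// "Emitting both encoded and regular images simultaneously is not supported. ..."
var camera = new PhotoVideoCamera(pipeline, new PhotoVideoCameraConfiguration { VideoStreamSettings = settings });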
