diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d1d0909cac009..fc4f2beeea01e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -145,6 +145,31 @@ jobs:
       - name: Check wasm
         run: cargo check --target wasm32-unknown-unknown
 
+  build-wasm-atomics:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    needs: build
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/bin/
+            ~/.cargo/registry/index/
+            ~/.cargo/registry/cache/
+            ~/.cargo/git/db/
+            target/
+          key: ubuntu-assets-cargo-build-wasm-nightly-${{ hashFiles('**/Cargo.toml') }}
+      - uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: ${{ env.NIGHTLY_TOOLCHAIN }}
+          targets: wasm32-unknown-unknown
+          components: rust-src
+      - name: Check wasm
+        run: cargo check --target wasm32-unknown-unknown -Z build-std=std,panic_abort
+        env:
+          RUSTFLAGS: "-C target-feature=+atomics,+bulk-memory"
+
   markdownlint:
     runs-on: ubuntu-latest
     timeout-minutes: 30
diff --git a/crates/bevy_render/Cargo.toml b/crates/bevy_render/Cargo.toml
index 2b1783e9114e1..1ab049a05bef1 100644
--- a/crates/bevy_render/Cargo.toml
+++ b/crates/bevy_render/Cargo.toml
@@ -64,7 +64,9 @@ image = { version = "0.24", default-features = false }
 # misc
 codespan-reporting = "0.11.0"
 # `fragile-send-sync-non-atomic-wasm` feature means we can't use WASM threads for rendering
-# It is enabled for now to avoid having to do a significant overhaul of the renderer just for wasm
+# It is enabled for now to avoid having to do a significant overhaul of the renderer just for wasm.
+# When the `atomics` target feature is enabled, `fragile-send-sync-non-atomic-wasm` does nothing
+# and Bevy instead wraps `wgpu` types to verify they are not used off their origin thread.
 wgpu = { version = "0.19.3", default-features = false, features = [
   "wgsl",
   "dx12",
@@ -118,6 +120,9 @@ web-sys = { version = "0.3.67", features = [
 ] }
 wasm-bindgen = "0.2"
 
+[target.'cfg(all(target_arch = "wasm32", target_feature = "atomics"))'.dependencies]
+send_wrapper = "0.6.0"
+
 [lints]
 workspace = true
 
diff --git a/crates/bevy_render/src/lib.rs b/crates/bevy_render/src/lib.rs
index 290b2cabcea73..02d0be4889eda 100644
--- a/crates/bevy_render/src/lib.rs
+++ b/crates/bevy_render/src/lib.rs
@@ -58,6 +58,7 @@ use globals::GlobalsPlugin;
 use renderer::{RenderAdapter, RenderAdapterInfo, RenderDevice, RenderQueue};
 
 use crate::deterministic::DeterministicRenderingConfig;
+use crate::renderer::WgpuWrapper;
 use crate::{
     camera::CameraPlugin,
     mesh::{morph::MorphPlugin, Mesh, MeshPlugin},
@@ -301,7 +302,7 @@ impl Plugin for RenderPlugin {
                             queue,
                             adapter_info,
                             render_adapter,
-                            RenderInstance(Arc::new(instance)),
+                            RenderInstance(Arc::new(WgpuWrapper::new(instance))),
                         ));
                     };
                     // In wasm, spawn a task and detach it for execution
diff --git a/crates/bevy_render/src/render_resource/resource_macros.rs b/crates/bevy_render/src/render_resource/resource_macros.rs
index de2ea0ec00e58..c027a92e2873f 100644
--- a/crates/bevy_render/src/render_resource/resource_macros.rs
+++ b/crates/bevy_render/src/render_resource/resource_macros.rs
@@ -9,16 +9,25 @@
 #[macro_export]
 macro_rules! render_resource_wrapper {
     ($wrapper_type:ident, $wgpu_type:ty) => {
+        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
         #[derive(Debug)]
         // SAFETY: while self is live, self.0 comes from `into_raw` of an Arc<$wgpu_type> with a strong ref.
         pub struct $wrapper_type(*const ());
 
+        #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+        #[derive(Debug)]
+        pub struct $wrapper_type(send_wrapper::SendWrapper<*const ()>);
+
         impl $wrapper_type {
             pub fn new(value: $wgpu_type) -> Self {
                 let arc = std::sync::Arc::new(value);
                 let value_ptr = std::sync::Arc::into_raw(arc);
                 let unit_ptr = value_ptr.cast::<()>();
-                Self(unit_ptr)
+
+                #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
+                return Self(unit_ptr);
+                #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+                return Self(send_wrapper::SendWrapper::new(unit_ptr));
             }
 
             pub fn try_unwrap(self) -> Option<$wgpu_type> {
@@ -53,13 +62,16 @@ macro_rules! render_resource_wrapper {
             }
         }
 
+        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
         // SAFETY: We manually implement Send and Sync, which is valid for Arc<T> when T: Send + Sync.
         // We ensure correctness by checking that $wgpu_type does implement Send and Sync.
         // If in future there is a case where a wrapper is required for a non-send/sync type
         // we can implement a macro variant that omits these manual Send + Sync impls
         unsafe impl Send for $wrapper_type {}
+        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
         // SAFETY: As explained above, we ensure correctness by checking that $wgpu_type implements Send and Sync.
         unsafe impl Sync for $wrapper_type {}
+        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
         const _: () = {
             trait AssertSendSyncBound: Send + Sync {}
             impl AssertSendSyncBound for $wgpu_type {}
@@ -75,7 +87,14 @@ macro_rules! render_resource_wrapper {
                 std::mem::forget(arc);
                 let cloned_value_ptr = std::sync::Arc::into_raw(cloned);
                 let cloned_unit_ptr = cloned_value_ptr.cast::<()>();
-                Self(cloned_unit_ptr)
+
+                #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
+                return Self(cloned_unit_ptr);
+
+                // Note: this implementation means that this Clone will panic
+                // when called off the wgpu thread.
+                #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+                return Self(send_wrapper::SendWrapper::new(cloned_unit_ptr));
             }
         }
     };
@@ -85,16 +104,28 @@ macro_rules! render_resource_wrapper {
 #[macro_export]
 macro_rules! render_resource_wrapper {
     ($wrapper_type:ident, $wgpu_type:ty) => {
+        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
         #[derive(Clone, Debug)]
         pub struct $wrapper_type(std::sync::Arc<$wgpu_type>);
+        #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+        #[derive(Clone, Debug)]
+        pub struct $wrapper_type(std::sync::Arc<send_wrapper::SendWrapper<$wgpu_type>>);
 
         impl $wrapper_type {
             pub fn new(value: $wgpu_type) -> Self {
-                Self(std::sync::Arc::new(value))
+                #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
+                return Self(std::sync::Arc::new(value));
+
+                #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+                return Self(std::sync::Arc::new(send_wrapper::SendWrapper::new(value)));
             }
 
             pub fn try_unwrap(self) -> Option<$wgpu_type> {
-                std::sync::Arc::try_unwrap(self.0).ok()
+                #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
+                return std::sync::Arc::try_unwrap(self.0).ok();
+
+                #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+                return std::sync::Arc::try_unwrap(self.0).ok().map(|p| p.take());
             }
         }
 
@@ -106,6 +137,7 @@ macro_rules! render_resource_wrapper {
             }
         }
 
+        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
         const _: () = {
             trait AssertSendSyncBound: Send + Sync {}
             impl AssertSendSyncBound for $wgpu_type {}
diff --git a/crates/bevy_render/src/renderer/mod.rs b/crates/bevy_render/src/renderer/mod.rs
index 92eada8f238d4..3f1620ae876fe 100644
--- a/crates/bevy_render/src/renderer/mod.rs
+++ b/crates/bevy_render/src/renderer/mod.rs
@@ -117,23 +117,54 @@ pub fn render_system(world: &mut World, state: &mut SystemState<Query<Entity, Wi
     }
 }
 
+/// A wrapper to safely make `wgpu` types Send / Sync on web with atomics enabled.
+/// On web with `atomics` enabled the inner value can only be accessed
+/// or dropped on the `wgpu` thread or else a panic will occur.
+/// On other platforms the wrapper simply contains the wrapped value.
+#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
+#[derive(Debug, Clone, Deref, DerefMut)]
+pub struct WgpuWrapper<T>(T);
+#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+#[derive(Debug, Clone, Deref, DerefMut)]
+pub struct WgpuWrapper<T>(send_wrapper::SendWrapper<T>);
+
+// SAFETY (both impls below): `SendWrapper<T>` is `Send + Sync` for any `T`; it panics if used off its origin thread.
+#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+unsafe impl<T> Send for WgpuWrapper<T> {}
+#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+unsafe impl<T> Sync for WgpuWrapper<T> {}
+
+#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
+impl<T> WgpuWrapper<T> {
+    pub fn new(t: T) -> Self {
+        Self(t)
+    }
+}
+
+#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+impl<T> WgpuWrapper<T> {
+    pub fn new(t: T) -> Self {
+        Self(send_wrapper::SendWrapper::new(t))
+    }
+}
+
 /// This queue is used to enqueue tasks for the GPU to execute asynchronously.
 #[derive(Resource, Clone, Deref, DerefMut)]
-pub struct RenderQueue(pub Arc<Queue>);
+pub struct RenderQueue(pub Arc<WgpuWrapper<Queue>>);
 
 /// The handle to the physical device being used for rendering.
 /// See [`Adapter`] for more info.
 #[derive(Resource, Clone, Debug, Deref, DerefMut)]
-pub struct RenderAdapter(pub Arc<Adapter>);
+pub struct RenderAdapter(pub Arc<WgpuWrapper<Adapter>>);
 
 /// The GPU instance is used to initialize the [`RenderQueue`] and [`RenderDevice`],
 /// as well as to create [`WindowSurfaces`](crate::view::window::WindowSurfaces).
 #[derive(Resource, Clone, Deref, DerefMut)]
-pub struct RenderInstance(pub Arc<Instance>);
+pub struct RenderInstance(pub Arc<WgpuWrapper<Instance>>);
 
 /// The [`AdapterInfo`] of the adapter in use by the renderer.
 #[derive(Resource, Clone, Deref, DerefMut)]
-pub struct RenderAdapterInfo(pub AdapterInfo);
+pub struct RenderAdapterInfo(pub WgpuWrapper<AdapterInfo>);
 
 const GPU_NOT_FOUND_ERROR_MESSAGE: &str = if cfg!(target_os = "linux") {
     "Unable to find a GPU! Make sure you have installed required drivers! For extra information, see: https://github.com/bevyengine/bevy/blob/latest/docs/linux_dependencies.md"
@@ -300,12 +331,12 @@ pub async fn initialize_renderer(
         )
         .await
         .unwrap();
-    let queue = Arc::new(queue);
-    let adapter = Arc::new(adapter);
+    let queue = Arc::new(WgpuWrapper::new(queue));
+    let adapter = Arc::new(WgpuWrapper::new(adapter));
     (
         RenderDevice::from(device),
         RenderQueue(queue),
-        RenderAdapterInfo(adapter_info),
+        RenderAdapterInfo(WgpuWrapper::new(adapter_info)),
         RenderAdapter(adapter),
     )
 }
@@ -403,7 +434,10 @@ impl<'w> RenderContext<'w> {
     /// buffer.
     pub fn add_command_buffer_generation_task(
         &mut self,
+        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
         task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w + Send,
+        #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+        task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w,
     ) {
         self.flush_encoder();
 
@@ -425,28 +459,46 @@ impl<'w> RenderContext<'w> {
         self.flush_encoder();
 
         let mut command_buffers = Vec::with_capacity(self.command_buffer_queue.len());
-        let mut task_based_command_buffers = ComputeTaskPool::get().scope(|task_pool| {
-            for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate() {
-                match queued_command_buffer {
-                    QueuedCommandBuffer::Ready(command_buffer) => {
-                        command_buffers.push((i, command_buffer));
-                    }
-                    QueuedCommandBuffer::Task(command_buffer_generation_task) => {
-                        let render_device = self.render_device.clone();
-                        if self.force_serial {
-                            command_buffers
-                                .push((i, command_buffer_generation_task(render_device)));
-                        } else {
-                            task_pool.spawn(async move {
-                                (i, command_buffer_generation_task(render_device))
-                            });
+
+        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
+        {
+            let mut task_based_command_buffers = ComputeTaskPool::get().scope(|task_pool| {
+                for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate()
+                {
+                    match queued_command_buffer {
+                        QueuedCommandBuffer::Ready(command_buffer) => {
+                            command_buffers.push((i, command_buffer));
+                        }
+                        QueuedCommandBuffer::Task(command_buffer_generation_task) => {
+                            let render_device = self.render_device.clone();
+                            if self.force_serial {
+                                command_buffers
+                                    .push((i, command_buffer_generation_task(render_device)));
+                            } else {
+                                task_pool.spawn(async move {
+                                    (i, command_buffer_generation_task(render_device))
+                                });
+                            }
                         }
                     }
                 }
+            });
+            command_buffers.append(&mut task_based_command_buffers);
+        }
+
+        #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+        for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate() {
+            match queued_command_buffer {
+                QueuedCommandBuffer::Ready(command_buffer) => {
+                    command_buffers.push((i, command_buffer));
+                }
+                QueuedCommandBuffer::Task(command_buffer_generation_task) => {
+                    let render_device = self.render_device.clone();
+                    command_buffers.push((i, command_buffer_generation_task(render_device)));
+                }
             }
-        });
+        }
 
-        command_buffers.append(&mut task_based_command_buffers);
         command_buffers.sort_unstable_by_key(|(i, _)| *i);
 
         let mut command_buffers = command_buffers
@@ -481,5 +533,8 @@ impl<'w> RenderContext<'w> {
 
 enum QueuedCommandBuffer<'w> {
     Ready(CommandBuffer),
+    #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
     Task(Box<dyn FnOnce(RenderDevice) -> CommandBuffer + 'w + Send>),
+    #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
+    Task(Box<dyn FnOnce(RenderDevice) -> CommandBuffer + 'w>),
 }
diff --git a/crates/bevy_render/src/renderer/render_device.rs b/crates/bevy_render/src/renderer/render_device.rs
index 45bccf0bbe667..1c0b26b912a42 100644
--- a/crates/bevy_render/src/renderer/render_device.rs
+++ b/crates/bevy_render/src/renderer/render_device.rs
@@ -11,19 +11,20 @@ use wgpu::{
 use super::RenderQueue;
 
 use crate::render_resource::resource_macros::*;
+use crate::WgpuWrapper;
 
 render_resource_wrapper!(ErasedRenderDevice, wgpu::Device);
 
 /// This GPU device is responsible for the creation of most rendering and compute resources.
 #[derive(Resource, Clone)]
 pub struct RenderDevice {
-    device: ErasedRenderDevice,
+    device: WgpuWrapper<ErasedRenderDevice>,
 }
 
 impl From<wgpu::Device> for RenderDevice {
     fn from(device: wgpu::Device) -> Self {
         Self {
-            device: ErasedRenderDevice::new(device),
+            device: WgpuWrapper::new(ErasedRenderDevice::new(device)),
         }
     }
 }
diff --git a/crates/bevy_render/src/view/window/mod.rs b/crates/bevy_render/src/view/window/mod.rs
index 64d089cee0f7c..ddb0f77f98aef 100644
--- a/crates/bevy_render/src/view/window/mod.rs
+++ b/crates/bevy_render/src/view/window/mod.rs
@@ -4,7 +4,7 @@ use crate::{
     },
     renderer::{RenderAdapter, RenderDevice, RenderInstance},
     texture::TextureFormatPixelInfo,
-    Extract, ExtractSchedule, Render, RenderApp, RenderSet,
+    Extract, ExtractSchedule, Render, RenderApp, RenderSet, WgpuWrapper,
 };
 use bevy_app::{App, Plugin};
 use bevy_ecs::{entity::EntityHashMap, prelude::*};
@@ -198,7 +198,7 @@ fn extract_windows(
 
 struct SurfaceData {
     // TODO: what lifetime should this be?
-    surface: wgpu::Surface<'static>,
+    surface: WgpuWrapper<wgpu::Surface<'static>>,
     configuration: SurfaceConfiguration,
 }
 
@@ -488,7 +488,7 @@ pub fn create_surfaces(
                 render_device.configure_surface(&surface, &configuration);
 
                 SurfaceData {
-                    surface,
+                    surface: WgpuWrapper::new(surface),
                     configuration,
                 }
             });