GraphiteEditor · Ayush2k02 · Mar 18, 2026 · Mar 18, 2026 · Mar 24, 2026 · Mar 24, 2026
diff --git a/editor/src/node_graph_executor/runtime.rs b/editor/src/node_graph_executor/runtime.rs
@@ -352,7 +352,7 @@ impl NodeRuntime {
 								&executor.context.device,
 								&vello::wgpu::SurfaceConfiguration {
 									usage: vello::wgpu::TextureUsages::RENDER_ATTACHMENT | vello::wgpu::TextureUsages::COPY_DST,
-									format: vello::wgpu::TextureFormat::Rgba8Unorm,
+									format: surface.surface.format,
 									width: physical_resolution.x,
 									height: physical_resolution.y,
 									present_mode: surface_caps.present_modes[0],
@@ -365,21 +365,29 @@ impl NodeRuntime {
 							let surface_texture = surface_inner.get_current_texture().expect("Failed to get surface texture");
 							self.current_viewport_texture = Some(image_texture.clone());
 
-							encoder.copy_texture_to_texture(
-								vello::wgpu::TexelCopyTextureInfoBase {
-									texture: image_texture.texture.as_ref(),
-									mip_level: 0,
-									origin: Default::default(),
-									aspect: Default::default(),
-								},
-								vello::wgpu::TexelCopyTextureInfoBase {
-									texture: &surface_texture.texture,
-									mip_level: 0,
-									origin: Default::default(),
-									aspect: Default::default(),
-								},
-								image_texture.texture.size(),
-							);
+							// Only use the blitter if formats differ, otherwise use efficient direct copy
+							if surface.surface.format == vello::wgpu::TextureFormat::Rgba8Unorm {
+								// Same format as Vello's output - use direct texture copy
+								encoder.copy_texture_to_texture(
+									vello::wgpu::TexelCopyTextureInfoBase {
+										texture: image_texture.texture.as_ref(),
+										mip_level: 0,
+										origin: Default::default(),
+										aspect: Default::default(),
+									},
+									vello::wgpu::TexelCopyTextureInfoBase {
+										texture: &surface_texture.texture,
+										mip_level: 0,
+										origin: Default::default(),
+										aspect: Default::default(),
+									},
+									image_texture.texture.size(),
+								);
+							} else {
+								// Different format (e.g., Firefox's Bgra8Unorm on Mac) - use cached blitter for conversion
+								let target_view = surface_texture.texture.create_view(&vello::wgpu::TextureViewDescriptor::default());
+								surface.surface.blitter.copy(&executor.context.device, &mut encoder, image_texture.texture.as_ref(), &target_view);
+							}
 
 							executor.context.queue.submit([encoder.finish()]);
 							surface_texture.present();

diff --git a/node-graph/libraries/wgpu-executor/src/lib.rs b/node-graph/libraries/wgpu-executor/src/lib.rs
@@ -13,7 +13,6 @@ use glam::UVec2;
 use graphene_application_io::{ApplicationIo, EditorApi, SurfaceHandle, SurfaceId};
 use std::sync::Arc;
 use vello::{AaConfig, AaSupport, RenderParams, Renderer, RendererOptions, Scene};
-use wgpu::util::TextureBlitter;
 use wgpu::{Origin3d, TextureAspect};
 
 pub use context::Context as WgpuContext;
@@ -48,7 +47,234 @@ pub type WgpuWindow = Arc<SurfaceHandle<WindowHandle>>;
 pub struct Surface {
+	/// The underlying wgpu surface.
 	pub inner: wgpu::Surface<'static>,
 	pub target_texture: Mutex<Option<TargetTexture>>,
-	pub blitter: TextureBlitter,
+	/// Blitter whose render pipeline is built for `format` (see `create_surface_inner`).
+	pub blitter: CachedBlitter,
+	/// Texture format selected for this surface in `create_surface_inner`.
+	pub format: wgpu::TextureFormat,
+}
+
+/// A texture blitter that caches its bind group to avoid recreating it every frame.
+///
+/// The standard wgpu `TextureBlitter` creates a new bind group on every `copy()` call,
+/// which causes excessive GPU resource allocation during viewport panning. This blitter
+/// maintains a persistent intermediate texture (recreated only when the source stops matching it,
+/// e.g. on resize) and a cached bind group bound to it. Each frame, the source is copied into the
+/// persistent texture via `copy_texture_to_texture` (same format, no bind groups), then the cached
+/// bind group is used for the format-converting render pass.
+pub struct CachedBlitter {
+	/// Render pipeline that draws into a single color target of the format passed to `new`.
+	pipeline: wgpu::RenderPipeline,
+	/// Layout of bind group 0: a 2D float texture at binding 0 and a sampler at binding 1, both visible to the fragment stage.
+	bind_group_layout: wgpu::BindGroupLayout,
+	/// Sampler created once in `new` and bound at binding 1 of every cached bind group.
+	sampler: wgpu::Sampler,
+	/// Intermediate texture and bind group kept between `copy` calls; `None` until the first call.
+	cache: std::sync::Mutex<Option<BlitCache>>,
+}
+
+/// Resources that `CachedBlitter::copy` keeps between calls instead of rebuilding them every frame.
+struct BlitCache {
+	/// The blitter's own intermediate texture (labelled `CachedBlitter::intermediate`), not the caller's texture:
+	/// each `copy` call copies its `source` argument into this texture and then samples from it.
+	source_texture: wgpu::Texture,
+	/// Bind group pairing a view of `source_texture` with the blitter's sampler.
+	bind_group: wgpu::BindGroup,
+	/// Extent `source_texture` was created with; compared against the incoming source's size to decide whether to rebuild.
+	size: wgpu::Extent3d,
+}
+
+/// WGSL source for the blit pipeline.
+///
+/// `vs_main` derives one oversized triangle from `vertex_index` alone (no vertex buffers): indices
+/// 0, 1, 2 produce texture coordinates (0, 0), (2, 0), (0, 2) and therefore clip-space positions
+/// (-1, -1), (3, -1), (-1, 3), which cover the whole render target. After the position is computed,
+/// `tex_coords.y` is flipped (`1.0 - y`) — presumably because clip-space Y and texture V run in
+/// opposite directions. `fs_main` returns the texel sampled from `src_texture` at those coordinates.
+const BLIT_SHADER: &str = r"
+struct VertexOutput {
+    @builtin(position) position: vec4<f32>,
+    @location(0) tex_coords: vec2<f32>,
+}
+
+@vertex
+fn vs_main(@builtin(vertex_index) vi: u32) -> VertexOutput {
+    var out: VertexOutput;
+    out.tex_coords = vec2<f32>(
+        f32((vi << 1u) & 2u),
+        f32(vi & 2u),
+    );
+    out.position = vec4<f32>(out.tex_coords * 2.0 - 1.0, 0.0, 1.0);
+    out.tex_coords.y = 1.0 - out.tex_coords.y;
+    return out;
+}
+
+@group(0) @binding(0)
+var src_texture: texture_2d<f32>;
+@group(0) @binding(1)
+var src_sampler: sampler;
+
+@fragment
+fn fs_main(vs: VertexOutput) -> @location(0) vec4<f32> {
+    return textureSample(src_texture, src_sampler, vs.tex_coords);
+}
+";
+
+impl CachedBlitter {
+	/// Builds the sampler, bind group layout, shader module and render pipeline used by `copy`.
+	///
+	/// * `device` - device on which every GPU object is created.
+	/// * `format` - format of the color target the pipeline renders into. wgpu requires the `target`
+	///   view later passed to `copy` to have this same format.
+	///
+	/// The cache starts out empty; the intermediate texture and bind group are created lazily by the
+	/// first `copy` call.
+	pub fn new(device: &wgpu::Device, format: wgpu::TextureFormat) -> Self {
+		// Nearest-neighbour sampling: the blit is meant as a texel-for-texel conversion, so no filtering is requested.
+		let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
+			label: Some("CachedBlitter::sampler"),
+			address_mode_u: wgpu::AddressMode::ClampToEdge,
+			address_mode_v: wgpu::AddressMode::ClampToEdge,
+			address_mode_w: wgpu::AddressMode::ClampToEdge,
+			mag_filter: wgpu::FilterMode::Nearest,
+			..Default::default()
+		});
+
+		// Binding 0 is the source texture, binding 1 the sampler; both are declared non-filtering to match the sampler above.
+		let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+			label: Some("CachedBlitter::bind_group_layout"),
+			entries: &[
+				wgpu::BindGroupLayoutEntry {
+					binding: 0,
+					visibility: wgpu::ShaderStages::FRAGMENT,
+					ty: wgpu::BindingType::Texture {
+						sample_type: wgpu::TextureSampleType::Float { filterable: false },
+						view_dimension: wgpu::TextureViewDimension::D2,
+						multisampled: false,
+					},
+					count: None,
+				},
+				wgpu::BindGroupLayoutEntry {
+					binding: 1,
+					visibility: wgpu::ShaderStages::FRAGMENT,
+					ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
+					count: None,
+				},
+			],
+		});
+
+		let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+			label: Some("CachedBlitter::pipeline_layout"),
+			bind_group_layouts: &[&bind_group_layout],
+			push_constant_ranges: &[],
+		});
+
+		let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
+			label: Some("CachedBlitter::shader"),
+			source: wgpu::ShaderSource::Wgsl(BLIT_SHADER.into()),
+		});
+
+		let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
+			label: Some("CachedBlitter::pipeline"),
+			layout: Some(&pipeline_layout),
+			vertex: wgpu::VertexState {
+				module: &shader,
+				entry_point: Some("vs_main"),
+				compilation_options: wgpu::PipelineCompilationOptions::default(),
+				// The vertex shader derives positions from `vertex_index`, so no vertex buffers are bound.
+				buffers: &[],
+			},
+			primitive: wgpu::PrimitiveState {
+				topology: wgpu::PrimitiveTopology::TriangleList,
+				..Default::default()
+			},
+			depth_stencil: None,
+			multisample: wgpu::MultisampleState::default(),
+			fragment: Some(wgpu::FragmentState {
+				module: &shader,
+				entry_point: Some("fs_main"),
+				compilation_options: wgpu::PipelineCompilationOptions::default(),
+				targets: &[Some(wgpu::ColorTargetState {
+					format,
+					// No blending: the sampled texel replaces whatever the target held.
+					blend: None,
+					write_mask: wgpu::ColorWrites::ALL,
+				})],
+			}),
+			multiview: None,
+			cache: None,
+		});
+
+		Self {
+			pipeline,
+			bind_group_layout,
+			sampler,
+			cache: std::sync::Mutex::new(None),
+		}
+	}
+
+	/// Copies the source texture to the target with format conversion, using a cached bind group.
+	///
+	/// Internally maintains a persistent intermediate texture. Each call:
+	/// 1. Copies `source` → intermediate via `copy_texture_to_texture` (same format, no bind groups)
+	/// 2. Blits intermediate → `target` via a render pass with the cached bind group
+	///
+	/// The intermediate texture is created with the source's own format, so the copy in step 1 is valid
+	/// for any source format rather than only `VELLO_SURFACE_FORMAT`. The bind group and intermediate
+	/// texture are only recreated when the source's size or format changes.
+	///
+	/// * `device` - device used to (re)create the intermediate texture and bind group when needed.
+	/// * `encoder` - encoder the copy and the render pass are recorded into; nothing is submitted here.
+	/// * `source` - texture to read from. wgpu requires it to have `COPY_SRC` usage, and its format must
+	///   be sampleable as a float texture to satisfy the bind group layout.
+	/// * `target` - view to render into; its format must be the one passed to `new`.
+	///
+	/// # Panics
+	///
+	/// Panics if the internal cache mutex is poisoned.
+	pub fn copy(
+		&self,
+		device: &wgpu::Device,
+		encoder: &mut wgpu::CommandEncoder,
+		source: &wgpu::Texture,
+		target: &wgpu::TextureView,
+	) {
+		let size = source.size();
+		let format = source.format();
+
+		// Take the cache out of the mutex so the lock is not held while commands are recorded
+		let mut cache = self.cache.lock().unwrap().take();
+
+		// Recreate the persistent texture and bind group if the source's size or format changed
+		if !matches!(&cache, Some(c) if c.size == size && c.source_texture.format() == format) {
+			let texture = device.create_texture(&wgpu::TextureDescriptor {
+				label: Some("CachedBlitter::intermediate"),
+				size,
+				mip_level_count: 1,
+				sample_count: 1,
+				dimension: wgpu::TextureDimension::D2,
+				// Match the source so `copy_texture_to_texture` below is always a same-format copy
+				format,
+				usage: wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::TEXTURE_BINDING,
+				view_formats: &[],
+			});
+			let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
+			let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
+				label: Some("CachedBlitter::bind_group"),
+				layout: &self.bind_group_layout,
+				entries: &[
+					wgpu::BindGroupEntry {
+						binding: 0,
+						resource: wgpu::BindingResource::TextureView(&view),
+					},
+					wgpu::BindGroupEntry {
+						binding: 1,
+						resource: wgpu::BindingResource::Sampler(&self.sampler),
+					},
+				],
+			});
+			cache = Some(BlitCache { source_texture: texture, bind_group, size });
+		}
+
+		// Always `Some` here: either the cached entry matched or it was just rebuilt above
+		let c = cache.as_ref().unwrap();
+
+		// Copy source → persistent intermediate texture (same format, no bind group creation)
+		encoder.copy_texture_to_texture(
+			wgpu::TexelCopyTextureInfoBase {
+				texture: source,
+				mip_level: 0,
+				origin: Default::default(),
+				aspect: Default::default(),
+			},
+			wgpu::TexelCopyTextureInfoBase {
+				texture: &c.source_texture,
+				mip_level: 0,
+				origin: Default::default(),
+				aspect: Default::default(),
+			},
+			size,
+		);
+
+		// Blit intermediate → target with format conversion using the cached bind group
+		{
+			let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+				label: Some("CachedBlitter::pass"),
+				color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+					view: target,
+					depth_slice: None,
+					resolve_target: None,
+					ops: wgpu::Operations {
+						load: wgpu::LoadOp::Load,
+						store: wgpu::StoreOp::Store,
+					},
+				})],
+				depth_stencil_attachment: None,
+				timestamp_writes: None,
+				occlusion_query_set: None,
+			});
+			pass.set_pipeline(&self.pipeline);
+			pass.set_bind_group(0, &c.bind_group, &[]);
+			// Three vertices: the single full-screen triangle generated by the vertex shader
+			pass.draw(0..3, 0..1);
+		}
+
+		// Put cache back for the next call
+		*self.cache.lock().unwrap() = cache;
+	}
 }
 
 #[derive(Clone, Debug)]
@@ -173,13 +399,17 @@ impl WgpuExecutor {
 	}
 
 	pub fn create_surface_inner(&self, surface: wgpu::Surface<'static>, window_id: SurfaceId) -> Result<SurfaceHandle<Surface>> {
-		let blitter = TextureBlitter::new(&self.context.device, VELLO_SURFACE_FORMAT);
+		// Use the surface's preferred format (Firefox prefers Bgra8Unorm, Chrome prefers Rgba8Unorm)
+		let surface_caps = surface.get_capabilities(&self.context.adapter);
+		let surface_format = surface_caps.formats[0];
+		let blitter = CachedBlitter::new(&self.context.device, surface_format);
 		Ok(SurfaceHandle {
 			window_id,
 			surface: Surface {
 				inner: surface,
 				target_texture: Mutex::new(None),
 				blitter,
+				format: surface_format,
 			},
 		})
 	}