Skip to content
40 changes: 24 additions & 16 deletions editor/src/node_graph_executor/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ impl NodeRuntime {
&executor.context.device,
&vello::wgpu::SurfaceConfiguration {
usage: vello::wgpu::TextureUsages::RENDER_ATTACHMENT | vello::wgpu::TextureUsages::COPY_DST,
format: vello::wgpu::TextureFormat::Rgba8Unorm,
format: surface.surface.format,
width: physical_resolution.x,
height: physical_resolution.y,
present_mode: surface_caps.present_modes[0],
Expand All @@ -365,21 +365,29 @@ impl NodeRuntime {
let surface_texture = surface_inner.get_current_texture().expect("Failed to get surface texture");
self.current_viewport_texture = Some(image_texture.clone());

encoder.copy_texture_to_texture(
vello::wgpu::TexelCopyTextureInfoBase {
texture: image_texture.texture.as_ref(),
mip_level: 0,
origin: Default::default(),
aspect: Default::default(),
},
vello::wgpu::TexelCopyTextureInfoBase {
texture: &surface_texture.texture,
mip_level: 0,
origin: Default::default(),
aspect: Default::default(),
},
image_texture.texture.size(),
);
// Only use the blitter if formats differ, otherwise use efficient direct copy
if surface.surface.format == vello::wgpu::TextureFormat::Rgba8Unorm {
// Same format as Vello's output - use direct texture copy
encoder.copy_texture_to_texture(
vello::wgpu::TexelCopyTextureInfoBase {
texture: image_texture.texture.as_ref(),
mip_level: 0,
origin: Default::default(),
aspect: Default::default(),
},
vello::wgpu::TexelCopyTextureInfoBase {
texture: &surface_texture.texture,
mip_level: 0,
origin: Default::default(),
aspect: Default::default(),
},
image_texture.texture.size(),
);
} else {
// Different format (e.g., Firefox's Bgra8Unorm on Mac) - use cached blitter for conversion
let target_view = surface_texture.texture.create_view(&vello::wgpu::TextureViewDescriptor::default());
surface.surface.blitter.copy(&executor.context.device, &mut encoder, image_texture.texture.as_ref(), &target_view);
}

executor.context.queue.submit([encoder.finish()]);
surface_texture.present();
Expand Down
236 changes: 233 additions & 3 deletions node-graph/libraries/wgpu-executor/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use glam::UVec2;
use graphene_application_io::{ApplicationIo, EditorApi, SurfaceHandle, SurfaceId};
use std::sync::Arc;
use vello::{AaConfig, AaSupport, RenderParams, Renderer, RendererOptions, Scene};
use wgpu::util::TextureBlitter;
use wgpu::{Origin3d, TextureAspect};

pub use context::Context as WgpuContext;
Expand Down Expand Up @@ -48,7 +47,234 @@ pub type WgpuWindow = Arc<SurfaceHandle<WindowHandle>>;
pub struct Surface {
pub inner: wgpu::Surface<'static>,
pub target_texture: Mutex<Option<TargetTexture>>,
pub blitter: TextureBlitter,
pub blitter: CachedBlitter,
pub format: wgpu::TextureFormat,
}

/// A texture blitter that caches its bind group to avoid recreating it every frame.
///
/// The standard wgpu `TextureBlitter` creates a new bind group on every `copy()` call,
/// which causes excessive GPU resource allocation during viewport panning. This blitter
/// maintains a persistent intermediate texture (recreated only on size change) and a cached
/// bind group bound to it. Each frame, the source is copied into the persistent texture
/// via `copy_texture_to_texture` (same format, no bind groups), then the cached bind group
/// is used for the format-converting render pass.
///
/// The pipeline is built once in `new` for a fixed target format; the intermediate texture
/// and bind group are built on demand inside `copy`.
pub struct CachedBlitter {
/// Render pipeline compiled from `BLIT_SHADER`; its color target uses the format passed to `new`.
pipeline: wgpu::RenderPipeline,
/// Layout with a 2D float texture at binding 0 and a sampler at binding 1, both fragment-visible.
bind_group_layout: wgpu::BindGroupLayout,
/// Clamp-to-edge sampler bound at binding 1 of every cached bind group.
sampler: wgpu::Sampler,
/// Intermediate texture and bind group from the previous `copy` call, or `None` before the first call.
/// `copy` takes the value out while it records commands and stores it back afterwards.
cache: std::sync::Mutex<Option<BlitCache>>,
}

/// Resources that `CachedBlitter::copy` keeps alive between calls.
struct BlitCache {
/// The persistent intermediate texture owned by the blitter. Despite the name, this is not the
/// caller's source texture: the caller's source is copied into it before the blit pass samples it.
source_texture: wgpu::Texture,
/// Bind group pairing a view of `source_texture` with the blitter's sampler.
bind_group: wgpu::BindGroup,
/// Extent `source_texture` was created with; compared with the incoming source size to decide
/// whether the cached resources can be reused.
size: wgpu::Extent3d,
}

/// WGSL source for the blit pipeline used by `CachedBlitter`.
///
/// `vs_main` derives a single oversized triangle purely from the vertex index (no vertex buffers):
/// indices 0, 1, 2 map to clip-space positions (-1, -1), (3, -1), (-1, 3), which cover the whole
/// render target. The texture coordinate's Y component is flipped (`1.0 - y`) after the position
/// has been computed from it.
///
/// `fs_main` samples `src_texture` (group 0, binding 0) through `src_sampler` (group 0, binding 1)
/// and writes the sampled colour unchanged to colour attachment 0.
const BLIT_SHADER: &str = r"
struct VertexOutput {
@builtin(position) position: vec4<f32>,
@location(0) tex_coords: vec2<f32>,
}

@vertex
fn vs_main(@builtin(vertex_index) vi: u32) -> VertexOutput {
var out: VertexOutput;
out.tex_coords = vec2<f32>(
f32((vi << 1u) & 2u),
f32(vi & 2u),
);
out.position = vec4<f32>(out.tex_coords * 2.0 - 1.0, 0.0, 1.0);
out.tex_coords.y = 1.0 - out.tex_coords.y;
return out;
}

@group(0) @binding(0)
var src_texture: texture_2d<f32>;
@group(0) @binding(1)
var src_sampler: sampler;

@fragment
fn fs_main(vs: VertexOutput) -> @location(0) vec4<f32> {
return textureSample(src_texture, src_sampler, vs.tex_coords);
}
";

impl CachedBlitter {
pub fn new(device: &wgpu::Device, format: wgpu::TextureFormat) -> Self {
let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
label: Some("CachedBlitter::sampler"),
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Nearest,
..Default::default()
});

let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: Some("CachedBlitter::bind_group_layout"),
entries: &[
wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Texture {
sample_type: wgpu::TextureSampleType::Float { filterable: false },
view_dimension: wgpu::TextureViewDimension::D2,
multisampled: false,
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
count: None,
},
],
});

let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: Some("CachedBlitter::pipeline_layout"),
bind_group_layouts: &[&bind_group_layout],
push_constant_ranges: &[],
});

let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
label: Some("CachedBlitter::shader"),
source: wgpu::ShaderSource::Wgsl(BLIT_SHADER.into()),
});

let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
label: Some("CachedBlitter::pipeline"),
layout: Some(&pipeline_layout),
vertex: wgpu::VertexState {
module: &shader,
entry_point: Some("vs_main"),
compilation_options: wgpu::PipelineCompilationOptions::default(),
buffers: &[],
},
primitive: wgpu::PrimitiveState {
topology: wgpu::PrimitiveTopology::TriangleList,
..Default::default()
},
depth_stencil: None,
multisample: wgpu::MultisampleState::default(),
fragment: Some(wgpu::FragmentState {
module: &shader,
entry_point: Some("fs_main"),
compilation_options: wgpu::PipelineCompilationOptions::default(),
targets: &[Some(wgpu::ColorTargetState {
format,
blend: None,
write_mask: wgpu::ColorWrites::ALL,
})],
}),
multiview: None,
cache: None,
});

Self {
pipeline,
bind_group_layout,
sampler,
cache: std::sync::Mutex::new(None),
}
}

/// Copies the source texture to the target with format conversion, using a cached bind group.
///
/// Internally maintains a persistent intermediate texture. Each frame:
/// 1. Copies `source` → intermediate via `copy_texture_to_texture` (same format, no bind groups)
/// 2. Blits intermediate → `target` via a render pass with the cached bind group
///
/// The bind group and intermediate texture are only recreated when the source size changes.
pub fn copy(
&self,
device: &wgpu::Device,
encoder: &mut wgpu::CommandEncoder,
source: &wgpu::Texture,
target: &wgpu::TextureView,
) {
let size = source.size();

// Take cache out of mutex to avoid holding the lock during GPU operations
let mut cache = self.cache.lock().unwrap().take();

// Recreate the persistent texture and bind group if size changed
if !matches!(&cache, Some(c) if c.size == size) {
let texture = device.create_texture(&wgpu::TextureDescriptor {
label: Some("CachedBlitter::intermediate"),
size,
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: VELLO_SURFACE_FORMAT,
usage: wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::TEXTURE_BINDING,
view_formats: &[],
});
let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("CachedBlitter::bind_group"),
layout: &self.bind_group_layout,
entries: &[
wgpu::BindGroupEntry {
binding: 0,
resource: wgpu::BindingResource::TextureView(&view),
},
wgpu::BindGroupEntry {
binding: 1,
resource: wgpu::BindingResource::Sampler(&self.sampler),
},
],
});
cache = Some(BlitCache { source_texture: texture, bind_group, size });
}

let c = cache.as_ref().unwrap();

// Copy source → persistent intermediate texture (same format, no bind group creation)
encoder.copy_texture_to_texture(
wgpu::TexelCopyTextureInfoBase {
texture: source,
mip_level: 0,
origin: Default::default(),
aspect: Default::default(),
},
wgpu::TexelCopyTextureInfoBase {
texture: &c.source_texture,
mip_level: 0,
origin: Default::default(),
aspect: Default::default(),
},
size,
);

// Blit intermediate → target with format conversion using the cached bind group
{
let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: Some("CachedBlitter::pass"),
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: target,
depth_slice: None,
resolve_target: None,
ops: wgpu::Operations {
load: wgpu::LoadOp::Load,
store: wgpu::StoreOp::Store,
},
})],
depth_stencil_attachment: None,
timestamp_writes: None,
occlusion_query_set: None,
});
pass.set_pipeline(&self.pipeline);
pass.set_bind_group(0, &c.bind_group, &[]);
pass.draw(0..3, 0..1);
}

// Put cache back for next frame
*self.cache.lock().unwrap() = cache;
}
}

#[derive(Clone, Debug)]
Expand Down Expand Up @@ -173,13 +399,17 @@ impl WgpuExecutor {
}

/// Wraps `surface` in a [`SurfaceHandle`] together with the blitter and format used to present to it.
///
/// The surface format is the first format the surface reports for this executor's adapter.
/// If the surface reports no formats at all, `VELLO_SURFACE_FORMAT` is used instead of
/// panicking on an out-of-bounds index. The blitter is built for the chosen format, and the
/// target texture starts out as `None`.
pub fn create_surface_inner(&self, surface: wgpu::Surface<'static>, window_id: SurfaceId) -> Result<SurfaceHandle<Surface>> {
    // Use the surface's preferred format (Firefox prefers Bgra8Unorm, Chrome prefers Rgba8Unorm)
    let surface_format = surface
        .get_capabilities(&self.context.adapter)
        .formats
        .first()
        .copied()
        .unwrap_or(VELLO_SURFACE_FORMAT);

    // Build exactly one blitter, targeting the format the surface will be configured with.
    let blitter = CachedBlitter::new(&self.context.device, surface_format);

    Ok(SurfaceHandle {
        window_id,
        surface: Surface {
            inner: surface,
            target_texture: Mutex::new(None),
            blitter,
            format: surface_format,
        },
    })
}
Expand Down