I am currently trying to make a multipass renderer that includes tiled light culling. The steps should go as follows:
Raw depth render buffer pass (no lighting has been done yet)
↓
Feed the depth buffer into a compute shader and compute the per-tile light results into a 16 by 16 texture
↓
Feed it into my render pipeline to efficiently cull the lights.
So far, the stages don't seem to execute in this order, and I can't figure out why. Every stage appears to run independently of the others, and I don't know how to make the passes run one at a time.
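
My understanding is that, within a frame, Panda3D renders offscreen buffers in ascending order of their sort values, with the main window at sort 0 by default, so this is roughly the ordering I am trying to get (just a sketch; depth_buffer and compute_buffer are placeholder names for the buffers created further down):

depth_buffer.setSort(-20)    # 1. raw depth pre-pass renders first
compute_buffer.setSort(-10)  # 2. pass that hosts the compute dispatch runs second
# 3. the main window keeps its default sort of 0, so it renders last
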
Here is some of my code so far. Note that I have built some custom classes, for example for the frame buffer:
winx = 16
winy = 16
color_tex = p3d.Texture()
color_tex.setup_2d_texture(
    winx, winy,
    p3d.Texture.T_unsigned_byte,
    p3d.Texture.F_rgba8
)
color_tex.setMinfilter(p3d.SamplerState.FT_nearest)
color_tex.setMagfilter(p3d.SamplerState.FT_nearest)
depth_tex = p3d.Texture()
depth_tex.setup_2d_texture(
    winx, winy,
    p3d.Texture.T_unsigned_byte,
    p3d.Texture.F_depth_component
)
depth_tex.setMinfilter(p3d.SamplerState.FT_nearest)
depth_tex.setMagfilter(p3d.SamplerState.FT_nearest)
depth_tex.setWrapU(p3d.SamplerState.WM_clamp)
depth_tex.setWrapV(p3d.SamplerState.WM_clamp)
tex = p3d.Texture("compute_tex")
tex.setup_2d_texture(
    winx, winy,
    p3d.Texture.T_float,
    p3d.Texture.F_rgba32
)
tex.setMinfilter(p3d.SamplerState.FT_nearest)
tex.setMagfilter(p3d.SamplerState.FT_nearest)
manager = GraphicsBuffer(application.base.win, application.base.cam)
buffer = manager.createBuffer('defered', size = (winx, winy), colortex = color_tex, depthtex = depth_tex, depthbits = True, sort = -20)
buffer.getDisplayRegion(0).setSort(-20)
buffer.setSort(-20)
buffer.setChildSort(-20)
empty = NodePath("new")
empty.setLightOff(True)
empty.setBin('fixed', -20)
state = empty.getState()
GraphicsBufferManager.cameras[0].node().setInitialState(state)
#GraphicsBufferManager.cameras[0].setShaderOff(True)
#GraphicsBufferManager.cameras[0].node().setTagStateKey("NOLIGHT")
#GraphicsBufferManager.cameras[0].node().setTagState("NOLIGHT", empty.getState())
#application.base.cam.node().setCameraMask(BitMask32.bit(0))
#GraphicsBufferManager.cameras[0].node().setCameraMask(BitMask32.bit(1))
ground.tags = ['cheese']
ground.tags.append('balls')
print('GROUND TAGS', ground.tags, ground.getTagKeys())
print("Camera:",GraphicsBufferManager.cameras[0].node())
print('light shadow sort:', light._light.getShadowBufferSort())
##COMPUTE SHADER STUFF
in_quad = Entity(model = Quad(), parent = camera.ui, texture = color_tex, scale = (0.2, 0.12), position = (-0.6, -0.4))
out_quad = Entity(model = Quad(), parent = camera.ui, texture = tex, scale = (0.2, 0.12), position = (0.2, -0.4))
compute = ComputeShaderNode('compute.glsl')
compute.np.setBin('fixed', -20)
#application.base.win.getGsg().useShader(compute.compute_shader)
print(color_tex.getFormat(), winx // 16, winy // 16)
print((color_tex.getXSize(), color_tex.getYSize()), (tex.getXSize(), tex.getYSize()))
print(depth_tex.getFormat())
compute.np.setShaderInput("tiled_texture", tex, False)
compute.np.setShaderInput("color_texture", color_tex)
compute.np.setShaderInput("depth_texture", depth_tex)
render.setShaderInput('lit_texture', tex)
render.setShaderInput('win_size2', Vec2(window.screen_resolution[0], window.screen_resolution[1]))
from panda3d.core import Mat4, LMatrix4f
compute.dispatch(winx // 8, winy // 8, 1)
def update_compute(task):
    lens = application.base.cam.node().getLens()
    proj_mat = lens.getProjectionMat()
    inv_proj_mat = LMatrix4f(proj_mat)
    inv_proj_mat.invertInPlace()
    compute.np.setShaderInput('biased_inverse_projection_matrix', inv_proj_mat)
    #sattr = compute.get_attrib(ShaderAttrib)
    #buffer.getEngine().dispatch_compute((winx // 8, winy // 8, 1),sattr,buffer.get_gsg())
    return task.cont  # without the return the task only runs once

application.base.taskMgr.add(update_compute, sort = -20)
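
I have also been experimenting with driving the dispatch from its own small buffer instead of a one-off dispatch plus a task, so that it gets ordered by sort like the other passes. Something along these lines (just a sketch, not my working code; the 1 by 1 helper buffer and the names are my own):

from panda3d.core import ComputeNode, NodePath, Shader

# Wrap the dispatch in a ComputeNode so it runs during the draw of whichever
# buffer renders its scene, instead of once at startup.
cull_shader = Shader.load_compute(Shader.SL_GLSL, 'compute.glsl')
cull_node = ComputeNode('tiled_light_culling')
cull_node.addDispatch(winx // 8, winy // 8, 1)
cull_np = NodePath(cull_node)
cull_np.setShader(cull_shader)
cull_np.setShaderInput('tiled_texture', tex)
cull_np.setShaderInput('color_texture', color_tex)
cull_np.setShaderInput('depth_texture', depth_tex)

# Tiny helper buffer whose only job is to own the dispatch; its sort (-10)
# should place it after the depth pass (-20) and before the main window (0).
dispatch_buffer = application.base.win.makeTextureBuffer('dispatch', 1, 1)
dispatch_buffer.setSort(-10)
dispatch_cam = application.base.makeCamera(dispatch_buffer)
dispatch_cam.reparentTo(cull_np)  # a camera renders the scene graph it is parented into
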
Compute Shader Code:
#version 430
layout (local_size_x = 8, local_size_y = 8) in;
layout(rgba32f, binding = 0) writeonly uniform image2D tiled_texture; // matches the F_rgba32 / T_float texture set up above
uniform sampler2D color_texture;
uniform sampler2D depth_texture;
uniform mat4 biased_inverse_projection_matrix;
uniform float camera_far;
uniform float camera_near;
uniform vec3 camera_world_position;
uniform struct p3d_LightSourceParameters {
    vec4 position;
    vec4 color;
    vec3 attenuation;
    vec3 spotDirection;
    float spotExponent;
    float spotCosCutoff;
    sampler2DShadow shadowMap;
    mat4 shadowViewMatrix;
} p3d_LightSource[10];
float shadow_caster_contrib(sampler2DShadow shadowmap, vec4 shadowpos) {
    vec3 light_space_coords = shadowpos.xyz / shadowpos.w;
    light_space_coords.z -= 0.01;
    float shadow = texture(shadowmap, light_space_coords);
    return shadow;
}
// Reconstructs a view-space position from NDC texture coordinates and the
// depth-buffer value, using only the relevant terms of the inverse projection.
vec4 calculate_view_position(vec2 texture_coordinate, float depth_from_depth_buffer)
{
    vec4 view_position = vec4(biased_inverse_projection_matrix[0][0] * texture_coordinate.x + biased_inverse_projection_matrix[3][0],
                              biased_inverse_projection_matrix[1][1] * texture_coordinate.y + biased_inverse_projection_matrix[3][1],
                              -1.0,
                              biased_inverse_projection_matrix[2][3] * depth_from_depth_buffer + biased_inverse_projection_matrix[3][3]);
    return view_position;
}
void main() {
    ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);
    ivec2 texSize = textureSize(depth_texture, 0);
    vec2 uv = (vec2(pixel) + 0.5) / vec2(texSize);
    vec4 color = texture(color_texture, uv);
    vec4 depth = texture(depth_texture, uv);
    float depth_distance = (2.0 * camera_near * camera_far) / (camera_far + camera_near - depth.r * (camera_far - camera_near));
    //imageStore(tiled_texture, pixel, vec4(0.0, 0.0, 0.0, 1.0));
    vec4 view_position = calculate_view_position(uv * 2.0 - 1.0,
                                                 depth.r * 2.0 - 1.0);
    vec3 v_view_position = view_position.xyz / view_position.w;
    vec3 v = normalize(-v_view_position);
    for (int i = 0; i < p3d_LightSource.length(); ++i) {
        vec3 lightcol = p3d_LightSource[i].color.rgb;
        vec3 light_pos = p3d_LightSource[i].position.xyz - v_view_position * p3d_LightSource[i].position.w;
        vec3 l = normalize(light_pos);
        float spotcos = dot(normalize(p3d_LightSource[i].spotDirection), -l);
        vec3 h = normalize(l + v);
        float dist = length(light_pos);
        vec3 att_const = p3d_LightSource[i].attenuation;
        float attenuation_factor = 1.0 / max(att_const.x + att_const.y * dist + att_const.z * dist * dist, 1e-6);
        float spotcutoff = p3d_LightSource[i].spotCosCutoff;
        float spotexponent = p3d_LightSource[i].spotExponent;
        float shadowSpot = 0.0;
        float spot = smoothstep(spotcutoff, 1.0, spotcos);
        float powSpot = pow(spot, max(spotexponent, 0.01));
        float fallback = mix(1.0, smoothstep(spotcutoff, spotcutoff, spotcos), step(0.0, spotcutoff));
        shadowSpot = mix(fallback, powSpot, step(0.0, spotexponent));
        vec4 v_shadow_pos = p3d_LightSource[i].shadowViewMatrix * view_position;
        float shadow_caster = shadow_caster_contrib(p3d_LightSource[i].shadowMap, v_shadow_pos);
        float shadow = shadowSpot * shadow_caster * attenuation_factor;
        imageStore(tiled_texture, pixel, vec4(shadow, shadow, shadow, 1.0));
        break;  // only testing the first light for now
    }
    memoryBarrier();
    barrier();
}
Note that GraphicsBuffer is just a graphics-buffer creator and GraphicsBufferManager just stores the cameras and buffer textures.
In short, the GLSL code reconstructs a view-space position from the depth buffer, from which the lights can then be calculated.
Currently I'm just testing one light and writing the result into the 16 by 16 texture. That texture then gets fed into the render pipeline, where each fragment checks whether a light intersects its tile.
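
One thing I noticed while writing this up: the shader also reads camera_near, camera_far and camera_world_position, which the Python above never binds, so I plan to set them each frame, roughly like this (just a sketch, using the main camera's lens):

def update_camera_inputs(task):
    lens = application.base.cam.node().getLens()
    compute.np.setShaderInput('camera_near', lens.getNear())
    compute.np.setShaderInput('camera_far', lens.getFar())
    compute.np.setShaderInput('camera_world_position', application.base.camera.getPos(render))
    return task.cont

application.base.taskMgr.add(update_camera_inputs, 'update_camera_inputs')
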
If you need more information or code to figure out the issue, let me know; I just think this is already a lot of text to read in one message.
