Water caustics

Here’s a small snippet to compute water caustics for a given depth:

Result:

It’s far from beeing perfect, there is some blur missing, but it’s more a concept than a final solution.
All computation is done on the GPU using compute shaders.

Code:

from panda3d.core import *


loadPrcFileData("", """
win-size 512 512
""")

import direct.directbase.DirectStart

resultTex = Texture("result")
resultTex.setup2dTexture(512, 512, Texture.TFloat, Texture.FRgba)

tempTex = Texture("temp")
tempTex.setup2dTexture(512, 512, Texture.TInt, Texture.FR32)

sourceTex = loader.loadTexture("source.png")
sourceTex.setMinfilter(Texture.FTLinear)
sourceTex.setMagfilter(Texture.FTLinear)
sourceTex.setWrapU(Texture.WMRepeat)
sourceTex.setWrapV(Texture.WMRepeat)

tempDispTex = Texture("tempDisp")
tempDispTex.setup2dTexture(512, 512, Texture.TFloat, Texture.FRgba)

clearTexture = """
#version 430

layout (local_size_x = 16, local_size_y = 16) in;

uniform writeonly iimage2D dest;
uniform writeonly image2D tempDisplace;
uniform sampler2D source;

uniform float frameTime;

void main() {
  ivec2 texelCoords = ivec2(gl_GlobalInvocationID.xy);
  imageStore(dest, texelCoords, ivec4(0));

  vec2 texcoord = vec2(texelCoords) / 512.0;

  float speed = 0.2;
  float tspeed = frameTime * speed;

  float h0 = texture(source, texcoord*0.1 + tspeed * vec2(0.04, 0.03)).x;
  float h1 = texture(source, texcoord*0.15 - tspeed * vec2(0.05, -0.07)).x;
  float h2 = texture(source, texcoord*0.2 - tspeed * vec2(0.02, 0.08)).x;
  float h3 = texture(source, texcoord*0.25 - tspeed * vec2(-0.09, 0.03)).x;
  float h = (h0 + h1 + h2 + h3) * 0.25;


  imageStore(tempDisplace, texelCoords, vec4(h));

}

"""

computeCaustics = """
#version 430

layout (local_size_x = 16, local_size_y = 16) in;

uniform sampler2D source;
layout(r32i) uniform iimage2D dest;

void main() {
  vec2 texelCoords = vec2(gl_GlobalInvocationID.xy) / 4096.0;

  float h0 = texture(source, texelCoords).x;
  float hx = texture(source, texelCoords + vec2(1.0/512.0,0)).x;
  float hy = texture(source, texelCoords + vec2(0,1.0/512.0)).x;

  vec3 normal = normalize(vec3(h0 - hx, h0 - hy, 16.0 / 512.0));
  vec3 sunVector = normalize(vec3(1,1,1));

  vec3 refractVector = refract(sunVector, normal, 0.1);
  vec3 refractStart = vec3(texelCoords * 512.0, 0);
  vec3 refractEnd = refractStart + refractVector * 50.0;
  ivec2 storeCoords = ivec2(round(fract(refractEnd / 512.0) * 512.0));


  imageAtomicAdd(dest, storeCoords, 1);
}

"""

convertCaustics = """
#version 430

layout (local_size_x = 16, local_size_y = 16) in;

layout(r32i) uniform iimage2D source;
uniform writeonly image2D dest;

void main() {
  ivec2 texelCoords = ivec2(gl_GlobalInvocationID.xy);
  int result = imageLoad(source, texelCoords).x;
  //result = max(40, result);
  //result -= 40;

  imageStore(dest, texelCoords, vec4(vec3(result) / 255.0,1.0));
}

"""

computeCausticsShader = Shader.makeCompute(Shader.SLGLSL, computeCaustics)
convertCausticsShader = Shader.makeCompute(Shader.SLGLSL, convertCaustics)
clearTextureShader = Shader.makeCompute(Shader.SLGLSL, clearTexture)


def computeCaustics(task = None):
    # Clear texture first
    cn = NodePath("clearNode")
    cn.setShader(clearTextureShader)
    cn.setShaderInput("dest", tempTex)
    cn.setShaderInput("tempDisplace", tempDispTex)
    cn.setShaderInput("frameTime", globalClock.getFrameTime())

    cn.setShaderInput("source", sourceTex)

    sattr = cn.getAttrib(ShaderAttrib)
    base.graphicsEngine.dispatchCompute(
        (512/16, 512/16, 1), sattr, base.win.getGsg())

    # Compute caustics
    cn = NodePath("computeNode")
    cn.setShader(computeCausticsShader)
    cn.setShaderInput("dest", tempTex)
    cn.setShaderInput("source", tempDispTex)

    sattr = cn.getAttrib(ShaderAttrib)

    base.graphicsEngine.dispatchCompute(
        (4096/16, 4096/16, 1), sattr, base.win.getGsg())

    # Convert caustics
    cn = NodePath("convertNode")
    cn.setShader(convertCausticsShader)
    cn.setShaderInput("source", tempTex)
    cn.setShaderInput("dest", resultTex)

    sattr = cn.getAttrib(ShaderAttrib)

    base.graphicsEngine.dispatchCompute(
        (512/16, 512/16, 1), sattr, base.win.getGsg())

    if task is not None:
        return task.cont

base.addTask(computeCaustics, "computeCaustics")

# Write data
# base.graphicsEngine.extractTextureData(resultTex, base.win.getGsg())
# resultTex.write("result.png")

# Visualize data
cm = CardMaker("")
cm.setFrameFullscreenQuad()
cn = aspect2d.attachNewNode(cm.generate())
cn.setTexture(resultTex)


run()

The application expects a source.png with the size of 512x512 to be in the working directory. For the screenshot above I used (rescaled to 512x512 before):

http://1.bp.blogspot.com/-WPOu0DEx0sE/TnVq6rwECoI/AAAAAAAAB_I/fyFU4_8LeGc/s1600/Nomeradona_Pool+Water_displace.jpg

This looks nice.

Nice!

Is there a particular reason to use compute shaders instead of a render-to-texture approach?

(Compute shaders are only available for OpenGL 4.3 and later, while it would be nice to be able to run this on older hardware, too.)

Yes, because atomic operations are required with the technique I use. I compute the normals from the displacement map, compute the refraction ray from the sun light (the sun direction is currently hard-coded), and compute where the ray intersects with the ground plane. Then I transform the position back to texture-coordinates and do an atomic add at that pixel.

You could use render-to-texture, however it requires image load store anyway, which is 4.3.

I see. Thanks for the explanation!