[UPDATE] - Fur - Deferred shading

FXAA looks OK!
It doesn’t work with basic shaders.

Performance hit: about 20 fps on a GTX 260 at 1024x768.

With FXAA:

Without FXAA:

Shader (ported from the Geeks3D GLSL version):

//Cg

// NOTE(review): the "//Cg" marker above is understood to be how Panda3D
// recognises a Cg shader file — keep it as the very first line; confirm
// against the Panda3D manual.

// Scratch value: vshader fills it in and then copies it to its l_posPos output.
varying vec4 posPos;
// Extra sub-pixel shift that vshader adds to the 0.5 offset before scaling by
// the texel step.
uniform float FXAA_SUBPIX_SHIFT = 1.0/4.0;
// Hard-coded frame size; vshader and PostFX turn it into the
// (1/width, 1/height) step.
// NOTE(review): presumably has to match the real buffer resolution — confirm.
const float rt_w = 1024; // resolution width
const float rt_h = 768; // resolution height

// Vertex stage of the FXAA pass.
//
//   vtx_position  : incoming quad vertex.
//   l_position    : vtx_position transformed by mat_modelproj.
//   l_texcoord    : texture coordinate built from vtx_position.xz and
//                   texpad_color.xy.
//   l_posPos      : xy = l_texcoord, zw = l_texcoord moved back by
//                   (0.5 + FXAA_SUBPIX_SHIFT) texel steps, where one step is
//                   (1/rt_w, 1/rt_h).
void vshader(float4 vtx_position : POSITION,
             out float4 l_position : POSITION,
             out float2 l_texcoord : TEXCOORD0,
             out float4 l_posPos : TEXCOORD1,
             uniform float4 texpad_color,
             uniform float4x4 mat_modelproj)
{
  // Size of one texel, derived from the hard-coded frame dimensions.
  vec2 texelStep = vec2(1.0/rt_w, 1.0/rt_h);

  l_texcoord = (vtx_position.xz * texpad_color.xy) + texpad_color.xy;
  l_position = mul(mat_modelproj, vtx_position);

  // Centre coordinate in xy, shifted corner coordinate in zw.
  posPos.xy = l_texcoord.xy;
  posPos.zw = l_texcoord.xy - (texelStep * (0.5 + FXAA_SUBPIX_SHIFT));

  l_posPos = posPos;
}


// Not referenced by any code in this shader as shown.
uniform float vx_offset;
// Clamp (applied to both components, positive and negative) on the scaled
// edge direction computed in FxaaPixelShader.
uniform float FXAA_SPAN_MAX = 8.0;
// Scale applied to the average corner luma when computing dirReduce in
// FxaaPixelShader.
uniform float FXAA_REDUCE_MUL = 1.0/8.0;

#define FxaaInt2 ivec2
#define FxaaFloat2 vec2

// Sample texture t at coordinate p, mip level 0.
// Uses the Cg intrinsic tex2Dlod (coordinate in xy, LOD in w) so that both
// sampling macros go through the same call instead of mixing in the GLSL
// name texture2DLod.
#define FxaaTexLod0(t, p) tex2Dlod(t, float4((p).xy, 0.0, 0.0))

// Sample texture t at coordinate p offset by o texel steps of size r,
// mip level 0. The step is taken from the r argument rather than from
// whatever variable happens to be called rcpFrame at the expansion site;
// all arguments are parenthesised so expressions can be passed safely.
#define FxaaTexOff(t, p, o, r) tex2Dlod(t, float4((p).xy + ((o) * (r)), 0.0, 0.0))

// Computes the anti-aliased colour for one pixel.
//
// Steps, in order:
//   1. Fetch four corner taps starting at posPos.zw (NW, then NE/SW/SE at
//      offsets (1,0), (0,1), (1,1)) and the centre tap at posPos.xy.
//   2. Convert each tap to luma with weights (0.299, 0.587, 0.114) and record
//      the min/max luma over the five taps.
//   3. Build a blur direction from the corner luma differences, scale it by
//      1 / (smallest |component| + dirReduce), clamp each component to
//      +/-FXAA_SPAN_MAX and multiply by rcpFrame.
//   4. Blend two taps (rgbA) and four taps (rgbB) along that direction and
//      return rgbB unless its luma falls outside [lumaMin, lumaMax], in which
//      case rgbA is returned.
//
// FXAA_SPAN_MAX and FXAA_REDUCE_MUL are taken from file scope; only
// FXAA_REDUCE_MIN is defined here.
// NOTE(review): keep the parameter named rcpFrame — the sampling macros
// expanded in this body may refer to it by name.
vec3 FxaaPixelShader(
  vec4 posPos, // xy: centre tap coordinate, zw: first corner tap coordinate (vshader's l_posPos, interpolated).
  sampler2D tex, // Input texture.
  vec2 rcpFrame) // Constant {1.0/frameWidth, 1.0/frameHeight}.
{
/*---------------------------------------------------------*/
    // Lower bound for dirReduce below. Being a #define, it stays defined for
    // the rest of the file, not just this function.
    #define FXAA_REDUCE_MIN   (1.0/128.0)
    //#define FXAA_REDUCE_MUL   (1.0/8.0)
    //#define FXAA_SPAN_MAX     8.0
/*---------------------------------------------------------*/
    // Four corner taps around the pixel plus the centre tap.
    vec3 rgbNW = FxaaTexLod0(tex, posPos.zw).xyz;
    vec3 rgbNE = FxaaTexOff(tex, posPos.zw, FxaaInt2(1,0), rcpFrame.xy).xyz;
    vec3 rgbSW = FxaaTexOff(tex, posPos.zw, FxaaInt2(0,1), rcpFrame.xy).xyz;
    vec3 rgbSE = FxaaTexOff(tex, posPos.zw, FxaaInt2(1,1), rcpFrame.xy).xyz;
    vec3 rgbM  = FxaaTexLod0(tex, posPos.xy).xyz;
/*---------------------------------------------------------*/
    // Luma of each tap as a weighted sum of its RGB channels.
    vec3 luma = vec3(0.299, 0.587, 0.114);
    float lumaNW = dot(rgbNW, luma);
    float lumaNE = dot(rgbNE, luma);
    float lumaSW = dot(rgbSW, luma);
    float lumaSE = dot(rgbSE, luma);
    float lumaM  = dot(rgbM,  luma);
/*---------------------------------------------------------*/
    // Luma range over all five taps; used at the end to validate rgbB.
    float lumaMin = min(lumaM, min(min(lumaNW, lumaNE), min(lumaSW, lumaSE)));
    float lumaMax = max(lumaM, max(max(lumaNW, lumaNE), max(lumaSW, lumaSE)));
/*---------------------------------------------------------*/
    // Blur direction from the corner luma differences:
    // x compares the N row against the S row, y compares the W column
    // against the E column.
    vec2 dir;
    dir.x = -((lumaNW + lumaNE) - (lumaSW + lumaSE));
    dir.y =  ((lumaNW + lumaSW) - (lumaNE + lumaSE));
/*---------------------------------------------------------*/
    // dirReduce keeps the divisor below away from zero: it is the average
    // corner luma scaled by FXAA_REDUCE_MUL, but never less than
    // FXAA_REDUCE_MIN.
    float dirReduce = max(
        (lumaNW + lumaNE + lumaSW + lumaSE) * (0.25 * FXAA_REDUCE_MUL),
        FXAA_REDUCE_MIN);
    float rcpDirMin = 1.0/(min(abs(dir.x), abs(dir.y)) + dirReduce);
    // Scale, clamp each component to [-FXAA_SPAN_MAX, FXAA_SPAN_MAX], then
    // convert to texture-coordinate units via rcpFrame.
    dir = min(FxaaFloat2( FXAA_SPAN_MAX,  FXAA_SPAN_MAX),
          max(FxaaFloat2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX),
          dir * rcpDirMin)) * rcpFrame.xy;
/*--------------------------------------------------------*/
    // rgbA: average of two taps at -1/6 and +1/6 of dir from the centre.
    vec3 rgbA = (1.0/2.0) * (
        FxaaTexLod0(tex, posPos.xy + dir * (1.0/3.0 - 0.5)).xyz +
        FxaaTexLod0(tex, posPos.xy + dir * (2.0/3.0 - 0.5)).xyz);
    // rgbB: half of rgbA plus a quarter each of two taps at -1/2 and +1/2
    // of dir from the centre.
    vec3 rgbB = rgbA * (1.0/2.0) + (1.0/4.0) * (
        FxaaTexLod0(tex, posPos.xy + dir * (0.0/3.0 - 0.5)).xyz +
        FxaaTexLod0(tex, posPos.xy + dir * (3.0/3.0 - 0.5)).xyz);
    float lumaB = dot(rgbB, luma);
    // If the wider blend left the local luma range, fall back to the
    // narrower one.
    if((lumaB < lumaMin) || (lumaB > lumaMax)) return rgbA;
    return rgbB; }

// Wraps FxaaPixelShader: returns its RGB result with alpha forced to 1.0.
//
//   pos  : forwarded to FxaaPixelShader as its posPos argument.
//   tex  : texture to filter.
//   uv   : not used by this function.
//   time : not used by this function.
vec4 PostFX(float4 pos, sampler2D tex, vec2 uv, float time)
{
  // One-texel step derived from the hard-coded frame size.
  vec2 texelStep = vec2(1.0/rt_w, 1.0/rt_h);

  vec4 result = vec4(0.0);
  result.a = 1.0;
  result.rgb = FxaaPixelShader(pos, tex, texelStep);
  //result.rgb = 1.0 - texture2D(tex, pos.xy).rgb;
  return result;
}

// Fragment stage of the FXAA pass.
//
//   l_texcoord   : texture coordinate from vshader.
//   l_posPos     : centre/corner coordinates from vshader, forwarded to PostFX.
//   k_color      : texture to filter.
//   k_active     : 0 outputs the unfiltered texel; any other value outputs
//                  the PostFX result.
//   texpix_color,
//   texpad_color : not used in this function; kept so the shader's input
//                  list is unchanged.
//   o_color      : resulting fragment colour.
//
// Only the path selected by k_active is evaluated, so the FXAA taps are not
// computed and then thrown away when the effect is switched off (and the
// plain fetch is skipped when it is on).
void fshader(float2 l_texcoord : TEXCOORD0,
             float4 l_posPos : TEXCOORD1,
             uniform sampler2D k_color : TEXUNIT0,
             uniform float k_active,
             uniform float4 texpix_color,
             uniform float4 texpad_color,
             out float4 o_color : COLOR)
{
  if(k_active == 0)
  {
    // Effect disabled: pass the source texel through untouched.
    o_color = tex2D(k_color, l_texcoord);
  }
  else
  {
    o_color = PostFX(l_posPos, k_color, l_texcoord.st, 0.0);
  }
}

To use it, render the whole scene to a fullscreen quad and pass your color map (linearly filtered) as input:

# Load the FXAA shader, then attach it to the fullscreen quad.
fxaa_shader = loader.loadShader("fxaa.cg")
quad.setShader(fxaa_shader)
# Texture to be filtered.
quad.setShaderInput("color", color_map)
# Any non-zero value enables the effect; 0 shows the unfiltered image.
quad.setShaderInput("active", 1)

It still needs more testing with a proper scene. I don’t know why the texture2DLod function doesn’t throw an error (it should be tex2Dlod in Cg).

Anon: I don’t know how to do this at the moment; I’d prefer a real programmer to do it :stuck_out_tongue: