2009-12-23, 20:47
I've been playing around with the bicubic filtering, trying to make it work better with the open-source drivers. As part of this I converted the current GLSL filter to ARB fragment program syntax (the shader has no control flow, so a direct translation is possible).
I don't have the time to do anything constructive with it right now, but if someone else wants to I can at least post the code.
It's a fairly large program, but if I counted things correctly then it is still within the minimum requirements for ARB fragment programs (i.e. it should run on all hw).
I don't have the time to do anything constructive with it right now, but if someone else wants to I can at least post the code.
Code:
!!ARBfp1.0
# Bicubic image filter: a 4x4 grid of taps from texture[0], weighted per
# column by weightx and per row by weighty, both fetched from texture[2].
#
# Resource budget (ARB_fragment_program guarantees only 16 temporaries and
# 4 texture indirections):
#   - 15 temporaries are declared.
#   - Tap coordinates are packed two x's and two y's per register and two
#     rows are fetched per dependency node, so the texture dependency chain
#     is 3 nodes long: coordinate setup, rows 0-1 (+ weights), rows 2-3.
# The coordinate additions and the order of the weighted sums are kept the
# same as in the straightforward four-pass formulation.

# w, h
PARAM texdim = program.local[0];
# x, y, -x, -y
PARAM step = program.local[1];

# Coordinate used to address the weight lookup texture
TEMP lut;
# Tap coordinates packed as (xa, xb, ya, yb):
#   cL = two left columns, cR = two right columns, for the current two rows
TEMP cL, cR;
# Column weights (r, g, b, a = left to right), row weights (top to bottom)
TEMP weightx, weighty;
# Eight taps: t1-t4 = first row of the pair, t5-t8 = second row of the pair
TEMP t1, t2, t3, t4, t5, t6, t7, t8;
# Horizontal sum of the row being folded, and the final accumulated colour
TEMP row;
TEMP out;

# ---- Node 1: coordinate setup (ALU only) ----

# Convert to image coords
MUL lut.xy, fragment.texcoord[0], texdim;

# cL = (x - sx, x, y - sy, (y - sy) + sy)
ADD cL, fragment.texcoord[0].xxyy, step.zzww;
MOV cL.y, fragment.texcoord[0].x;
ADD cL.w, cL.w, step.y;

# cR = (x + sx, (x + sx) + sx, y - sy, (y - sy) + sy)
ADD cR, fragment.texcoord[0].xxyy, step.xxww;
ADD cR.y, cR.y, step.x;
ADD cR.w, cR.w, step.y;

# ---- Node 2: weights and rows 0-1 ----

# Load lookup
TEX weightx, lut.x, texture[2], 2D;
TEX weighty, lut.y, texture[2], 2D;

# Row 0 (y taken from .z), columns left to right
TEX t1, cL.xzxz, texture[0], 2D;
TEX t2, cL.yzyz, texture[0], 2D;
TEX t3, cR.xzxz, texture[0], 2D;
TEX t4, cR.yzyz, texture[0], 2D;
# Row 1 (y taken from .w), columns left to right
TEX t5, cL.xwxw, texture[0], 2D;
TEX t6, cL.ywyw, texture[0], 2D;
TEX t7, cR.xwxw, texture[0], 2D;
TEX t8, cR.ywyw, texture[0], 2D;

# Compensate for [0, 1] clamping: expand fetched weights with value * 2 - 1
MAD weightx, weightx, {2.0, 2.0, 2.0, 2.0}, {-1.0, -1.0, -1.0, -1.0};
MAD weighty, weighty, {2.0, 2.0, 2.0, 2.0}, {-1.0, -1.0, -1.0, -1.0};

# First x pass, folded into the y sum
MUL row, t1, weightx.r;
MAD row, t2, weightx.g, row;
MAD row, t3, weightx.b, row;
MAD row, t4, weightx.a, row;
MUL out, row, weighty.r;

# Second x pass
MUL row, t5, weightx.r;
MAD row, t6, weightx.g, row;
MAD row, t7, weightx.b, row;
MAD row, t8, weightx.a, row;
MAD out, row, weighty.g, out;

# Advance both packed y values by two rows (two single steps, so the values
# match what stepping one row at a time would produce).
ADD cL.zw, cL, step.y;
ADD cL.zw, cL, step.y;
ADD cR.zw, cR, step.y;
ADD cR.zw, cR, step.y;

# ---- Node 3: rows 2-3 ----
# The first fetch below reads coordinates written since the last fetches,
# which is what starts the third (and last) dependency node.

# Row 2
TEX t1, cL.xzxz, texture[0], 2D;
TEX t2, cL.yzyz, texture[0], 2D;
TEX t3, cR.xzxz, texture[0], 2D;
TEX t4, cR.yzyz, texture[0], 2D;
# Row 3
TEX t5, cL.xwxw, texture[0], 2D;
TEX t6, cL.ywyw, texture[0], 2D;
TEX t7, cR.xwxw, texture[0], 2D;
TEX t8, cR.ywyw, texture[0], 2D;

# Third x pass
MUL row, t1, weightx.r;
MAD row, t2, weightx.g, row;
MAD row, t3, weightx.b, row;
MAD row, t4, weightx.a, row;
MAD out, row, weighty.b, out;

# Fourth x pass
MUL row, t5, weightx.r;
MAD row, t6, weightx.g, row;
MAD row, t7, weightx.b, row;
MAD row, t8, weightx.a, row;
MAD out, row, weighty.a, out;

MOV result.color, out;
END
It's a fairly large program, but if I counted things correctly then it is still within the minimum requirements for ARB fragment programs (i.e. it should run on all hw).