diff --git a/portable_config/scripts/uosc/elements/Timeline.lua b/portable_config/scripts/uosc/elements/Timeline.lua index eb5bad2d..801f3abb 100644 --- a/portable_config/scripts/uosc/elements/Timeline.lua +++ b/portable_config/scripts/uosc/elements/Timeline.lua @@ -336,8 +336,8 @@ function Timeline:render() end -- End time - if state.duration_or_remaining_time_human then - draw_timeline_text(bbx - spacing, fcy, 6, state.duration_or_remaining_time_human, time_opts) + if state.destination_time_human then + draw_timeline_text(bbx - spacing, fcy, 6, state.destination_time_human, time_opts) end end diff --git a/portable_config/scripts/uosc/main.lua b/portable_config/scripts/uosc/main.lua index 83c01916..1a4536bf 100644 --- a/portable_config/scripts/uosc/main.lua +++ b/portable_config/scripts/uosc/main.lua @@ -1,6 +1,6 @@ --[[ SOURCE_ https://github.com/tomasklaen/uosc/tree/main/scripts -COMMIT_ ea371b5740bd5768e9f4ee0c254c0ae718c61372 +COMMIT_ e66c8fbf88ec788512b4c2adbef72560fb911dfd 极简主义设计驱动的多功能界面脚本群组,兼容 thumbfast 新缩略图引擎 ]]-- @@ -90,7 +90,7 @@ defaults = { foreground_text = '000000', background = '000000', background_text = 'ffffff', - total_time = true, + destination_time = 'playtime-remaining', time_precision = 0, autohide = false, buffered_time_threshold = 60, @@ -292,7 +292,7 @@ state = { speed = 1, duration = nil, -- current media duration time_human = nil, -- current playback time in human format - duration_or_remaining_time_human = nil, -- depends on options.total_time + destination_time_human = nil, -- depends on options.destination_time pause = mp.get_property_native('pause'), chapters = {}, current_chapter = nil, @@ -376,11 +376,15 @@ function update_human_times() state.time_human = format_time(state.time) if state.duration then local speed = state.speed or 1 - state.duration_or_remaining_time_human = format_time( - options.total_time and state.duration or ((state.time - state.duration) / speed) - ) + if options.destination_time == 'playtime-remaining' then + state.destination_time_human = format_time((state.time - state.duration) / speed) + elseif options.destination_time == 'total' then + state.destination_time_human = format_time(state.duration) + else + state.destination_time_human = format_time(state.time - state.duration) + end else - state.duration_or_remaining_time_human = nil + state.destination_time_human = nil end else state.time_human = nil diff --git a/portable_config/shaders/nlmeans.glsl b/portable_config/shaders/nlmeans.glsl index e222f098..074a7ca7 100644 --- a/portable_config/shaders/nlmeans.glsl +++ b/portable_config/shaders/nlmeans.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,6 +100,50 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB @@ -125,6 +163,7 @@ vec4 hook() //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!BIND EP_LUMA //!DESC Non-local means (nlmeans.glsl) @@ -251,7 +290,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -363,7 +402,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -508,35 +547,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -546,11 +585,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -601,47 +644,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -656,53 +692,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -860,7 +895,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_anime.glsl b/portable_config/shaders/nlmeans_anime.glsl index 81f384f9..fd989c13 100644 --- a/portable_config/shaders/nlmeans_anime.glsl +++ b/portable_config/shaders/nlmeans_anime.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,11 +100,56 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!DESC Non-local means (nlmeans_anime.glsl) /* User variables @@ -236,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -348,7 +387,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -493,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -531,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -586,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -641,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -845,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_anime_hq.glsl b/portable_config/shaders/nlmeans_anime_hq.glsl index e8a28885..b31c3a0a 100644 --- a/portable_config/shaders/nlmeans_anime_hq.glsl +++ b/portable_config/shaders/nlmeans_anime_hq.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,11 +100,56 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!DESC Non-local means (nlmeans_anime_hq.glsl) /* User variables @@ -236,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -348,7 +387,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -493,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -531,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -586,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -641,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -845,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_anime_hq_medium.glsl b/portable_config/shaders/nlmeans_anime_hq_medium.glsl index 9d762b99..14cde36b 100644 --- a/portable_config/shaders/nlmeans_anime_hq_medium.glsl +++ b/portable_config/shaders/nlmeans_anime_hq_medium.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,12 +100,27 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF_LUMA +//!SAVE PRERF_LUMA //!WIDTH HOOKED.w 2.0 / //!HEIGHT HOOKED.h 2.0 / @@ -120,6 +129,21 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB @@ -251,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -508,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -546,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -601,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -656,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -860,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_anime_medium.glsl b/portable_config/shaders/nlmeans_anime_medium.glsl index 0645f2ef..a1501b68 100644 --- a/portable_config/shaders/nlmeans_anime_medium.glsl +++ b/portable_config/shaders/nlmeans_anime_medium.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,12 +100,27 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF_LUMA +//!SAVE PRERF_LUMA //!WIDTH HOOKED.w 2.0 / //!HEIGHT HOOKED.h 2.0 / @@ -120,6 +129,21 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB @@ -251,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -508,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -546,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -601,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -656,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -860,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_heavy.glsl b/portable_config/shaders/nlmeans_heavy.glsl index 07b53716..d9fc45b8 100644 --- a/portable_config/shaders/nlmeans_heavy.glsl +++ b/portable_config/shaders/nlmeans_heavy.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,11 +100,56 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!DESC Non-local means (nlmeans_heavy.glsl) /* User variables @@ -236,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -348,7 +387,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -493,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -531,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -586,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -641,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -845,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_hq.glsl b/portable_config/shaders/nlmeans_hq.glsl index 453ac926..55a51188 100644 --- a/portable_config/shaders/nlmeans_hq.glsl +++ b/portable_config/shaders/nlmeans_hq.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,11 +100,56 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!DESC Non-local means (nlmeans_hq.glsl) /* User variables @@ -236,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -348,7 +387,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -493,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -531,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -586,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -641,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -845,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_hq_heavy.glsl b/portable_config/shaders/nlmeans_hq_heavy.glsl index de10f21b..e6d22239 100644 --- a/portable_config/shaders/nlmeans_hq_heavy.glsl +++ b/portable_config/shaders/nlmeans_hq_heavy.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,11 +100,56 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!DESC Non-local means (nlmeans_hq_heavy.glsl) /* User variables @@ -236,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -348,7 +387,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -493,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -531,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -586,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -641,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -845,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_hq_medium.glsl b/portable_config/shaders/nlmeans_hq_medium.glsl index 238e66cc..e1361d5b 100644 --- a/portable_config/shaders/nlmeans_hq_medium.glsl +++ b/portable_config/shaders/nlmeans_hq_medium.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,11 +100,56 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!DESC Non-local means (nlmeans_hq_medium.glsl) /* User variables @@ -236,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -348,7 +387,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -493,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -531,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -586,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -641,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -845,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_hq_sharpen_denoise.glsl b/portable_config/shaders/nlmeans_hq_sharpen_denoise.glsl index b9da1b84..d55e4a49 100644 --- a/portable_config/shaders/nlmeans_hq_sharpen_denoise.glsl +++ b/portable_config/shaders/nlmeans_hq_sharpen_denoise.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,11 +100,56 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!DESC Non-local means (nlmeans_hq_sharpen_denoise.glsl) /* User variables @@ -137,7 +176,7 @@ vec4 hook() * patch/research sizes. */ #ifdef LUMA_raw -#define S 9 +#define S 10 #define P 4 #define R 5 #else @@ -160,7 +199,7 @@ vec4 hook() */ #ifdef LUMA_raw #define AS 1 -#define ASF 1.0 +#define ASF 1.5 #define ASP 2.0 #else #define AS 0 @@ -236,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -348,7 +387,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -493,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -531,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -586,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -641,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -845,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_hq_sharpen_only.glsl b/portable_config/shaders/nlmeans_hq_sharpen_only.glsl index fd2288b7..f703a9eb 100644 --- a/portable_config/shaders/nlmeans_hq_sharpen_only.glsl +++ b/portable_config/shaders/nlmeans_hq_sharpen_only.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,11 +100,56 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!DESC Non-local means (nlmeans_hq_sharpen_only.glsl) /* User variables @@ -137,11 +176,11 @@ vec4 hook() * patch/research sizes. */ #ifdef LUMA_raw -#define S 9 +#define S 3 #define P 4 #define R 5 #else -#define S 9 +#define S 3 #define P 3 #define R 5 #endif @@ -160,12 +199,12 @@ vec4 hook() */ #ifdef LUMA_raw #define AS 2 -#define ASF 1.0 -#define ASP 2.0 +#define ASF 4 +#define ASP 1 #else #define AS 2 -#define ASF 1.0 -#define ASP 2.0 +#define ASF 4 +#define ASP 1 #endif /* Starting weight @@ -236,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -348,7 +387,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -493,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -531,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -586,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -641,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -845,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_hqx.glsl b/portable_config/shaders/nlmeans_hqx.glsl index dd0b89ee..65d02686 100644 --- a/portable_config/shaders/nlmeans_hqx.glsl +++ b/portable_config/shaders/nlmeans_hqx.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,12 +100,27 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF_LUMA +//!SAVE PRERF_LUMA //!WIDTH HOOKED.w 1.25 / //!HEIGHT HOOKED.h 1.25 / @@ -120,6 +129,21 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB @@ -251,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -508,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -546,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -601,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -656,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -860,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_lq.glsl b/portable_config/shaders/nlmeans_lq.glsl index 855bef1d..4c3b1246 100644 --- a/portable_config/shaders/nlmeans_lq.glsl +++ b/portable_config/shaders/nlmeans_lq.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,6 +100,50 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB @@ -125,6 +163,7 @@ vec4 hook() //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!BIND EP_LUMA //!DESC Non-local means (nlmeans_lq.glsl) @@ -251,7 +290,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -363,7 +402,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -508,35 +547,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -546,11 +585,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -601,47 +644,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -656,53 +692,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -860,7 +895,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_luma.glsl b/portable_config/shaders/nlmeans_luma.glsl index a94447dd..236290cb 100644 --- a/portable_config/shaders/nlmeans_luma.glsl +++ b/portable_config/shaders/nlmeans_luma.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -95,7 +89,7 @@ //!HOOK LUMA //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -104,6 +98,44 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!BIND HOOKED //!DESC Non-local means (downscale) @@ -119,6 +151,7 @@ vec4 hook() //!HOOK LUMA //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!BIND EP_LUMA //!DESC Non-local means (nlmeans_luma.glsl) @@ -245,7 +278,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -357,7 +390,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -502,35 +535,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -540,11 +573,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -595,47 +632,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -650,53 +680,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -854,7 +883,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_medium.glsl b/portable_config/shaders/nlmeans_medium.glsl index 45d3eb54..e0db5b8c 100644 --- a/portable_config/shaders/nlmeans_medium.glsl +++ b/portable_config/shaders/nlmeans_medium.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,11 +100,56 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!DESC Non-local means (nlmeans_medium.glsl) /* User variables @@ -236,7 +275,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -348,7 +387,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -493,35 +532,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -531,11 +570,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -586,47 +629,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -641,53 +677,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -845,7 +880,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_sharpen_denoise.glsl b/portable_config/shaders/nlmeans_sharpen_denoise.glsl index 6de6b43b..8caf3115 100644 --- a/portable_config/shaders/nlmeans_sharpen_denoise.glsl +++ b/portable_config/shaders/nlmeans_sharpen_denoise.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,6 +100,50 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB @@ -125,6 +163,7 @@ vec4 hook() //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!BIND EP_LUMA //!DESC Non-local means (nlmeans_sharpen_denoise.glsl) @@ -152,7 +191,7 @@ vec4 hook() * patch/research sizes. */ #ifdef LUMA_raw -#define S 9 +#define S 10 #define P 3 #define R 5 #else @@ -175,7 +214,7 @@ vec4 hook() */ #ifdef LUMA_raw #define AS 1 -#define ASF 1.0 +#define ASF 1.5 #define ASP 2.0 #else #define AS 0 @@ -251,7 +290,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -363,7 +402,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -508,35 +547,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -546,11 +585,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -601,47 +644,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -656,53 +692,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -860,7 +895,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_sharpen_only.glsl b/portable_config/shaders/nlmeans_sharpen_only.glsl index 0b84104c..714eac94 100644 --- a/portable_config/shaders/nlmeans_sharpen_only.glsl +++ b/portable_config/shaders/nlmeans_sharpen_only.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,6 +100,50 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB @@ -125,6 +163,7 @@ vec4 hook() //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!BIND EP_LUMA //!DESC Non-local means (nlmeans_sharpen_only.glsl) @@ -152,11 +191,11 @@ vec4 hook() * patch/research sizes. */ #ifdef LUMA_raw -#define S 9 +#define S 2.25 #define P 3 #define R 5 #else -#define S 9 +#define S 1.50 #define P 3 #define R 5 #endif @@ -175,12 +214,12 @@ vec4 hook() */ #ifdef LUMA_raw #define AS 2 -#define ASF 1.0 -#define ASP 2.0 +#define ASF 4 +#define ASP 1 #else #define AS 2 -#define ASF 1.0 -#define ASP 2.0 +#define ASF 4 +#define ASP 1 #endif /* Starting weight @@ -251,7 +290,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -363,7 +402,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -508,35 +547,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -546,11 +585,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -601,47 +644,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -656,53 +692,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -860,7 +895,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_temporal.glsl b/portable_config/shaders/nlmeans_temporal.glsl index a2ff9f2b..693d1c2d 100644 --- a/portable_config/shaders/nlmeans_temporal.glsl +++ b/portable_config/shaders/nlmeans_temporal.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,6 +100,50 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB @@ -125,6 +163,7 @@ vec4 hook() //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!BIND EP_LUMA //!BIND PREV1 //!BIND PREV2 @@ -216,7 +255,7 @@ vec4 hook() * WDP (WD=1): Higher numbers reduce the threshold more for small sample sizes */ #ifdef LUMA_raw -#define WD 2 +#define WD 1 #define WDT 1.0 #define WDP 6.0 #else @@ -254,7 +293,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -366,7 +405,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -511,35 +550,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -549,11 +588,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -610,47 +653,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -665,53 +701,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -872,7 +907,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_temporal_sharpen_denoise.glsl b/portable_config/shaders/nlmeans_temporal_sharpen_denoise.glsl index 60966f8c..14631e10 100644 --- a/portable_config/shaders/nlmeans_temporal_sharpen_denoise.glsl +++ b/portable_config/shaders/nlmeans_temporal_sharpen_denoise.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,6 +100,50 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB @@ -125,6 +163,7 @@ vec4 hook() //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!BIND EP_LUMA //!BIND PREV1 //!BIND PREV2 @@ -155,7 +194,7 @@ vec4 hook() * patch/research sizes. */ #ifdef LUMA_raw -#define S 9 +#define S 10 #define P 3 #define R 5 #else @@ -178,7 +217,7 @@ vec4 hook() */ #ifdef LUMA_raw #define AS 1 -#define ASF 1.0 +#define ASF 1.5 #define ASP 2.0 #else #define AS 0 @@ -216,7 +255,7 @@ vec4 hook() * WDP (WD=1): Higher numbers reduce the threshold more for small sample sizes */ #ifdef LUMA_raw -#define WD 2 +#define WD 1 #define WDT 1.0 #define WDP 6.0 #else @@ -254,7 +293,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -366,7 +405,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -511,35 +550,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -549,11 +588,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -610,47 +653,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -665,53 +701,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -872,7 +907,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve diff --git a/portable_config/shaders/nlmeans_temporal_sharpen_only.glsl b/portable_config/shaders/nlmeans_temporal_sharpen_only.glsl index 7d9285b9..aa6f1e8d 100644 --- a/portable_config/shaders/nlmeans_temporal_sharpen_only.glsl +++ b/portable_config/shaders/nlmeans_temporal_sharpen_only.glsl @@ -75,12 +75,6 @@ * - If this is unusually slow then try changing gpu-api and vo * - If it's still slow, try setting RI/RFI to 0. * - * - PS=4:P=3:RI=0:RFI=0:PST=0:M!=1 - * - Performs about the same as the PS=3 version - * - Worse quality, since patch shape is smaller and asymmetric - * - Rotations/reflections not supported - * - Consider this deprecated - * * - PS=6:RI={0,1,3}:RFI={0,1,2} * - Currently the only scalable variant * - Patch shape is asymmetric on two axis @@ -97,7 +91,7 @@ //!HOOK RGB //!BIND HOOKED //!DESC Non-local means (downscale) -//!SAVE RF +//!SAVE PRERF //!WIDTH HOOKED.w 2 / //!HEIGHT HOOKED.h 2 / @@ -106,6 +100,50 @@ vec4 hook() return HOOKED_texOff(0); } +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF +//!SAVE RF +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (downscale) +//!SAVE PRERF_LUMA +//!WIDTH HOOKED.w 1.25 / +//!HEIGHT HOOKED.h 1.25 / + +vec4 hook() +{ + return HOOKED_texOff(0); +} + +//!HOOK LUMA +//!HOOK CHROMA +//!HOOK RGB +//!BIND HOOKED +//!DESC Non-local means (undownscale) +//!BIND PRERF_LUMA +//!SAVE RF_LUMA +//!WIDTH HOOKED.w +//!HEIGHT HOOKED.h + +vec4 hook() +{ + return PRERF_LUMA_texOff(0); +} + //!HOOK LUMA //!HOOK CHROMA //!HOOK RGB @@ -125,6 +163,7 @@ vec4 hook() //!HOOK RGB //!BIND HOOKED //!BIND RF +//!BIND RF_LUMA //!BIND EP_LUMA //!BIND PREV1 //!BIND PREV2 @@ -155,11 +194,11 @@ vec4 hook() * patch/research sizes. */ #ifdef LUMA_raw -#define S 9 +#define S 2.25 #define P 3 #define R 5 #else -#define S 9 +#define S 1.50 #define P 3 #define R 5 #endif @@ -178,12 +217,12 @@ vec4 hook() */ #ifdef LUMA_raw #define AS 2 -#define ASF 1.0 -#define ASP 2.0 +#define ASF 4 +#define ASP 1 #else #define AS 2 -#define ASF 1.0 -#define ASP 2.0 +#define ASF 4 +#define ASP 1 #endif /* Starting weight @@ -216,7 +255,7 @@ vec4 hook() * WDP (WD=1): Higher numbers reduce the threshold more for small sample sizes */ #ifdef LUMA_raw -#define WD 2 +#define WD 1 #define WDT 1.0 #define WDP 6.0 #else @@ -254,7 +293,7 @@ vec4 hook() * * Number of rotations/reflections to try for each patch comparison. Slow, but * improves feature preservation, although adding more rotations/reflections - * gives diminishing returns. + * gives diminishing returns. The most similar rotation/reflection will be used. * * The angle in degrees of each rotation is 360/(RI+1), so RI=1 will do a * single 180 degree rotation, RI=3 will do three 90 degree rotations, etc. @@ -366,7 +405,7 @@ vec4 hook() * factor is set to 3. */ #ifdef LUMA_raw -#define RF 0 +#define RF 1 #else #define RF 1 #endif @@ -511,35 +550,35 @@ const int r_area = R_AREA(R*R); #define PINCR(z,c) (z.c++) #endif -#define P_AREA(a) ((a - PD) * RI1 * RFI1) +#define P_AREA(a) (a - PD) // patch shapes #if P == 0 || P == 1 -#define FOR_PATCH(p) S_1X1(p) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_1X1(p) const int p_area = P_AREA(1); #elif PS == 7 -#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_PLUS(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_PLUS_A(hp,P)); #elif PS == 6 -#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE_EVEN(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #elif PS == 5 -#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRUNC_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,hp)); #elif PS == 4 -#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_TRIANGLE(p,hp,PINCR(p,x)) const int p_area = P_AREA(S_TRIANGLE_A(hp,P)); #elif PS == 3 -#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_DIAMOND(p,hp,PINCR(p,y)) const int p_area = P_AREA(S_DIAMOND_A(hp,P)); #elif PS == 2 -#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_VERTICAL(p,hp,PINCR(p,y)) const int p_area = P_AREA(P); #elif PS == 1 -#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_HORIZONTAL(p,hp,PINCR(p,x)) const int p_area = P_AREA(P); #elif PS == 0 -#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) FOR_ROTATION FOR_REFLECTION +#define FOR_PATCH(p) S_SQUARE(p,hp,PINCR(p,y)) const int p_area = P_AREA(P*P); #endif @@ -549,11 +588,15 @@ const float p_scale = 1.0/p_area; #define load_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) #if RF && defined(LUMA_raw) -#define load2_(off) RF_LUMA_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_LUMA_tex(RF_LUMA_pos + RF_LUMA_pt * vec2(off)) +#define gather_offs(off) (RF_LUMA_mul * vec4(textureGatherOffsets(RF_LUMA_raw, RF_LUMA_pos + vec2(off) * RF_LUMA_pt, offsets))) +#define gather(off) RF_LUMA_gather(RF_LUMA_pos + (off)*RF_LUMA_pt, 0) #elif RF -#define load2_(off) RF_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define load2_(off) RF_tex(RF_pos + RF_pt * vec2(off)) #else #define load2_(off) HOOKED_tex(HOOKED_pos + HOOKED_pt * vec2(off)) +#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) +#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) #endif #if T @@ -610,47 +653,40 @@ vec2 ref(vec2 p, int d) vec4 patch_comparison(vec3 r, vec3 r2) { vec3 p; - vec4 pdiff_sq = vec4(0); + vec4 min_rot = vec4(p_area); - FOR_PATCH(p) { - vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); - vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); + FOR_ROTATION FOR_REFLECTION { + vec4 pdiff_sq = vec4(0); + FOR_PATCH(p) { + vec3 transformed_p = vec3(ref(rot(p.xy, ri), rfi), p.z); + vec4 diff_sq = pow(load(p + r2) - load2(transformed_p + r), vec4(2)); #if PST && P >= PST - float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); - diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); + float pdist = exp(-pow(length(p.xy*PSD)*PSS, 2)); + diff_sq = pow(max(diff_sq, EPSILON), vec4(pdist)); #endif - pdiff_sq += diff_sq; + pdiff_sq += diff_sq; + } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return min_rot * p_scale; } -#define NO_GATHER (PD == 0 && RF == 0) // never textureGather if any of these conditions are false +#define NO_GATHER (PD == 0) // never textureGather if any of these conditions are false #define REGULAR_ROTATIONS (RI == 0 || RI == 1 || RI == 3) -#define gather_offs(off) (HOOKED_mul * vec4(textureGatherOffsets(HOOKED_raw, HOOKED_pos + vec2(off) * HOOKED_pt, offsets))) -#define gather(off) HOOKED_gather(HOOKED_pos + (off)*HOOKED_pt, 0) - -#if defined(LUMA_gather) && (PS == 4 && P == 3) && (RI == 0 && RFI == 0) && PST == 0 && M != 1 && NO_GATHER -// (DEPRECATED) 3x3 triangle patch_comparison_gather -const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,0), ivec2(1,0) }; -vec4 poi_patch = gather_offs(0); -vec4 patch_comparison_gather(vec3 r, vec3 r2) -{ - return vec4(dot(pow(poi_patch - gather_offs(r), vec4(2)), vec4(1)), 0, 0 ,0) * p_scale; -} -#elif defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER +#if defined(LUMA_gather) && ((PS == 3 || PS == 7) && P == 3) && PST == 0 && M != 1 && REGULAR_ROTATIONS && NO_GATHER // 3x3 diamond/plus patch_comparison_gather const ivec2 offsets[4] = { ivec2(0,-1), ivec2(-1,0), ivec2(0,1), ivec2(1,0) }; vec4 poi_patch = gather_offs(0); vec4 patch_comparison_gather(vec3 r, vec3 r2) { - float pdiff_sq = 0; + float min_rot = p_area - 1; vec4 transformer = gather_offs(r); - FOR_ROTATION { FOR_REFLECTION { - pdiff_sq += dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + float diff_sq = dot(pow(poi_patch - transformer, vec4(2)), vec4(1)); + min_rot = min(diff_sq, min_rot); #if RFI switch(rfi) { case 0: transformer = transformer.zyxw; break; @@ -665,53 +701,52 @@ vec4 patch_comparison_gather(vec3 r, vec3 r2) transformer = transformer.zwxy; #endif } - pdiff_sq += pow(poi.x - load(r).x, 2) * RI1 * RFI1; - return vec4(pdiff_sq, 0, 0 ,0) * p_scale; + return vec4(min_rot + pow(poi.x - load2(r).x, 2), 0, 0, 0) * p_scale; } #elif defined(LUMA_gather) && PS == 6 && REGULAR_ROTATIONS && NO_GATHER // tiled even square patch_comparison_gather vec4 patch_comparison_gather(vec3 r, vec3 r2) { vec2 tile; - vec4 pdiff_sq = vec4(0); + float min_rot = p_area; /* gather order: * w z * x y */ - for (tile.x = -hp; tile.x < hp; tile.x+=2) { - for (tile.y = -hp; tile.y < hp; tile.y+=2) { + FOR_ROTATION FOR_REFLECTION { + float pdiff_sq = 0; + for (tile.x = -hp; tile.x < hp; tile.x+=2) for (tile.y = -hp; tile.y < hp; tile.y+=2) { vec4 poi_patch = gather(tile + r2.xy); + vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); - FOR_ROTATION FOR_REFLECTION { - vec4 transformer = gather(ref(rot(tile + 0.5, ri), rfi) - 0.5 + r.xy); #if RI - for (float i = 0; i < ri; i+=90) - transformer = transformer.wxyz; // rotate 90 degrees + for (float i = 0; i < ri; i+=90) + transformer = transformer.wxyz; // rotate 90 degrees #endif #if RFI // XXX output is a little off - switch(rfi) { - case 1: transformer = transformer.zyxw; break; - case 2: transformer = transformer.xwzy; break; - } + switch(rfi) { + case 1: transformer = transformer.zyxw; break; + case 2: transformer = transformer.xwzy; break; + } #endif - vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); + vec4 diff_sq = pow(poi_patch - transformer, vec4(2)); #if PST && P >= PST - vec4 pdist = vec4( - exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), - exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) - ); - diff_sq = pow(max(diff_sq, EPSILON), pdist); -#endif - pdiff_sq.x += dot(diff_sq, vec4(1)); - } + vec4 pdist = vec4( + exp(-pow(length((tile+vec2(0,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,1))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(1,0))*PSD)*PSS, 2)), + exp(-pow(length((tile+vec2(0,0))*PSD)*PSS, 2)) + ); + diff_sq = pow(max(diff_sq, EPSILON), pdist); +#endif + pdiff_sq += dot(diff_sq, vec4(1)); } + min_rot = min(min_rot, pdiff_sq); } - return pdiff_sq * p_scale; + return vec4(min_rot, 0, 0, 0) * p_scale; } #else #define patch_comparison_gather patch_comparison @@ -872,7 +907,7 @@ vec4 hook() #if AS // adaptive sharpening vec4 sharpened = poi + (poi - result) * ASF; - vec4 sharpening_power = pow(old_avg_weight, vec4(ASP)); + vec4 sharpening_power = pow(avg_weight, vec4(ASP)); #endif #if EP // extremes preserve