From 4683fcfbbbe16828b1fd067951cbee74d3c4bac5 Mon Sep 17 00:00:00 2001 From: EdoAlvarezR Date: Tue, 2 Apr 2024 17:17:52 -0500 Subject: [PATCH] Thread _G_Uvortexring instead of the kernel --- src/FLOWPanel_elements.jl | 8 +- src/FLOWPanel_liftingbody.jl | 186 +++++++++++++++++------------------ test/runtests_solvers.jl | 4 + 3 files changed, 98 insertions(+), 100 deletions(-) diff --git a/src/FLOWPanel_elements.jl b/src/FLOWPanel_elements.jl index c08d566..1ddbfb0 100644 --- a/src/FLOWPanel_elements.jl +++ b/src/FLOWPanel_elements.jl @@ -364,8 +364,8 @@ function U_boundvortex( pa1::Number, pa2::Number, pa3::Number, rij3 = pb3 - pa3 # Iterate over targets - # @simd for ti in 1:nt - Threads.@threads for ti in 1:nt + @simd for ti in 1:nt + # Threads.@threads for ti in 1:nt @inbounds begin # ri = x - pi @@ -482,8 +482,8 @@ function U_semiinfinite_vortex( p1::Number, p2::Number, p3::Number, # Iterate over targets - # @simd for ti in 1:nt - Threads.@threads for ti in 1:nt + @simd for ti in 1:nt + # Threads.@threads for ti in 1:nt # Split vortex into bound and semi-infinite sections # p0 = p + [(x-p)⋅d]d diff --git a/src/FLOWPanel_liftingbody.jl b/src/FLOWPanel_liftingbody.jl index bab4638..1500841 100644 --- a/src/FLOWPanel_liftingbody.jl +++ b/src/FLOWPanel_liftingbody.jl @@ -475,108 +475,102 @@ function _G_Uvortexring!(self::RigidWakeBody, end - # Pre-allocate memory for panel calculation - tri_out, tricoor, quadcoor, quad_out, lin, ndivscells, cin = gt.generate_getcellt_args!(self.grid) - - # Build geometric matrix: Panels - for (pj, Gslice) in enumerate(eachcol(G)) - - panel = gt.get_cell_t!(tri_out, tricoor, quadcoor, quad_out, - self.grid, pj, lin, ndivscells, cin) + # Build geometric matrix from panel contributions + panels = 1:self.ncells + chunks = collect(Iterators.partition(panels, max(length(panels) ÷ Threads.nthreads(), 3*Threads.nthreads()))) + + Threads.@threads for chunk in chunks # Distribute panel iteration among all CPU threads + + # Pre-allocate memory for panel calculation + tri_out, tricoor, quadcoor, quad_out, lin, ndivscells, cin = gt.generate_getcellt_args!(self.grid) + + # for (pj, Gslice) in enumerate(eachcol(G)) + for pj in chunk # Iterate over panels + + panel = gt.get_cell_t!(tri_out, tricoor, quadcoor, quad_out, + self.grid, pj, lin, ndivscells, cin) + + U_vortexring( + self.grid._nodes, # All nodes + panel, # Indices of nodes that make this panel + 1.0, # Unitary strength + CPs, # Targets + view(G, :, pj); # Velocity of j-th panel on every CP + # Gslice; + dot_with=normals, # Normal of every CP + offset=self.kerneloffset, # Offset of kernel to avoid singularities + cutoff=self.kernelcutoff, # Kernel cutoff to avoid singularities + optargs... + ) - U_vortexring( - self.grid._nodes, # All nodes - panel, # Indices of nodes that make this panel - 1.0, # Unitary strength - CPs, # Targets - # view(G, :, pj); # Velocity of j-th panel on every CP - Gslice; - dot_with=normals, # Normal of every CP - offset=self.kerneloffset, # Offset of kernel to avoid singularities - cutoff=self.kernelcutoff, # Kernel cutoff to avoid singularities - optargs... - ) + end end - #= - # Back-diagonal correction to avoid matrix singularity in closed geometries. - # Seee Eq. 2.19 in Lewis, R. (1991), "Vortex Element Methods for Fluid - # Dynamic Analysis of Engineering Systems" - - println("PROTOTYPE BACK DIAGONAL CORRECTION") - # TODO: Remove memory allocation associated with areas - areas = calc_areas(self) - - for m in 1:size(G, 2) - - rowi = size(G, 1) + 1 - m - - val = 0 - for n in 1:size(G, 1) - if n != rowi - - val += G[n, m] * areas[n] - - end - end - - # G[rowi, m] -= val/areas[rowi] - G[rowi, m] = -val/areas[rowi] - - end - =# # Add wake contributions - TE = zeros(Int, 2) - for (ei, (pi, nia, nib, pj, nja, njb)) in enumerate(eachcol(self.shedding)) # Iterate over wake-shedding panels - - # Fetch nodes of upper wake panel - panel = gt.get_cell_t!(tri_out, tricoor, quadcoor, quad_out, - self.grid, pi, lin, ndivscells, cin) - - # Indicate nodes in the upper shedding edge - TE[1] = panel[nia] - TE[2] = panel[nib] - da1, da2, da3 = Das[1, ei], Das[2, ei], Das[3, ei] - db1, db2, db3 = Dbs[1, ei], Dbs[2, ei], Dbs[3, ei] - - U_semiinfinite_horseshoe( - self.grid._nodes, # All nodes - TE, # Indices of nodes that make the shedding edge - da1, da2, da3, # Semi-infinite direction da - db1, db2, db3, # Semi-infinite direction db - 1.0, # Unitary strength - CPs, # Targets - view(G, :, pi); # Velocity of upper wake panel on every CP - dot_with=normals, # Normal of every CP - offset=self.kerneloffset, # Offset of kernel to avoid singularities - cutoff=self.kernelcutoff, # Kernel cutoff to avoid singularities - optargs... - ) - - if pj != -1 - # Fetch nodes of lower wake panel - panel = gt.get_cell_t!(tri_out, tricoor, quadcoor, quad_out, - self.grid, pj, lin, ndivscells, cin) - - # Indicate nodes in the lower shedding edge - TE[1] = panel[nja] - TE[2] = panel[njb] + sheddings = 1:self.nsheddings + chunks = collect(Iterators.partition(sheddings, max(length(sheddings) ÷ Threads.nthreads(), 3*Threads.nthreads()))) + + Threads.@threads for chunk in chunks # Distribute wake panel iteration among all CPU threads + + # Pre-allocate memory for panel calculation + TE = zeros(Int, 2) + _tri_out, _tricoor, _quadcoor, _quad_out, _lin, _ndivscells, _cin = gt.generate_getcellt_args!(self.grid) + + # for (ei, (pi, nia, nib, pj, nja, njb)) in enumerate(eachcol(self.shedding)) + for ei in chunk # Iterate over wake-shedding panels + + pi, nia, nib, pj, nja, njb = view(self.shedding, :, ei) + + # Fetch nodes of upper wake panel + panel = gt.get_cell_t!(_tri_out, _tricoor, _quadcoor, _quad_out, + self.grid, pi, _lin, _ndivscells, _cin) + + # Indicate nodes in the upper shedding edge + TE[1] = panel[nia] + TE[2] = panel[nib] + da1, da2, da3 = Das[1, ei], Das[2, ei], Das[3, ei] + db1, db2, db3 = Dbs[1, ei], Dbs[2, ei], Dbs[3, ei] + + U_semiinfinite_horseshoe( + self.grid._nodes, # All nodes + TE, # Indices of nodes that make the shedding edge + da1, da2, da3, # Semi-infinite direction da + db1, db2, db3, # Semi-infinite direction db + 1.0, # Unitary strength + CPs, # Targets + view(G, :, pi); # Velocity of upper wake panel on every CP + dot_with=normals, # Normal of every CP + offset=self.kerneloffset, # Offset of kernel to avoid singularities + cutoff=self.kernelcutoff, # Kernel cutoff to avoid singularities + optargs... + ) + + if pj != -1 + # Fetch nodes of lower wake panel + panel = gt.get_cell_t!(_tri_out, _tricoor, _quadcoor, _quad_out, + self.grid, pj, _lin, _ndivscells, _cin) + + # Indicate nodes in the lower shedding edge + TE[1] = panel[nja] + TE[2] = panel[njb] + + U_semiinfinite_horseshoe( + self.grid._nodes, # All nodes + TE, # Indices of nodes that make the shedding edge + db1, db2, db3, # Semi-infinite direction da (flipped in lower panel) + da1, da2, da3, # Semi-infinite direction db + 1.0, # Unitary strength + CPs, # Targets + view(G, :, pj); # Velocity of lower wake panel on every CP + dot_with=normals, # Normal of every CP + offset=self.kerneloffset, # Offset of kernel to avoid singularities + cutoff=self.kernelcutoff, # Kernel cutoff to avoid singularities + optargs... + ) + end + end - U_semiinfinite_horseshoe( - self.grid._nodes, # All nodes - TE, # Indices of nodes that make the shedding edge - db1, db2, db3, # Semi-infinite direction da (flipped in lower panel) - da1, da2, da3, # Semi-infinite direction db - 1.0, # Unitary strength - CPs, # Targets - view(G, :, pj); # Velocity of lower wake panel on every CP - dot_with=normals, # Normal of every CP - offset=self.kerneloffset, # Offset of kernel to avoid singularities - cutoff=self.kernelcutoff, # Kernel cutoff to avoid singularities - optargs... - ) - end end end diff --git a/test/runtests_solvers.jl b/test/runtests_solvers.jl index 0220673..70ea016 100644 --- a/test/runtests_solvers.jl +++ b/test/runtests_solvers.jl @@ -93,6 +93,8 @@ solvers_to_test = ( rfl_NDIVS=rfl_NDIVS, delim=",", span_NDIVS=span_NDIVS_l, + verify_spline=false, + verify_rflspline=false, b_low=-1.0, b_up=0.0 ) @@ -104,6 +106,8 @@ solvers_to_test = ( rfl_NDIVS=rfl_NDIVS, delim=",", span_NDIVS=span_NDIVS_r, + verify_spline=false, + verify_rflspline=false, b_low=1.0, b_up=0.0, )