|
1 | 1 | using LibCEED, Printf |
2 | 2 |
|
3 | | -include("common.jl") |
| 3 | +include("common.jl") # defines helper functions: create_tensor_h1_lagrange_basis, |
| 4 | + # build_cartesian_restriction, get_cartesian_mesh_size, etc. |
4 | 5 |
|
5 | 6 | function transform_mesh_coords!(dim, mesh_size, mesh_coords) |
6 | | - @witharray coords = mesh_coords begin |
| 7 | + @witharray coords = mesh_coords begin |
7 | 8 | if dim == 1 |
8 | 9 | for i = 1:mesh_size |
9 | | - # map [0,1] to [0,1] varying the mesh density |
10 | 10 | coords[i] = 0.5 + 1.0/sqrt(3.0)*sin((2.0/3.0)*pi*(coords[i] - 0.5)) |
11 | 11 | end |
12 | 12 | exact_volume = 1.0 |
13 | 13 | else |
14 | | - num_nodes = mesh_size÷dim |
| 14 | + num_nodes = mesh_size ÷ dim |
15 | 15 | @inbounds @simd for i = 1:num_nodes |
16 | | - # map (x,y) from [0,1]x[0,1] to the quarter annulus with polar |
17 | | - # coordinates, (r,phi) in [1,2]x[0,pi/2] with area = 3/4*pi |
18 | | - u = coords[i] |
19 | | - v = coords[i+num_nodes] |
20 | | - u = 1.0 + u |
21 | | - v = pi/2*v |
22 | | - coords[i] = u*cos(v) |
23 | | - coords[i+num_nodes] = u*sin(v) |
| 16 | + u = coords[i] # ξ ∈ [0,1] |
| 17 | + v = coords[i+num_nodes] # η ∈ [0,1] |
| 18 | + r = 1.0 + u |
| 19 | + θ = (pi/2) * v |
| 20 | + coords[i] = r * cos(θ) |
| 21 | + coords[i+num_nodes] = r * sin(θ) |
24 | 22 | end |
25 | | - exact_volume = 3.0/4.0*pi |
| 23 | + exact_volume = 3.0/4.0 * pi |
26 | 24 | end |
27 | 25 | return exact_volume |
28 | 26 | end |
29 | 27 | end |
30 | 28 |
|
31 | | -function run_ex3(; ceed_spec, dim, mesh_order, sol_order, num_qpts, prob_size, gallery) |
32 | | - # Main implementation goes here |
33 | | - ncompx = dim |
| 29 | +function run_ex3(; ceed_spec="/cpu/self", dim=2, mesh_order=3, sol_order=3, |
| 30 | + num_qpts=4, prob_size=-1, gallery=false) |
| 31 | + |
34 | 32 | prob_size < 0 && (prob_size = 256*1024) |
| 33 | + ncompx = dim |
35 | 34 |
|
36 | 35 | ceed = Ceed(ceed_spec) |
37 | | - mesh_basis = |
38 | | - create_tensor_h1_lagrange_basis(ceed, dim, ncompx, mesh_order + 1, num_qpts, GAUSS) |
39 | | - sol_basis = |
40 | | - create_tensor_h1_lagrange_basis(ceed, dim, 1, sol_order + 1, num_qpts, GAUSS) |
41 | 36 |
|
42 | | - nxyz = get_cartesian_mesh_size(dim, sol_order, prob_size) |
43 | | - println("Mesh size:",nxyz) |
| 37 | + |
| 38 | + # Bases |
| 39 | + |
| 40 | + mesh_basis = create_tensor_h1_lagrange_basis(ceed, dim, ncompx, mesh_order + 1, num_qpts, GAUSS) |
| 41 | + sol_basis = create_tensor_h1_lagrange_basis(ceed, dim, 1, sol_order + 1, num_qpts, GAUSS) |
| 42 | + |
| 43 | + |
| 44 | + # Mesh size & restrictions |
| 45 | + |
| 46 | + nxyz = get_cartesian_mesh_size(dim, sol_order, prob_size) |
| 47 | + println("Mesh size: ", nxyz) |
44 | 48 |
|
45 | | - #Build CeedElemRestriction objects describing the mesh and solution discrete |
46 | | - # mesh_rstr: for building (no qdata restriction needed) |
47 | 49 | mesh_size, mesh_rstr, _ = build_cartesian_restriction(ceed, dim, nxyz, mesh_order, ncompx, num_qpts) |
48 | 50 |
|
49 | | - # sol_rstr + sol_rstr_i: for solving |
50 | | - sol_size, sol_rstr, sol_rstr_i = build_cartesian_restriction( |
51 | | - ceed, dim, nxyz, sol_order, 1, num_qpts, mode=RestrictionAndStrided |
52 | | - ) |
| 51 | + sol_size, sol_rstr, _ = build_cartesian_restriction( |
| 52 | + ceed, dim, nxyz, sol_order, 1, num_qpts, mode=RestrictionAndStrided) |
| 53 | + |
| 54 | + # Physical mesh coordinates + exact volume |
53 | 55 |
|
54 | | - # mesh_coords |
55 | 56 | mesh_coords = CeedVector(ceed, mesh_size) |
56 | 57 | set_cartesian_mesh_coords!(dim, nxyz, mesh_order, mesh_coords) |
57 | 58 | exact_vol = transform_mesh_coords!(dim, mesh_size, mesh_coords) |
58 | 59 |
|
59 | | - #Create the Q-function that builds the mass operator ( i.e it computes the quadrature data) and set its context data. |
60 | | - num_q_comp = 1 + div(dim*(dim+1), 2) |
| 60 | + |
| 61 | + num_q_comp = 1 + div(dim*(dim + 1), 2) # 1 for detJ (mass), rest for symmetric JᵀJ |
| 62 | + num_elem = prod(nxyz) |
| 63 | + elem_qpts = num_qpts^dim |
| 64 | + |
| 65 | + qdata_size = num_elem * elem_qpts * num_q_comp |
| 66 | + qdata = CeedVector(ceed, qdata_size) |
| 67 | + qdata[] = 0.0 |
| 68 | + |
| 69 | + # Strided restriction: one "node" per quadrature point, num_q_comp components |
| 70 | + qdata_rstr = create_elem_restriction_strided( |
| 71 | + ceed, |
| 72 | + num_elem, |
| 73 | + elem_qpts, # nodes per element = quadrature points |
| 74 | + num_q_comp, # components per quadrature point |
| 75 | + qdata_size, |
| 76 | + CeedInt[1, num_q_comp, num_q_comp * elem_qpts] # strides: [comp, elem, total] |
| 77 | + ) |
| 78 | + |
61 | 79 |
|
62 | 80 | @interior_qf build_qfunc = ( |
63 | 81 | ceed, |
64 | 82 | dim=dim, |
65 | | - (dx, :in, EVAL_GRAD, dim, dim), # ← THIS LINE: dx input |
66 | | - (weights, :in, EVAL_WEIGHT), # ← weights input |
67 | | - (qdata, :out, EVAL_NONE, num_q_comp), # ← qdata output |
| 83 | + (dx, :in, EVAL_GRAD, dim, dim), |
| 84 | + (weights, :in, EVAL_WEIGHT), |
| 85 | + (qdata, :out, EVAL_NONE, num_q_comp), |
68 | 86 | begin |
69 | | - # Compute determinant |
70 | | - det_J = det(dx) |
71 | | - |
72 | | - # Store mass component |
73 | | - qdata[1] = weights * det_J |
74 | | - |
75 | | - # Store diffusion components (J^T * J) |
76 | | - idx = 2 |
| 87 | + # det(J) |
| 88 | + if dim == 1 |
| 89 | + detJ = dx[1,1] |
| 90 | + elseif dim == 2 |
| 91 | + detJ = dx[1,1]*dx[2,2] - dx[1,2]*dx[2,1] |
| 92 | + else |
| 93 | + detJ = (dx[1,1]*(dx[2,2]*dx[3,3] - dx[2,3]*dx[3,2]) - |
| 94 | + dx[1,2]*(dx[2,1]*dx[3,3] - dx[2,3]*dx[3,1]) + |
| 95 | + dx[1,3]*(dx[2,1]*dx[3,2] - dx[2,2]*dx[3,1])) |
| 96 | + end |
| 97 | + |
| 98 | + qdata[1] = weights * detJ # mass contribution |
| 99 | + |
| 100 | + # Upper triangle of JᵀJ (symmetric storage) |
| 101 | + k = 2 |
77 | 102 | for i = 1:dim |
78 | 103 | for j = i:dim |
79 | | - qdata[idx] = dx[:, i]' * dx[:, j] |
80 | | - idx += 1 |
| 104 | + s = zero(eltype(dx)) |
| 105 | + @inbounds @simd for m = 1:dim |
| 106 | + s += dx[m,i] * dx[m,j] |
| 107 | + end |
| 108 | + qdata[k] = s |
| 109 | + k += 1 |
81 | 110 | end |
82 | 111 | end |
83 | | - end, |
| 112 | + end |
84 | 113 | ) |
85 | 114 |
|
86 | | - #Create the operator that builds the quadrature data for the mass operator |
87 | 115 | build_oper = Operator( |
88 | 116 | ceed, |
89 | 117 | qf=build_qfunc, |
90 | 118 | fields=[ |
91 | | - (:dx, mesh_rstr, mesh_basis, CeedVectorActive()), |
| 119 | + (:dx, mesh_rstr, mesh_basis, mesh_coords), |
92 | 120 | (:weights, ElemRestrictionNone(), mesh_basis, CeedVectorNone()), |
93 | | - (:qdata, sol_rstr_i, BasisNone(), CeedVectorActive()), |
| 121 | + (:qdata, qdata_rstr, BasisNone(), qdata), |
94 | 122 | ], |
95 | 123 | ) |
96 | 124 |
|
97 | | - # Apply to get qdata |
98 | | - elem_qpts = num_qpts^dim |
99 | | - num_elem = prod(nxyz) |
100 | | - qdata = CeedVector(ceed, num_elem * elem_qpts * num_q_comp) |
101 | | - print("Computing the quadrature data for the mass operator ...") |
| 125 | + print("Computing quadrature data ... ") |
102 | 126 | flush(stdout) |
103 | 127 | apply!(build_oper, mesh_coords, qdata) |
104 | | - println(" done.") |
| 128 | + println("done.") |
105 | 129 |
|
106 | | - #Create QFunction for applying the mass+diffusion operator |
107 | 130 | @interior_qf apply_qfunc = ( |
108 | 131 | ceed, |
109 | 132 | dim=dim, |
110 | | - (u, :in, EVAL_INTERP), |
111 | | - (du, :in, EVAL_GRAD, dim), |
112 | | - (qdata, :in, EVAL_NONE, num_q_comp), |
113 | | - (v, :out, EVAL_INTERP), |
114 | | - (dv, :out, EVAL_GRAD, dim), |
| 133 | + (u, :in, EVAL_INTERP), |
| 134 | + (du, :in, EVAL_GRAD, dim), |
| 135 | + (qdata, :in, EVAL_NONE, num_q_comp), |
| 136 | + (v, :out, EVAL_INTERP), |
| 137 | + (dv, :out, EVAL_GRAD, dim), |
115 | 138 | begin |
116 | | - # Apply mass: v = qdata[1] * u |
117 | | - v .= qdata[1] .* u |
118 | | - |
119 | | - # Apply diffusion: dv = (qdata[2:end]) * du |
120 | | - # The qdata contains the symmetric diffusion tensor (J^T*J) |
121 | | - # dv_i = sum_j (J^T*J)_{i,j} * du_j |
122 | | - |
123 | | - # For efficiency, rebuild the matrix from stored components |
| 139 | + v .= qdata[1] .* u # mass part |
| 140 | + |
| 141 | + # diffusion part: reconstruct symmetric matrix from upper triangle |
124 | 142 | idx = 2 |
125 | 143 | for i = 1:dim |
126 | | - dv_i = 0.0 |
| 144 | + acc = zero(eltype(dv)) |
127 | 145 | for j = 1:dim |
128 | | - # Reconstruct symmetric matrix element |
129 | 146 | if j >= i |
130 | | - mat_idx = idx + div((j-1)*j, 2) + (i-1) |
| 147 | + mat_idx = idx + (j*(j-1))÷2 + (i-1) |
131 | 148 | else |
132 | | - mat_idx = idx + div((i-1)*i, 2) + (j-1) |
| 149 | + mat_idx = idx + (i*(i-1))÷2 + (j-1) |
133 | 150 | end |
134 | | - dv_i += qdata[mat_idx] * du[j] |
| 151 | + acc += qdata[mat_idx] * du[j] |
135 | 152 | end |
136 | | - dv[i] = dv_i |
| 153 | + dv[i] = acc |
137 | 154 | end |
138 | | - end, |
| 155 | + end |
139 | 156 | ) |
| 157 | + |
140 | 158 | apply_oper = Operator( |
141 | | - ceed, |
142 | | - qf=apply_qfunc, |
143 | | - fields=[ |
144 | | - (:u, sol_rstr, sol_basis, CeedVectorActive()), |
145 | | - (:du, sol_rstr, sol_basis, CeedVectorActive()), |
146 | | - (:qdata, sol_rstr_i, BasisNone(), qdata), |
147 | | - (:v, sol_rstr, sol_basis, CeedVectorActive()), |
148 | | - (:dv, sol_rstr, sol_basis, CeedVectorActive()), |
149 | | - ], |
150 | | -) |
| 159 | + ceed, |
| 160 | + qf=apply_qfunc, |
| 161 | + fields=[ |
| 162 | + (:u, sol_rstr, sol_basis, CeedVectorActive()), |
| 163 | + (:du, sol_rstr, sol_basis, CeedVectorActive()), |
| 164 | + (:qdata, qdata_rstr, BasisNone(), qdata), |
| 165 | + (:v, sol_rstr, sol_basis, CeedVectorActive()), |
| 166 | + (:dv, sol_rstr, sol_basis, CeedVectorActive()), |
| 167 | + ], |
| 168 | + ) |
151 | 169 |
|
152 | | -# # Compute the mesh volume using the massdiff operator |
153 | | - print("Computing the mesh volume using the formula: vol = 1^T * (M + K) * 1...") |
154 | | - flush(stdout) |
155 | | - |
156 | | - u = CeedVector(ceed, sol_size) |
157 | | - v = CeedVector(ceed, sol_size) |
158 | | - u[] = 1.0 |
159 | | - |
160 | | - # Apply operator |
161 | | - apply!(apply_oper, u, v) |
162 | 170 |
|
163 | | - # Compute volume |
164 | | - vol = witharray_read(sum, v, MEM_HOST) |
165 | | - |
166 | | - @printf("Exact mesh volume : % .14g\n", exact_vol) |
167 | | - @printf("Computed mesh volume : % .14g\n", vol) |
168 | | - @printf("Volume error : % .14g\n", vol - exact_vol) |
| 171 | + print("Computing volume via 1ᵀ (M + K) 1 ... ") |
| 172 | + flush(stdout) |
| 173 | + u_vec = CeedVector(ceed, sol_size) |
| 174 | + v_vec = CeedVector(ceed, sol_size) |
| 175 | + |
| 176 | + u_vec[] = 1.0 |
| 177 | + apply!(apply_oper, u_vec, v_vec) |
| 178 | + |
| 179 | + computed_vol = witharray_read(sum, v_vec, MEM_HOST) |
| 180 | + @printf("Exact mesh volume : %.14g\n", exact_vol) |
| 181 | + @printf("Computed mesh volume : %.14g\n", computed_vol) |
| 182 | + @printf("Volume error : %.14g\n", computed_vol - exact_vol) |
169 | 183 | end |
170 | 184 |
|
171 | | -# Entry point |
| 185 | + |
| 186 | +# Run |
| 187 | + |
172 | 188 | run_ex3( |
173 | | - ceed_spec="/cpu/self", |
174 | | - dim=2, |
175 | | - mesh_order=2, |
176 | | - sol_order=2, |
177 | | - num_qpts=3, |
178 | | - prob_size=-1, |
179 | | - gallery=false, |
| 189 | + ceed_spec = "/cpu/self", # use "/gpu/cuda" or "/gpu/hip" if available |
| 190 | + dim = 2, |
| 191 | + mesh_order = 3, |
| 192 | + sol_order = 3, |
| 193 | + num_qpts = 4, |
| 194 | + prob_size = -1, # -1 → auto-size (~256K dofs) |
180 | 195 | ) |
0 commit comments