 #include "beam_jit_common.hpp"
 #include "beam_jit_bs.hpp"

+#include <iterator>
+#include <numeric>
+
 extern "C"
 {
 #include "beam_file.h"
@@ -86,70 +89,153 @@ std::vector<BscSegment> beam_jit_bsc_init(const Span<ArgVal> &args) { |
     return segments;
 }

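+/* Fold the segments in [first, last) into segs as an
+ * ACCUMULATE_FIRST action followed by ACCUMULATE actions for the
+ * remaining segments, returning the combined size in bits. */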
+template<typename It>
+static auto fold_group(std::vector<BscSegment> &segs, It first, It last) {
+    auto &back = segs.emplace_back(*first);
+
+    back.action = BscSegment::action::ACCUMULATE_FIRST;
+
+    return std::accumulate(std::next(first),
+                           last,
+                           back.effectiveSize,
+                           [&segs](Sint acc, const BscSegment &seg) {
+                               auto &back = segs.emplace_back(seg);
+
+                               back.action = BscSegment::action::ACCUMULATE;
+
+                               return acc + back.effectiveSize;
+                           });
+}
+
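+/* Emit the group of segments in [start, end), if any, followed by a
+ * single STORE action for the combined value. Little-endian groups
+ * are folded in reverse order so that the first segment ends up in
+ * the least significant bits of the accumulator, which a
+ * little-endian store writes out first. */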
+static void push_group(std::vector<BscSegment> &segs,
+                       std::vector<BscSegment>::const_iterator start,
+                       std::vector<BscSegment>::const_iterator end) {
+    if (start < end) {
+        auto groupSize = ((start->flags & BSF_LITTLE) != 0)
+                                 ? fold_group(segs,
+                                              std::make_reverse_iterator(end),
+                                              std::make_reverse_iterator(start))
+                                 : fold_group(segs, start, end);
+
+        auto &seg = segs.emplace_back();
+
+        seg.type = am_integer;
+        seg.action = BscSegment::action::STORE;
+        seg.effectiveSize = groupSize;
+        seg.flags = start->flags;
+    }
+}
+
+/*
+ * Combine small segments into a group so that their values can be
+ * accumulated in a register and then written to memory with a
+ * single store. Here is an example in Erlang illustrating the idea.
+ * Consider this binary construction:
+ *
+ *    <<A:16/big, B:32/big, C:16/big>>
+ *
+ * This can be rewritten as follows:
+ *
+ *    Acc0 = A,
+ *    Acc1 = (Acc0 bsl 32) bor B,
+ *    Acc = (Acc1 bsl 16) bor C,
+ *    <<Acc:64/big>>
+ *
+ * Translated to native code, this is faster because the accumulation
+ * is done in a CPU register before the result is written to memory.
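+ *
+ * With the grouping below, this example is turned into the following
+ * sequence of actions (sizes in bits):
+ *
+ *    ACCUMULATE_FIRST A, 16
+ *    ACCUMULATE       B, 32
+ *    ACCUMULATE       C, 16
+ *    STORE            64
+ *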
+ * For big-endian segments, this rewrite works even if the segments
+ * are not byte-sized. For example:
+ *
+ *    <<A:6, B:6, C:6, D:6>>
+ *
+ * Little-endian segments can be optimized in a similar way. Consider:
+ *
+ *    <<A:16/little, B:32/little, C:16/little>>
+ *
+ * This can be rewritten like so:
+ *
+ *    Acc0 = C,
+ *    Acc1 = (Acc0 bsl 32) bor B,
+ *    Acc = (Acc1 bsl 16) bor A,
+ *    <<Acc:64/little>>
+ *
+ * Note that the segments are accumulated in reverse order, which is
+ * why push_group() folds little-endian groups using reverse
+ * iterators. However, for little-endian segments this rewrite only
+ * works if all segments except the last one are byte-sized.
+ */
+
 std::vector<BscSegment> beam_jit_bsc_combine_segments(
         const std::vector<BscSegment> segments) {
     std::vector<BscSegment> segs;

-    for (auto seg : segments) {
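+    /* First segment of the group currently being collected (cend()
+     * when no group is open) and the group's combined size in bits. */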
+    auto group = segments.cend();
+    Sint combinedSize = 0;
+
+    for (auto it = segments.cbegin(); it != segments.cend(); it++) {
+        auto &seg = *it;
+
         switch (seg.type) {
         case am_integer: {
             if (!(0 < seg.effectiveSize && seg.effectiveSize <= 64)) {
                 /* Unknown or too large size. Handle using the default
                  * DIRECT action. */
+                push_group(segs, group, it);
+                group = segments.cend();
+
                 segs.push_back(seg);
                 continue;
             }

-            if (seg.flags & BSF_LITTLE || segs.size() == 0 ||
-                segs.back().action == BscSegment::action::DIRECT) {
-                /* There are no previous compatible ACCUMULATE / STORE
-                 * actions. Create the first ones. */
-                seg.action = BscSegment::action::ACCUMULATE_FIRST;
-                segs.push_back(seg);
-                seg.action = BscSegment::action::STORE;
-                segs.push_back(seg);
+            /* The current segment has a known size not exceeding 64
+             * bits. Try to add it to the current group. */
+            if (group == segments.cend()) {
+                group = it;
+
+                combinedSize = seg.effectiveSize;
                 continue;
             }

-            auto prev = segs.back();
-            if (prev.flags & BSF_LITTLE) {
-                /* Little-endian segments cannot be combined with other
-                 * segments. Create new ACCUMULATE_FIRST / STORE actions. */
-                seg.action = BscSegment::action::ACCUMULATE_FIRST;
-                segs.push_back(seg);
-                seg.action = BscSegment::action::STORE;
-                segs.push_back(seg);
+            /* There is already at least one segment in the group.
+             * Append the current segment to the group only if it is
+             * compatible and will fit. */
+
+            bool sameEndian =
+                    (seg.flags & BSF_LITTLE) == (group->flags & BSF_LITTLE);
+
+            /* Big-endian segments can always be grouped (if the size
+             * does not exceed 64 bits). Little-endian segments can
+             * only be grouped if all but the last segment are
+             * byte-sized. */
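+            /* For example, <<A:4/little, B:8/little>> cannot be
+             * grouped because A is not byte-sized, while
+             * <<A:8/little, B:4/little>> can be. */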
+            bool suitableSizes =
+                    ((seg.flags & BSF_LITTLE) == 0 || combinedSize % 8 == 0);
+
+            if (sameEndian && combinedSize + seg.effectiveSize <= 64 &&
+                suitableSizes) {
+                combinedSize += seg.effectiveSize;
                 continue;
             }

-            /* The current segment is compatible with the previous
-             * segment. Try combining them. */
-            if (prev.effectiveSize + seg.effectiveSize <= 64) {
-                /* The combined values of the segments fit in the
-                 * accumulator. Insert an ACCUMULATE action for the
-                 * current segment before the pre-existing STORE
-                 * action. */
-                segs.pop_back();
-                prev.effectiveSize += seg.effectiveSize;
-                seg.action = BscSegment::action::ACCUMULATE;
-                segs.push_back(seg);
-                segs.push_back(prev);
-            } else {
-                /* The size exceeds 64 bits. Can't combine. */
-                seg.action = BscSegment::action::ACCUMULATE_FIRST;
-                segs.push_back(seg);
-                seg.action = BscSegment::action::STORE;
-                segs.push_back(seg);
-            }
+            /* The current segment cannot be added to the group.
+             * Flush the group and start a new one with this
+             * segment. */
+            push_group(segs, group, it);
+            group = it;
+
+            combinedSize = seg.effectiveSize;
             break;
         }
         default:
+            push_group(segs, group, it);
+            group = segments.cend();
+
             segs.push_back(seg);
             break;
         }
     }

-    /* Calculate bit offsets for each ACCUMULATE segment. */
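+    /* Flush the group that is still open at the end of the segment
+     * list, if any. */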
+    push_group(segs, group, segments.cend());
+
+    /* Calculate bit offsets for ACCUMULATE and STORE segments. */

     Uint offset = 0;
     for (int i = segs.size() - 1; i >= 0; i--) {