Untitled
unknown
c_cpp
2 years ago
3.1 kB
9
Indexable
void dv::render::draw_list::add_convex_poly_filled(const __m128* points, uint32_t points_count, color col) noexcept { #if !defined(RENDER_NO_ALPHA_CHECK) if (col.a == 0) return; #endif if (points_count < 3) return; prim_reserve((points_count - 2) * 3 + points_count * 6, points_count * 2); uint32_t vtx_inner_idx = vertex_current_index, vtx_outer_idx = vertex_current_index + 1; for (uint32_t i = 2; i != points_count; i++) { index_write[0] = static_cast<draw_index>(vtx_inner_idx); index_write[1] = static_cast<draw_index>(vtx_inner_idx + ((i - 1) << 1)); index_write[2] = static_cast<draw_index>(vtx_inner_idx + (i << 1)); index_write += 3; } for (uint32_t i0 = points_count - 1, i1 = 0; i1 != points_count; i0 = i1++) { __m128 delta = _mm_sub_ps(_mm_shuffle_ps(points[i1], points[i1 + 1], 0x44), _mm_shuffle_ps(points[i0], points[i1], 0x44)); __m128 len_sq = _mm_mul_ps(delta, delta); __m128 len_sq_lo = _mm_add_ss(len_sq, _mm_shuffle_ps(len_sq, len_sq, 0x55)); __m128 len_sq_hi = _mm_add_ss(_mm_shuffle_ps(len_sq, len_sq, 0xAA), _mm_shuffle_ps(len_sq, len_sq, 0xFF)); len_sq = _mm_rsqrt_ps(_mm_shuffle_ps(_mm_shuffle_ps(len_sq_lo, len_sq_lo, 0x00), _mm_shuffle_ps(len_sq_hi, len_sq_hi, 0x00), 0x44)); delta = _MM_BLENDV_PS(delta, _mm_mul_ps(delta, len_sq), _mm_cmpgt_ps(len_sq, _mm_setzero_ps())); delta = _mm_shuffle_ps(delta, _mm_xor_ps(delta, _mm_set1_ps(-0.f)), 0x8D); delta = _mm_shuffle_ps(delta, delta, 0xD8); len_sq = _mm_mul_ps(delta, delta); len_sq = _mm_add_ss(len_sq, _mm_shuffle_ps(len_sq, len_sq, 0x55)); len_sq = _mm_shuffle_ps(len_sq, len_sq, 0x00); if (_mm_comigt_ss(len_sq, _mm_set1_ps(0.000001f))) { //len_sq = _mm_min_ps(_mm_div_ps(_mm_set1_ps(1.f), len_sq), _mm_set1_ps(100.f)); len_sq = _mm_min_ps(_mm_rcp_ps(len_sq), _mm_set1_ps(100.f)); delta = _mm_mul_ps(delta, len_sq); } delta = _mm_mul_ps(delta, _mm_mul_ps(_mm_set1_ps(fringe_scale), _mm_set1_ps(0.5f))); __m128 x_col = col.get_epi32(), z = _mm_set1_ps(1.f); __m128 v0 = _mm_shuffle_ps(_mm_sub_ps(points[i1], delta), _mm_shuffle_ps(x_col, z, 0x00), 0x24); __m128 v1 = _mm_shuffle_ps(_mm_add_ps(points[i1], delta), _mm_shuffle_ps(_mm_and_si128(x_col, _mm_set1_epi32(0x00FFFFFF)), z, 0x00), 0x24); _mm_storeu_ps(vertex_write[0].pos, v0); _mm_storel_pi(reinterpret_cast<__m64*>(vertex_write[0].uv), shared_data->tex_uv_white_pixel); _mm_storeu_ps(vertex_write[1].pos, v1); _mm_storel_pi(reinterpret_cast<__m64*>(vertex_write[0].uv), shared_data->tex_uv_white_pixel); __m128i idx0 = _mm_set1_epi16(i0), idx1 = _mm_set1_epi16(i1), vtx0 = _mm_set1_epi16(vtx_inner_idx), vtx1 = _mm_set1_epi16(vtx_outer_idx); __m128i idx_map = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_slli_epi16(_mm_unpacklo_epi32(idx0, idx1), 1), 0x02), 0xAA); idx_map = _mm_add_epi16(_mm_shufflehi_epi16(_mm_unpacklo_epi32(vtx0, vtx1), 0x02), idx_map); _mm_storeu_si128(reinterpret_cast<__m128i_u*>(index_write), idx_map); index_write += 6; vertex_write += 2; } vertex_current_index += points_count * 2; }
Editor is loading...
Leave a Comment