Untitled

mail@pastecode.io avatar
unknown
c_cpp
7 months ago
3.1 kB
6
Indexable
Never
void dv::render::draw_list::add_convex_poly_filled(const __m128* points, uint32_t points_count, color col) noexcept {
#if !defined(RENDER_NO_ALPHA_CHECK)
	if (col.a == 0)
		return;
#endif

	if (points_count < 3)
		return;

	prim_reserve((points_count - 2) * 3 + points_count * 6, points_count * 2);

	uint32_t vtx_inner_idx = vertex_current_index, vtx_outer_idx = vertex_current_index + 1;

	for (uint32_t i = 2; i != points_count; i++) {
		index_write[0] = static_cast<draw_index>(vtx_inner_idx);
		index_write[1] = static_cast<draw_index>(vtx_inner_idx + ((i - 1) << 1));
		index_write[2] = static_cast<draw_index>(vtx_inner_idx + (i << 1));

		index_write += 3;
	}

	for (uint32_t i0 = points_count - 1, i1 = 0; i1 != points_count; i0 = i1++) {
		__m128 delta = _mm_sub_ps(_mm_shuffle_ps(points[i1], points[i1 + 1], 0x44), _mm_shuffle_ps(points[i0], points[i1], 0x44));

		__m128 len_sq = _mm_mul_ps(delta, delta);
		__m128 len_sq_lo = _mm_add_ss(len_sq, _mm_shuffle_ps(len_sq, len_sq, 0x55));
		__m128 len_sq_hi = _mm_add_ss(_mm_shuffle_ps(len_sq, len_sq, 0xAA), _mm_shuffle_ps(len_sq, len_sq, 0xFF));

		len_sq = _mm_rsqrt_ps(_mm_shuffle_ps(_mm_shuffle_ps(len_sq_lo, len_sq_lo, 0x00), _mm_shuffle_ps(len_sq_hi, len_sq_hi, 0x00), 0x44));

		delta = _MM_BLENDV_PS(delta, _mm_mul_ps(delta, len_sq), _mm_cmpgt_ps(len_sq, _mm_setzero_ps()));
		delta = _mm_shuffle_ps(delta, _mm_xor_ps(delta, _mm_set1_ps(-0.f)), 0x8D);
		delta = _mm_shuffle_ps(delta, delta, 0xD8);

		len_sq = _mm_mul_ps(delta, delta);
		len_sq = _mm_add_ss(len_sq, _mm_shuffle_ps(len_sq, len_sq, 0x55));
		len_sq = _mm_shuffle_ps(len_sq, len_sq, 0x00);

		if (_mm_comigt_ss(len_sq, _mm_set1_ps(0.000001f))) {
			//len_sq = _mm_min_ps(_mm_div_ps(_mm_set1_ps(1.f), len_sq), _mm_set1_ps(100.f));
			len_sq = _mm_min_ps(_mm_rcp_ps(len_sq), _mm_set1_ps(100.f));
			delta = _mm_mul_ps(delta, len_sq);
		}

		delta = _mm_mul_ps(delta, _mm_mul_ps(_mm_set1_ps(fringe_scale), _mm_set1_ps(0.5f)));

		__m128 x_col = col.get_epi32(), z = _mm_set1_ps(1.f);

		__m128 v0 = _mm_shuffle_ps(_mm_sub_ps(points[i1], delta), _mm_shuffle_ps(x_col, z, 0x00), 0x24);
		__m128 v1 = _mm_shuffle_ps(_mm_add_ps(points[i1], delta), _mm_shuffle_ps(_mm_and_si128(x_col, _mm_set1_epi32(0x00FFFFFF)), z, 0x00), 0x24);
	
		_mm_storeu_ps(vertex_write[0].pos, v0);
		_mm_storel_pi(reinterpret_cast<__m64*>(vertex_write[0].uv), shared_data->tex_uv_white_pixel);

		_mm_storeu_ps(vertex_write[1].pos, v1);
		_mm_storel_pi(reinterpret_cast<__m64*>(vertex_write[0].uv), shared_data->tex_uv_white_pixel);

		__m128i idx0 = _mm_set1_epi16(i0), idx1 = _mm_set1_epi16(i1), vtx0 = _mm_set1_epi16(vtx_inner_idx), vtx1 = _mm_set1_epi16(vtx_outer_idx);

		__m128i idx_map = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_slli_epi16(_mm_unpacklo_epi32(idx0, idx1), 1), 0x02), 0xAA);
		idx_map = _mm_add_epi16(_mm_shufflehi_epi16(_mm_unpacklo_epi32(vtx0, vtx1), 0x02), idx_map);

		_mm_storeu_si128(reinterpret_cast<__m128i_u*>(index_write), idx_map);

		index_write += 6;
		vertex_write += 2;
	}

	vertex_current_index += points_count * 2;
}
Leave a Comment