// Copyright (c) the JPEG XL Project Authors. All rights reserved. // // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #include "lib/jxl/splines.h" #include #include #include "lib/jxl/ans_params.h" #include "lib/jxl/base/printf_macros.h" #include "lib/jxl/base/status.h" #include "lib/jxl/chroma_from_luma.h" #include "lib/jxl/common.h" #include "lib/jxl/dct_scales.h" #include "lib/jxl/entropy_coder.h" #include "lib/jxl/opsin_params.h" #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "lib/jxl/splines.cc" #include #include #include "lib/jxl/fast_math-inl.h" HWY_BEFORE_NAMESPACE(); namespace jxl { namespace HWY_NAMESPACE { namespace { // These templates are not found via ADL. using hwy::HWY_NAMESPACE::Mul; using hwy::HWY_NAMESPACE::MulAdd; using hwy::HWY_NAMESPACE::MulSub; using hwy::HWY_NAMESPACE::Sqrt; using hwy::HWY_NAMESPACE::Sub; // Given a set of DCT coefficients, this returns the result of performing cosine // interpolation on the original samples. float ContinuousIDCT(const float dct[32], const float t) { // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform // treatment of all the coefficients. constexpr float kMultipliers[32] = { kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4, kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9, kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14, kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19, kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24, kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29, kPi / 32 * 30, kPi / 32 * 31, }; HWY_CAPPED(float, 32) df; auto result = Zero(df); const auto tandhalf = Set(df, t + 0.5f); for (int i = 0; i < 32; i += Lanes(df)) { auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf); auto cos = FastCosf(df, cos_arg); auto local_res = Mul(LoadU(df, dct + i), cos); result = MulAdd(Set(df, kSqrt2), local_res, result); } return GetLane(SumOfLanes(df, result)); } template void DrawSegment(DF df, const SplineSegment& segment, const bool add, const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) { Rebind di; const auto inv_sigma = Set(df, segment.inv_sigma); const auto half = Set(df, 0.5f); const auto one_over_2s2 = Set(df, 0.353553391f); const auto sigma_over_4_times_intensity = Set(df, segment.sigma_over_4_times_intensity); const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x)); const auto dy = Set(df, y - segment.center_y); const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); const auto distance = Sqrt(sqd); const auto one_dimensional_factor = Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); auto local_intensity = Mul(sigma_over_4_times_intensity, Mul(one_dimensional_factor, one_dimensional_factor)); for (size_t c = 0; c < 3; ++c) { const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]); const auto in = LoadU(df, rows[c] + x); StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); } } void DrawSegment(const SplineSegment& segment, const bool add, const size_t y, const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) { ssize_t x = std::max(x0, segment.center_x - segment.maximum_distance + 0.5f); // one-past-the-end x1 = std::min(x1, segment.center_x + segment.maximum_distance + 1.5f); HWY_FULL(float) df; for (; x + static_cast(Lanes(df)) <= x1; x += Lanes(df)) { DrawSegment(df, segment, add, y, x, rows); } for (; x < x1; ++x) { DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows); } } void ComputeSegments(const Spline::Point& center, const float intensity, const float color[3], const float sigma, std::vector& segments, std::vector>& segments_by_y) { // Sanity check sigma, inverse sigma and intensity if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) && std::isfinite(intensity))) { return; } #if JXL_HIGH_PRECISION constexpr float kDistanceExp = 5; #else // About 30% faster. constexpr float kDistanceExp = 3; #endif // We cap from below colors to at least 0.01. float max_color = 0.01f; for (size_t c = 0; c < 3; c++) { max_color = std::max(max_color, std::abs(color[c] * intensity)); } // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops // below 10^-kDistanceExp. const float maximum_distance = std::sqrt(-2 * sigma * sigma * (std::log(0.1) * kDistanceExp - std::log(max_color))); SplineSegment segment; segment.center_y = center.y; segment.center_x = center.x; memcpy(segment.color, color, sizeof(segment.color)); segment.inv_sigma = 1.0f / sigma; segment.sigma_over_4_times_intensity = .25f * sigma * intensity; segment.maximum_distance = maximum_distance; ssize_t y0 = center.y - maximum_distance + .5f; ssize_t y1 = center.y + maximum_distance + 1.5f; // one-past-the-end for (ssize_t y = std::max(y0, 0); y < y1; y++) { segments_by_y.emplace_back(y, segments.size()); } segments.push_back(segment); } void DrawSegments(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, float* JXL_RESTRICT row_b, const Rect& image_rect, const bool add, const SplineSegment* segments, const size_t* segment_indices, const size_t* segment_y_start) { JXL_ASSERT(image_rect.ysize() == 1); float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(), row_y - image_rect.x0(), row_b - image_rect.x0()}; size_t y = image_rect.y0(); for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) { DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(), image_rect.x0() + image_rect.xsize(), rows); } } void SegmentsFromPoints( const Spline& spline, const std::vector>& points_to_draw, const float arc_length, std::vector& segments, std::vector>& segments_by_y) { const float inv_arc_length = 1.0f / arc_length; int k = 0; for (const auto& point_to_draw : points_to_draw) { const Spline::Point& point = point_to_draw.first; const float multiplier = point_to_draw.second; const float progress_along_arc = std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length); ++k; float color[3]; for (size_t c = 0; c < 3; ++c) { color[c] = ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc); } const float sigma = ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc); ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y); } } } // namespace // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace jxl HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace jxl { HWY_EXPORT(SegmentsFromPoints); HWY_EXPORT(DrawSegments); namespace { // It is not in spec, but reasonable limit to avoid overflows. template Status ValidateSplinePointPos(const T& x, const T& y) { constexpr T kSplinePosLimit = 1u << 23; if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { return JXL_FAILURE("Spline coordinates out of bounds"); } return true; } // Maximum number of spline control points per frame is // std::min(kMaxNumControlPoints, xsize * ysize / 2) constexpr size_t kMaxNumControlPoints = 1u << 20u; constexpr size_t kMaxNumControlPointsPerPixelRatio = 2; float AdjustedQuant(const int32_t adjustment) { return (adjustment >= 0) ? (1.f + .125f * adjustment) : 1.f / (1.f - .125f * adjustment); } float InvAdjustedQuant(const int32_t adjustment) { return (adjustment >= 0) ? 1.f / (1.f + .125f * adjustment) : (1.f - .125f * adjustment); } // X, Y, B, sigma. static constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f}; Status DecodeAllStartingPoints(std::vector* const points, BitReader* const br, ANSSymbolReader* reader, const std::vector& context_map, const size_t num_splines) { points->clear(); points->reserve(num_splines); int64_t last_x = 0; int64_t last_y = 0; for (size_t i = 0; i < num_splines; i++) { int64_t x = reader->ReadHybridUint(kStartingPositionContext, br, context_map); int64_t y = reader->ReadHybridUint(kStartingPositionContext, br, context_map); if (i != 0) { x = UnpackSigned(x) + last_x; y = UnpackSigned(y) + last_y; } JXL_RETURN_IF_ERROR(ValidateSplinePointPos(x, y)); points->emplace_back(static_cast(x), static_cast(y)); last_x = x; last_y = y; } return true; } struct Vector { float x, y; Vector operator-() const { return {-x, -y}; } Vector operator+(const Vector& other) const { return {x + other.x, y + other.y}; } float SquaredNorm() const { return x * x + y * y; } }; Vector operator*(const float k, const Vector& vec) { return {k * vec.x, k * vec.y}; } Spline::Point operator+(const Spline::Point& p, const Vector& vec) { return {p.x + vec.x, p.y + vec.y}; } Vector operator-(const Spline::Point& a, const Spline::Point& b) { return {a.x - b.x, a.y - b.y}; } // TODO(eustas): avoid making a copy of "points". void DrawCentripetalCatmullRomSpline(std::vector points, std::vector& result) { if (points.empty()) return; if (points.size() == 1) { result.push_back(points[0]); return; } // Number of points to compute between each control point. static constexpr int kNumPoints = 16; result.reserve((points.size() - 1) * kNumPoints + 1); points.insert(points.begin(), points[0] + (points[0] - points[1])); points.push_back(points[points.size() - 1] + (points[points.size() - 1] - points[points.size() - 2])); // points has at least 4 elements at this point. for (size_t start = 0; start < points.size() - 3; ++start) { // 4 of them are used, and we draw from p[1] to p[2]. const Spline::Point* const p = &points[start]; result.push_back(p[1]); float d[3]; float t[4]; t[0] = 0; for (int k = 0; k < 3; ++k) { // TODO(eustas): for each segment delta is calculated 3 times... // TODO(eustas): restrict d[k] with reasonable limit and spec it. d[k] = std::sqrt(hypotf(p[k + 1].x - p[k].x, p[k + 1].y - p[k].y)); t[k + 1] = t[k] + d[k]; } for (int i = 1; i < kNumPoints; ++i) { const float tt = d[0] + (static_cast(i) / kNumPoints) * d[1]; Spline::Point a[3]; for (int k = 0; k < 3; ++k) { // TODO(eustas): reciprocal multiplication would be faster. a[k] = p[k] + ((tt - t[k]) / d[k]) * (p[k + 1] - p[k]); } Spline::Point b[2]; for (int k = 0; k < 2; ++k) { b[k] = a[k] + ((tt - t[k]) / (d[k] + d[k + 1])) * (a[k + 1] - a[k]); } result.push_back(b[0] + ((tt - t[1]) / d[1]) * (b[1] - b[0])); } } result.push_back(points[points.size() - 2]); } // Move along the line segments defined by `points`, `kDesiredRenderingDistance` // pixels at a time, and call `functor` with each point and the actual distance // to the previous point (which will always be kDesiredRenderingDistance except // possibly for the very last point). // TODO(eustas): this method always adds the last point, but never the first // (unless those are one); I believe both ends matter. template void ForEachEquallySpacedPoint(const Points& points, const Functor& functor) { JXL_ASSERT(!points.empty()); Spline::Point current = points.front(); functor(current, kDesiredRenderingDistance); auto next = points.begin(); while (next != points.end()) { const Spline::Point* previous = ¤t; float arclength_from_previous = 0.f; for (;;) { if (next == points.end()) { functor(*previous, arclength_from_previous); return; } const float arclength_to_next = std::sqrt((*next - *previous).SquaredNorm()); if (arclength_from_previous + arclength_to_next >= kDesiredRenderingDistance) { current = *previous + ((kDesiredRenderingDistance - arclength_from_previous) / arclength_to_next) * (*next - *previous); functor(current, kDesiredRenderingDistance); break; } arclength_from_previous += arclength_to_next; previous = &*next; ++next; } } } } // namespace QuantizedSpline::QuantizedSpline(const Spline& original, const int32_t quantization_adjustment, const float y_to_x, const float y_to_b) { JXL_ASSERT(!original.control_points.empty()); control_points_.reserve(original.control_points.size() - 1); const Spline::Point& starting_point = original.control_points.front(); int previous_x = static_cast(roundf(starting_point.x)), previous_y = static_cast(roundf(starting_point.y)); int previous_delta_x = 0, previous_delta_y = 0; for (auto it = original.control_points.begin() + 1; it != original.control_points.end(); ++it) { const int new_x = static_cast(roundf(it->x)); const int new_y = static_cast(roundf(it->y)); const int new_delta_x = new_x - previous_x; const int new_delta_y = new_y - previous_y; control_points_.emplace_back(new_delta_x - previous_delta_x, new_delta_y - previous_delta_y); previous_delta_x = new_delta_x; previous_delta_y = new_delta_y; previous_x = new_x; previous_y = new_y; } const auto to_int = [](float v) -> int { return static_cast(roundf(v)); }; const auto quant = AdjustedQuant(quantization_adjustment); const auto inv_quant = InvAdjustedQuant(quantization_adjustment); for (int c : {1, 0, 2}) { float factor = (c == 0) ? y_to_x : (c == 1) ? 0 : y_to_b; for (int i = 0; i < 32; ++i) { const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; auto restored_y = color_dct_[1][i] * inv_dct_factor * kChannelWeight[1] * inv_quant; auto decorellated = original.color_dct[c][i] - factor * restored_y; color_dct_[c][i] = to_int(decorellated * dct_factor * quant / kChannelWeight[c]); } } for (int i = 0; i < 32; ++i) { const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; sigma_dct_[i] = to_int(original.sigma_dct[i] * dct_factor * quant / kChannelWeight[3]); } } Status QuantizedSpline::Dequantize(const Spline::Point& starting_point, const int32_t quantization_adjustment, const float y_to_x, const float y_to_b, const uint64_t image_size, uint64_t* total_estimated_area_reached, Spline& result) const { result.control_points.clear(); result.control_points.reserve(control_points_.size() + 1); float px = roundf(starting_point.x); float py = roundf(starting_point.y); JXL_RETURN_IF_ERROR(ValidateSplinePointPos(px, py)); int current_x = static_cast(px); int current_y = static_cast(py); result.control_points.push_back(Spline::Point{static_cast(current_x), static_cast(current_y)}); int current_delta_x = 0, current_delta_y = 0; size_t manhattan_distance = 0; for (const auto& point : control_points_) { current_delta_x += point.first; current_delta_y += point.second; manhattan_distance += abs(current_delta_x) + abs(current_delta_y); JXL_RETURN_IF_ERROR( ValidateSplinePointPos(current_delta_x, current_delta_y)); current_x += current_delta_x; current_y += current_delta_y; JXL_RETURN_IF_ERROR(ValidateSplinePointPos(current_x, current_y)); result.control_points.push_back(Spline::Point{ static_cast(current_x), static_cast(current_y)}); } const auto inv_quant = InvAdjustedQuant(quantization_adjustment); for (int c = 0; c < 3; ++c) { for (int i = 0; i < 32; ++i) { const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; result.color_dct[c][i] = color_dct_[c][i] * inv_dct_factor * kChannelWeight[c] * inv_quant; } } for (int i = 0; i < 32; ++i) { result.color_dct[0][i] += y_to_x * result.color_dct[1][i]; result.color_dct[2][i] += y_to_b * result.color_dct[1][i]; } uint64_t width_estimate = 0; uint64_t color[3] = {}; for (int c = 0; c < 3; ++c) { for (int i = 0; i < 32; ++i) { color[c] += static_cast(ceil(inv_quant * std::abs(color_dct_[c][i]))); } } color[0] += static_cast(ceil(abs(y_to_x))) * color[1]; color[2] += static_cast(ceil(abs(y_to_b))) * color[1]; // This is not taking kChannelWeight into account, but up to constant factors // it gives an indication of the influence of the color values on the area // that will need to be rendered. uint64_t logcolor = std::max( uint64_t(1), static_cast(CeilLog2Nonzero( uint64_t(1) + std::max(color[1], std::max(color[0], color[2]))))); for (int i = 0; i < 32; ++i) { const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; result.sigma_dct[i] = sigma_dct_[i] * inv_dct_factor * kChannelWeight[3] * inv_quant; // If we include the factor kChannelWeight[3]=.3333f here, we get a // realistic area estimate. We leave it out to simplify the calculations, // and understand that this way we underestimate the area by a factor of // 1/(0.3333*0.3333). This is taken into account in the limits below. uint64_t weight = std::max( uint64_t(1), static_cast(ceil(inv_quant * std::abs(sigma_dct_[i])))); width_estimate += weight * weight * logcolor; } *total_estimated_area_reached += (width_estimate * manhattan_distance); if (*total_estimated_area_reached > std::min((1024 * image_size + (uint64_t(1) << 32)), (uint64_t(1) << 42))) { return JXL_FAILURE("Too large total_estimated_area_reached: %" PRIu64, *total_estimated_area_reached); } return true; } Status QuantizedSpline::Decode(const std::vector& context_map, ANSSymbolReader* const decoder, BitReader* const br, const size_t max_control_points, size_t* total_num_control_points) { const size_t num_control_points = decoder->ReadHybridUint(kNumControlPointsContext, br, context_map); *total_num_control_points += num_control_points; if (*total_num_control_points > max_control_points) { return JXL_FAILURE("Too many control points: %" PRIuS, *total_num_control_points); } control_points_.resize(num_control_points); // Maximal image dimension. constexpr int64_t kDeltaLimit = 1u << 30; for (std::pair& control_point : control_points_) { control_point.first = UnpackSigned( decoder->ReadHybridUint(kControlPointsContext, br, context_map)); control_point.second = UnpackSigned( decoder->ReadHybridUint(kControlPointsContext, br, context_map)); // Check delta-deltas are not outrageous; it is not in spec, but there is // no reason to allow larger values. if ((control_point.first >= kDeltaLimit) || (control_point.first <= -kDeltaLimit) || (control_point.second >= kDeltaLimit) || (control_point.second <= -kDeltaLimit)) { return JXL_FAILURE("Spline delta-delta is out of bounds"); } } const auto decode_dct = [decoder, br, &context_map](int dct[32]) -> Status { for (int i = 0; i < 32; ++i) { dct[i] = UnpackSigned(decoder->ReadHybridUint(kDCTContext, br, context_map)); } return true; }; for (int c = 0; c < 3; ++c) { JXL_RETURN_IF_ERROR(decode_dct(color_dct_[c])); } JXL_RETURN_IF_ERROR(decode_dct(sigma_dct_)); return true; } void Splines::Clear() { quantization_adjustment_ = 0; splines_.clear(); starting_points_.clear(); segments_.clear(); segment_indices_.clear(); segment_y_start_.clear(); } Status Splines::Decode(jxl::BitReader* br, const size_t num_pixels) { std::vector context_map; ANSCode code; JXL_RETURN_IF_ERROR( DecodeHistograms(br, kNumSplineContexts, &code, &context_map)); ANSSymbolReader decoder(&code, br); const size_t num_splines = 1 + decoder.ReadHybridUint(kNumSplinesContext, br, context_map); size_t max_control_points = std::min( kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio); if (num_splines > max_control_points) { return JXL_FAILURE("Too many splines: %" PRIuS, num_splines); } JXL_RETURN_IF_ERROR(DecodeAllStartingPoints(&starting_points_, br, &decoder, context_map, num_splines)); quantization_adjustment_ = UnpackSigned( decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map)); splines_.clear(); splines_.reserve(num_splines); size_t num_control_points = num_splines; for (size_t i = 0; i < num_splines; ++i) { QuantizedSpline spline; JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br, max_control_points, &num_control_points)); splines_.push_back(std::move(spline)); } JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState()); if (!HasAny()) { return JXL_FAILURE("Decoded splines but got none"); } return true; } void Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect, const Rect& image_rect) const { return Apply(opsin, opsin_rect, image_rect); } void Splines::AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, float* JXL_RESTRICT row_b, const Rect& image_row) const { return ApplyToRow(row_x, row_y, row_b, image_row); } void Splines::SubtractFrom(Image3F* const opsin) const { return Apply(opsin, Rect(*opsin), Rect(*opsin)); } Status Splines::InitializeDrawCache(const size_t image_xsize, const size_t image_ysize, const ColorCorrelationMap& cmap) { // TODO(veluca): avoid storing segments that are entirely outside image // boundaries. segments_.clear(); segment_indices_.clear(); segment_y_start_.clear(); std::vector> segments_by_y; std::vector intermediate_points; uint64_t total_estimated_area_reached = 0; std::vector splines; for (size_t i = 0; i < splines_.size(); ++i) { Spline spline; JXL_RETURN_IF_ERROR(splines_[i].Dequantize( starting_points_[i], quantization_adjustment_, cmap.YtoXRatio(0), cmap.YtoBRatio(0), image_xsize * image_ysize, &total_estimated_area_reached, spline)); if (std::adjacent_find(spline.control_points.begin(), spline.control_points.end()) != spline.control_points.end()) { // Otherwise division by zero might occur. Once control points coincide, // the direction of curve is undefined... return JXL_FAILURE( "identical successive control points in spline %" PRIuS, i); } splines.push_back(spline); } // TODO(firsching) Change this into a JXL_FAILURE for level 5 codestreams. if (total_estimated_area_reached > std::min((8 * image_xsize * image_ysize + (uint64_t(1) << 25)), (uint64_t(1) << 30))) { JXL_WARNING( "Large total_estimated_area_reached, expect slower decoding: %" PRIu64, total_estimated_area_reached); } for (Spline& spline : splines) { std::vector> points_to_draw; auto add_point = [&](const Spline::Point& point, const float multiplier) { points_to_draw.emplace_back(point, multiplier); }; intermediate_points.clear(); DrawCentripetalCatmullRomSpline(spline.control_points, intermediate_points); ForEachEquallySpacedPoint(intermediate_points, add_point); const float arc_length = (points_to_draw.size() - 2) * kDesiredRenderingDistance + points_to_draw.back().second; if (arc_length <= 0.f) { // This spline wouldn't have any effect. continue; } HWY_DYNAMIC_DISPATCH(SegmentsFromPoints) (spline, points_to_draw, arc_length, segments_, segments_by_y); } // TODO(eustas): consider linear sorting here. std::sort(segments_by_y.begin(), segments_by_y.end()); segment_indices_.resize(segments_by_y.size()); segment_y_start_.resize(image_ysize + 1); for (size_t i = 0; i < segments_by_y.size(); i++) { segment_indices_[i] = segments_by_y[i].second; size_t y = segments_by_y[i].first; if (y < image_ysize) { segment_y_start_[y + 1]++; } } for (size_t y = 0; y < image_ysize; y++) { segment_y_start_[y + 1] += segment_y_start_[y]; } return true; } template void Splines::ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, float* JXL_RESTRICT row_b, const Rect& image_row) const { if (segments_.empty()) return; JXL_ASSERT(image_row.ysize() == 1); for (size_t iy = 0; iy < image_row.ysize(); iy++) { HWY_DYNAMIC_DISPATCH(DrawSegments) (row_x, row_y, row_b, image_row.Line(iy), add, segments_.data(), segment_indices_.data(), segment_y_start_.data()); } } template void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect, const Rect& image_rect) const { if (segments_.empty()) return; for (size_t iy = 0; iy < image_rect.ysize(); iy++) { const size_t y0 = opsin_rect.Line(iy).y0(); const size_t x0 = opsin_rect.x0(); ApplyToRow(opsin->PlaneRow(0, y0) + x0, opsin->PlaneRow(1, y0) + x0, opsin->PlaneRow(2, y0) + x0, image_rect.Line(iy)); } } } // namespace jxl #endif // HWY_ONCE