OpenShot Library | libopenshot  0.7.0
FrameScope.cpp
Go to the documentation of this file.
1 
9 // Copyright (c) 2008-2026 OpenShot Studios, LLC
10 //
11 // SPDX-License-Identifier: LGPL-3.0-or-later
12 
13 #include "FrameScope.h"
14 
15 #include <algorithm>
16 #include <array>
17 #include <cmath>
18 #include <limits>
19 
20 using namespace openshot;
21 
22 namespace {
// Scale factor that maps an 8-bit channel value (0-255) onto [0, 1].
23 constexpr float kInv255 = 1.0f / 255.0f;
// Divisors analyze_video() uses to normalize the U and V chroma components
// before plotting them on the vectorscope. They match the largest single
// coefficient of each component in that function's RGB -> UV conversion
// (0.43600 on blue for U, 0.61500 on red for V).
24 constexpr float kVectorscopeUMax = 0.43600f;
25 constexpr float kVectorscopeVMax = 0.61500f;
26 
// Restrict `value` to the inclusive range [min_value, max_value].
// The lower bound is applied last, so it wins when the bounds are inverted.
static int clamp_int(int value, int min_value, int max_value) {
    const int capped = (max_value < value) ? max_value : value;
    return (capped < min_value) ? min_value : capped;
}
30 
// Returns a lazily built, shared table mapping an 8-bit alpha value `a` to
// 255 / a. Entry 0 is 0 so that indexing with a zero alpha never divides by
// zero. The table is built once, on first use.
static const std::array<float, 256>& inv_alpha_lut() {
    const auto build_table = []() {
        std::array<float, 256> table{};  // value-initialized: entry 0 stays 0.0f
        for (size_t alpha = 1; alpha < table.size(); ++alpha)
            table[alpha] = 255.0f / static_cast<float>(alpha);
        return table;
    };
    static const std::array<float, 256> cached = build_table();
    return cached;
}
41 
42 static Json::Value json_array_from_vector(const std::vector<int>& values) {
43  Json::Value array(Json::arrayValue);
44  for (size_t i = 0; i < values.size(); ++i)
45  array.append(values[i]);
46  return array;
47 }
48 
49 static Json::Value json_array_from_vector(const std::vector<uint32_t>& values) {
50  Json::Value array(Json::arrayValue);
51  for (size_t i = 0; i < values.size(); ++i)
52  array.append(Json::Value::UInt(values[i]));
53  return array;
54 }
55 
56 static Json::Value json_array_from_vector(const std::vector<float>& values) {
57  Json::Value array(Json::arrayValue);
58  for (size_t i = 0; i < values.size(); ++i)
59  array.append(values[i]);
60  return array;
61 }
62 }
63 
65  : frame(nullptr),
66  waveform_columns(256),
67  audio_buckets(256),
68  vectorscope_size(256),
69  roi_enabled(false),
70  roi_x(0.0f),
71  roi_y(0.0f),
72  roi_width(1.0f),
73  roi_height(1.0f),
74  waveform_bins(256),
75  waveform_column_map_width(0),
76  waveform_column_map_columns(0),
77  json_dirty(true) {
78  reset();
79 }
80 
81 FrameScope::FrameScope(std::shared_ptr<Frame> new_frame, int new_waveform_columns, int new_audio_buckets, int new_vectorscope_size)
82  : frame(new_frame),
83  waveform_columns(std::max(1, new_waveform_columns)),
84  audio_buckets(std::max(1, new_audio_buckets)),
85  vectorscope_size(std::max(1, new_vectorscope_size)),
86  roi_enabled(false),
87  roi_x(0.0f),
88  roi_y(0.0f),
89  roi_width(1.0f),
90  roi_height(1.0f),
91  waveform_bins(256),
92  waveform_column_map_width(0),
93  waveform_column_map_columns(0),
94  json_dirty(true) {
95  analyze();
96 }
97 
98 void FrameScope::reset() {
99  reset_video();
100  reset_audio();
101  json_dirty = true;
102 }
103 
104 void FrameScope::reset_video() {
105  video_present = false;
106  video_width = 0;
107  video_height = 0;
108  avg_luma = 0.0;
109  clipped_shadows = 0;
110  clipped_highlights = 0;
111  clipped_red = 0;
112  clipped_green = 0;
113  clipped_blue = 0;
114  ensure_video_buffers();
115  std::fill(histogram_luma.begin(), histogram_luma.end(), 0u);
116  std::fill(histogram_red.begin(), histogram_red.end(), 0u);
117  std::fill(histogram_green.begin(), histogram_green.end(), 0u);
118  std::fill(histogram_blue.begin(), histogram_blue.end(), 0u);
119  std::fill(waveform_luma.begin(), waveform_luma.end(), 0u);
120  std::fill(waveform_red.begin(), waveform_red.end(), 0u);
121  std::fill(waveform_green.begin(), waveform_green.end(), 0u);
122  std::fill(waveform_blue.begin(), waveform_blue.end(), 0u);
123  std::fill(vectorscope.begin(), vectorscope.end(), 0u);
124  json_dirty = true;
125 }
126 
127 void FrameScope::reset_audio() {
128  audio_present = false;
129  audio_channels = 0;
130  audio_samples = 0;
131  audio_sample_rate = 0;
132  audio_peak.clear();
133  audio_rms.clear();
134  audio_clipped_samples.clear();
135  audio_waveform_min.clear();
136  audio_waveform_max.clear();
137  json_dirty = true;
138 }
139 
140 void FrameScope::ensure_video_buffers() {
141  histogram_luma.resize(256);
142  histogram_red.resize(256);
143  histogram_green.resize(256);
144  histogram_blue.resize(256);
145  waveform_luma.resize(static_cast<size_t>(waveform_columns) * static_cast<size_t>(waveform_bins));
146  waveform_red.resize(static_cast<size_t>(waveform_columns) * static_cast<size_t>(waveform_bins));
147  waveform_green.resize(static_cast<size_t>(waveform_columns) * static_cast<size_t>(waveform_bins));
148  waveform_blue.resize(static_cast<size_t>(waveform_columns) * static_cast<size_t>(waveform_bins));
149  vectorscope.resize(static_cast<size_t>(vectorscope_size) * static_cast<size_t>(vectorscope_size));
150 }
151 
152 void FrameScope::ensure_audio_buffers() {
153  audio_peak.assign(static_cast<size_t>(audio_channels), 0.0f);
154  audio_rms.assign(static_cast<size_t>(audio_channels), 0.0f);
155  audio_clipped_samples.assign(static_cast<size_t>(audio_channels), 0u);
156  audio_waveform_min.assign(static_cast<size_t>(audio_channels), std::vector<float>(static_cast<size_t>(audio_buckets), 0.0f));
157  audio_waveform_max.assign(static_cast<size_t>(audio_channels), std::vector<float>(static_cast<size_t>(audio_buckets), 0.0f));
158 }
159 
160 void FrameScope::rebuild_waveform_column_map(int width) {
161  if (width == waveform_column_map_width && waveform_columns == waveform_column_map_columns &&
162  static_cast<int>(waveform_column_map.size()) == width)
163  return;
164 
165  waveform_column_map.resize(static_cast<size_t>(width));
166  waveform_offset_map.resize(static_cast<size_t>(width));
167  const int waveform_column_limit = waveform_columns - 1;
168  for (int x = 0; x < width; ++x) {
169  const int col = clamp_int((x * waveform_columns) / std::max(1, width), 0, waveform_column_limit);
170  waveform_column_map[static_cast<size_t>(x)] = col;
171  waveform_offset_map[static_cast<size_t>(x)] = static_cast<size_t>(col) * static_cast<size_t>(waveform_bins);
172  }
173 
174  waveform_column_map_width = width;
175  waveform_column_map_columns = waveform_columns;
176 }
177 
178 void FrameScope::SetFrame(std::shared_ptr<Frame> new_frame) {
179  frame = new_frame;
180  analyze();
181 }
182 
184  waveform_columns = std::max(1, columns);
185  reset_video();
186  if (frame)
187  analyze_video();
188 }
189 
190 void FrameScope::SetAudioBuckets(int buckets) {
191  audio_buckets = std::max(1, buckets);
192  reset_audio();
193  if (frame)
194  analyze_audio();
195 }
196 
198  vectorscope_size = std::max(1, size);
199  reset_video();
200  if (frame)
201  analyze_video();
202 }
203 
204 void FrameScope::SetVideoRegionNormalized(float x, float y, float width, float height) {
205  roi_x = std::max(0.0f, std::min(1.0f, x));
206  roi_y = std::max(0.0f, std::min(1.0f, y));
207  roi_width = std::max(0.0f, std::min(1.0f - roi_x, width));
208  roi_height = std::max(0.0f, std::min(1.0f - roi_y, height));
209  roi_enabled = roi_width > 0.0f && roi_height > 0.0f;
210  reset_video();
211  if (frame)
212  analyze_video();
213 }
214 
216  roi_enabled = false;
217  roi_x = 0.0f;
218  roi_y = 0.0f;
219  roi_width = 1.0f;
220  roi_height = 1.0f;
221  reset_video();
222  if (frame)
223  analyze_video();
224 }
225 
226 void FrameScope::analyze() {
227  reset();
228  if (!frame)
229  return;
230 
231  analyze_video();
232  analyze_audio();
233  json_dirty = true;
234 }
235 
// Accumulate the video statistics (histograms, waveforms, vectorscope,
// clipping counters and average luma) for the attached frame's image, limited
// to the region of interest when one is enabled.
//
// Preconditions visible in this file:
//  - `frame` is dereferenced without a null check; every caller here tests it.
//  - The histogram / waveform / vectorscope counters are only incremented, not
//    cleared; every caller here runs reset_video() or reset() beforehand.
// Returns early, leaving video_present untouched, when the frame has no image.
236 void FrameScope::analyze_video() {
237  // Frame images are always QImage::Format_RGBA8888_Premultiplied (enforced
238  // by Frame::AddImage). Pixel byte order is [R=0, G=1, B=2, A=3].
239  std::shared_ptr<QImage> image = frame->GetImage();
240  if (!image || image->isNull())
241  return;
242 
243  video_present = true;
244  const int width = image->width();
245  const int height = image->height();
246  int start_x = 0;
247  int end_x = width;
248  int start_y = 0;
249  int end_y = height;
// Convert the normalized ROI to pixel bounds. The start is rounded down and the
// end rounded up, and the end is forced at least one pixel past the start, so
// the analyzed region is never empty.
250  if (roi_enabled) {
251  start_x = clamp_int(static_cast<int>(std::floor(roi_x * width)), 0, width - 1);
252  start_y = clamp_int(static_cast<int>(std::floor(roi_y * height)), 0, height - 1);
253  end_x = clamp_int(static_cast<int>(std::ceil((roi_x + roi_width) * width)), start_x + 1, width);
254  end_y = clamp_int(static_cast<int>(std::ceil((roi_y + roi_height) * height)), start_y + 1, height);
255  }
// The reported dimensions are those of the analyzed region, which is smaller
// than the image when an ROI is active.
256  video_width = std::max(1, end_x - start_x);
257  video_height = std::max(1, end_y - start_y);
258  ensure_video_buffers();
259 
260  double luma_sum = 0.0;
261  int64_t pixel_count = 0;
262  clipped_shadows = 0;
263  clipped_highlights = 0;
264  clipped_red = 0;
265  clipped_green = 0;
266  clipped_blue = 0;
267 
268  const int bytes_per_line = image->bytesPerLine();
269  const unsigned char* bits = image->constBits();
270  const auto& inv_alpha = inv_alpha_lut();
// The column map is indexed by the column offset inside the region
// (roi_column below), hence it is built for the region width.
271  rebuild_waveform_column_map(video_width);
// The vectorscope plane is centered; a normalized chroma value of +/-1 lands on
// the first or last cell of a row/column.
272  const float vectorscope_center = static_cast<float>(vectorscope_size - 1) * 0.5f;
273  const float vectorscope_scale = vectorscope_center;
274 
275  for (int y = start_y; y < end_y; ++y) {
276  // Use pointer increment instead of per-pixel index arithmetic (x * 4).
277  const unsigned char* pixel = bits + (static_cast<size_t>(y) * bytes_per_line)
278  + (static_cast<size_t>(start_x) * 4);
279  int roi_column = 0;
280  for (int x = start_x; x < end_x; ++x, ++roi_column, pixel += 4) {
281  const int red = pixel[0]; // RGBA8888: [R=0, G=1, B=2, A=3]
282  const int green = pixel[1];
283  const int blue = pixel[2];
284  const int alpha = pixel[3]; // premultiplied — divided out below
// Fully transparent pixels are skipped entirely: they add to no counter and are
// not included in pixel_count.
285  if (alpha <= 0)
286  continue;
287 
288  int red_idx, green_idx, blue_idx;
289  float redf, greenf, bluef;
290  if (alpha == 255) {
291  redf = red * kInv255;
292  greenf = green * kInv255;
293  bluef = blue * kInv255;
294  // For fully-opaque pixels the bin index is just the raw byte
295  // value — no float rounding needed (round(byte/255 * 255) == byte).
296  red_idx = red;
297  green_idx = green;
298  blue_idx = blue;
299  } else {
// Undo the alpha premultiplication (channel * 255 / alpha), then scale to
// [0, 1]. The min() caps channel values that exceed alpha.
300  const float inv_a = inv_alpha[alpha];
301  redf = std::min(1.0f, (red * inv_a) * kInv255);
302  greenf = std::min(1.0f, (green * inv_a) * kInv255);
303  bluef = std::min(1.0f, (blue * inv_a) * kInv255);
304  // All values clamped to [0,1], so val*255+0.5 ∈ [0,255.5] — cast is safe.
305  red_idx = static_cast<int>(redf * 255.0f + 0.5f);
306  green_idx = static_cast<int>(greenf * 255.0f + 0.5f);
307  blue_idx = static_cast<int>(bluef * 255.0f + 0.5f);
308  }
309  const float luma = 0.299f * redf + 0.587f * greenf + 0.114f * bluef;
310  // luma ∈ [0,1] (weighted sum of [0,1] values), so luma*255+0.5 ∈ [0,255.5].
311  const int luma_idx = static_cast<int>(luma * 255.0f + 0.5f);
312 
313  // Pre-multiplied offset: eliminates a runtime multiply per pixel.
314  const size_t waveform_offset = waveform_offset_map[static_cast<size_t>(roi_column)];
315 
// Chroma components, each divided by its maximum so both span roughly [-1, 1].
316  const float u = -0.14713f * redf - 0.28886f * greenf + 0.43600f * bluef;
317  const float v = 0.61500f * redf - 0.51499f * greenf - 0.10001f * bluef;
318  const float normalized_u = u / kVectorscopeUMax;
319  const float normalized_v = v / kVectorscopeVMax;
320  // vectorscope_center + normalized_{u,v} * vectorscope_scale is always in
321  // [0, vectorscope_size-1]; adding 0.5 before truncation equals std::round
322  // for all non-negative values.
// V is subtracted, so positive V plots toward row 0 of the density grid.
323  const int vector_x = clamp_int(static_cast<int>(vectorscope_center + (normalized_u * vectorscope_scale) + 0.5f), 0, vectorscope_size - 1);
324  const int vector_y = clamp_int(static_cast<int>(vectorscope_center - (normalized_v * vectorscope_scale) + 0.5f), 0, vectorscope_size - 1);
325  const size_t vector_offset = (static_cast<size_t>(vector_y) * static_cast<size_t>(vectorscope_size)) + static_cast<size_t>(vector_x);
326 
327  histogram_luma[luma_idx]++;
328  histogram_red[red_idx]++;
329  histogram_green[green_idx]++;
330  histogram_blue[blue_idx]++;
331  waveform_luma[waveform_offset + luma_idx]++;
332  waveform_red[waveform_offset + red_idx]++;
333  waveform_green[waveform_offset + green_idx]++;
334  waveform_blue[waveform_offset + blue_idx]++;
335  vectorscope[vector_offset]++;
336 
337  luma_sum += luma;
338  ++pixel_count;
// A value within 2 code values of either end of the 0-255 range counts as
// clipped.
339  if (luma_idx <= 2) ++clipped_shadows;
340  if (luma_idx >= 253) ++clipped_highlights;
341  if (red_idx >= 253) ++clipped_red;
342  if (green_idx >= 253) ++clipped_green;
343  if (blue_idx >= 253) ++clipped_blue;
344  }
345  }
346 
// Mean luma over the counted (non-transparent) pixels; 0 when there were none.
347  avg_luma = pixel_count > 0 ? (luma_sum / static_cast<double>(pixel_count)) : 0.0;
348 }
349 
350 void FrameScope::analyze_audio() {
351  if (!frame->has_audio_data || !frame->audio)
352  return;
353 
354  const int channels = frame->GetAudioChannelsCount();
355  const int samples = frame->GetAudioSamplesCount();
356  if (channels <= 0 || samples <= 0)
357  return;
358 
359  audio_present = true;
360  audio_channels = channels;
361  audio_samples = samples;
362  audio_sample_rate = frame->SampleRate();
363  ensure_audio_buffers();
364  std::vector<double> rms_sums(static_cast<size_t>(channels), 0.0);
365 
366  for (int channel = 0; channel < channels; ++channel) {
367  float* channel_samples = frame->GetAudioSamples(channel);
368  if (!channel_samples)
369  continue;
370 
371  std::fill(audio_waveform_min[channel].begin(), audio_waveform_min[channel].end(), 1.0f);
372  std::fill(audio_waveform_max[channel].begin(), audio_waveform_max[channel].end(), -1.0f);
373 
374  for (int sample = 0; sample < samples; ++sample) {
375  const float value = channel_samples[sample];
376  const float abs_value = std::abs(value);
377  const int bucket = clamp_int((sample * audio_buckets) / std::max(1, samples), 0, audio_buckets - 1);
378 
379  audio_peak[channel] = std::max(audio_peak[channel], abs_value);
380  rms_sums[channel] += static_cast<double>(value) * static_cast<double>(value);
381  if (abs_value >= 0.999f)
382  audio_clipped_samples[channel]++;
383 
384  audio_waveform_min[channel][bucket] = std::min(audio_waveform_min[channel][bucket], value);
385  audio_waveform_max[channel][bucket] = std::max(audio_waveform_max[channel][bucket], value);
386  }
387 
388  for (int bucket = 0; bucket < audio_buckets; ++bucket) {
389  if (audio_waveform_min[channel][bucket] > audio_waveform_max[channel][bucket]) {
390  audio_waveform_min[channel][bucket] = 0.0f;
391  audio_waveform_max[channel][bucket] = 0.0f;
392  }
393  }
394  }
395 
396  for (int channel = 0; channel < channels; ++channel) {
397  audio_rms[channel] = samples > 0 ? static_cast<float>(std::sqrt(rms_sums[channel] / static_cast<double>(samples))) : 0.0f;
398  }
399 }
400 
401 void FrameScope::rebuild_json() const {
402  scope_data = Json::Value(Json::objectValue);
403  scope_data["version"] = 1;
404 
405  Json::Value video(Json::objectValue);
406  video["present"] = video_present;
407  if (video_present) {
408  video["width"] = video_width;
409  video["height"] = video_height;
410 
411  video["summary"] = Json::Value(Json::objectValue);
412  video["summary"]["avg_luma"] = avg_luma;
413  video["summary"]["clipped_shadows"] = clipped_shadows;
414  video["summary"]["clipped_highlights"] = clipped_highlights;
415  video["summary"]["clipped_red"] = clipped_red;
416  video["summary"]["clipped_green"] = clipped_green;
417  video["summary"]["clipped_blue"] = clipped_blue;
418 
419  video["histogram"] = Json::Value(Json::objectValue);
420  video["histogram"]["luma"] = json_array_from_vector(histogram_luma);
421  video["histogram"]["red"] = json_array_from_vector(histogram_red);
422  video["histogram"]["green"] = json_array_from_vector(histogram_green);
423  video["histogram"]["blue"] = json_array_from_vector(histogram_blue);
424 
425  video["waveform"] = Json::Value(Json::objectValue);
426  video["waveform"]["columns"] = waveform_columns;
427  video["waveform"]["bins"] = waveform_bins;
428  video["waveform"]["luma"] = json_array_from_vector(waveform_luma);
429  video["waveform"]["red"] = json_array_from_vector(waveform_red);
430  video["waveform"]["green"] = json_array_from_vector(waveform_green);
431  video["waveform"]["blue"] = json_array_from_vector(waveform_blue);
432 
433  video["vectorscope"] = Json::Value(Json::objectValue);
434  video["vectorscope"]["size"] = vectorscope_size;
435  video["vectorscope"]["density"] = json_array_from_vector(vectorscope);
436  }
437  scope_data["video"] = video;
438 
439  Json::Value audio(Json::objectValue);
440  audio["present"] = audio_present;
441  if (audio_present) {
442  audio["channels"] = audio_channels;
443  audio["samples"] = audio_samples;
444  audio["sample_rate"] = audio_sample_rate;
445 
446  audio["summary"] = Json::Value(Json::objectValue);
447  audio["summary"]["peak"] = json_array_from_vector(audio_peak);
448  audio["summary"]["rms"] = json_array_from_vector(audio_rms);
449  audio["summary"]["clipped_samples"] = json_array_from_vector(audio_clipped_samples);
450 
451  audio["waveform"] = Json::Value(Json::objectValue);
452  audio["waveform"]["buckets"] = audio_buckets;
453  audio["waveform"]["min"] = Json::Value(Json::arrayValue);
454  audio["waveform"]["max"] = Json::Value(Json::arrayValue);
455  for (int channel = 0; channel < audio_channels; ++channel) {
456  audio["waveform"]["min"].append(json_array_from_vector(audio_waveform_min[static_cast<size_t>(channel)]));
457  audio["waveform"]["max"].append(json_array_from_vector(audio_waveform_max[static_cast<size_t>(channel)]));
458  }
459  }
460  scope_data["audio"] = audio;
461  json_dirty = false;
462 }
463 
464 Json::Value FrameScope::JsonValue() const {
465  if (json_dirty)
466  rebuild_json();
467  return scope_data;
468 }
469 
470 std::string FrameScope::Json() const {
471  if (json_dirty)
472  rebuild_json();
473  return scope_data.toStyledString();
474 }
475 
476 std::vector<int> FrameScope::copy_to_int_vector(const std::vector<uint32_t>& values) {
477  std::vector<int> copy(values.size(), 0);
478  const uint32_t max_int = static_cast<uint32_t>(std::numeric_limits<int>::max());
479  for (size_t i = 0; i < values.size(); ++i)
480  copy[i] = static_cast<int>(std::min(values[i], max_int));
481  return copy;
482 }
483 
484 std::vector<float> FrameScope::GetAudioWaveformMin(int channel) const {
485  if (channel < 0 || channel >= static_cast<int>(audio_waveform_min.size()))
486  return std::vector<float>();
487  return audio_waveform_min[static_cast<size_t>(channel)];
488 }
489 
490 std::vector<float> FrameScope::GetAudioWaveformMax(int channel) const {
491  if (channel < 0 || channel >= static_cast<int>(audio_waveform_max.size()))
492  return std::vector<float>();
493  return audio_waveform_max[static_cast<size_t>(channel)];
494 }
openshot::FrameScope::ClearVideoRegion
void ClearVideoRegion()
Clear any video ROI and re-analyze the full frame.
Definition: FrameScope.cpp:215
openshot::FrameScope::SetFrame
void SetFrame(std::shared_ptr< Frame > new_frame)
Replace the current frame and recompute the scope data.
Definition: FrameScope.cpp:178
openshot::FrameScope::SetVectorscopeSize
void SetVectorscopeSize(int size)
Set the vectorscope plane edge length and re-analyze video.
Definition: FrameScope.cpp:197
openshot::FrameScope::FrameScope
FrameScope()
Create an empty scope analyzer with default bucket sizes.
Definition: FrameScope.cpp:64
openshot
This namespace is the default namespace for all code in the openshot library.
Definition: AnimatedCurve.h:24
openshot::FrameScope::Json
std::string Json() const
Return the current scope payload as a JSON string.
Definition: FrameScope.cpp:470
openshot::FrameScope::SetAudioBuckets
void SetAudioBuckets(int buckets)
Set the number of audio buckets and re-analyze.
Definition: FrameScope.cpp:190
openshot::FrameScope::SetWaveformColumns
void SetWaveformColumns(int columns)
Set the number of horizontal waveform columns and re-analyze.
Definition: FrameScope.cpp:183
openshot::FrameScope::GetAudioWaveformMax
std::vector< float > GetAudioWaveformMax(int channel) const
Return one channel of audio waveform maximum values.
Definition: FrameScope.cpp:490
openshot::FrameScope::SetVideoRegionNormalized
void SetVideoRegionNormalized(float x, float y, float width, float height)
Set a normalized ROI for video analysis and re-analyze video.
Definition: FrameScope.cpp:204
FrameScope.h
Header file for FrameScope class.
openshot::FrameScope::JsonValue
Json::Value JsonValue() const
Return the current scope payload as a Json::Value tree.
Definition: FrameScope.cpp:464
openshot::FrameScope::GetAudioWaveformMin
std::vector< float > GetAudioWaveformMin(int channel) const
Return one channel of audio waveform minimum values.
Definition: FrameScope.cpp:484