diff --git a/doc/classes/VideoStreamPlayer.xml b/doc/classes/VideoStreamPlayer.xml
index f903f171d10..46bc4f491d6 100644
--- a/doc/classes/VideoStreamPlayer.xml
+++ b/doc/classes/VideoStreamPlayer.xml
@@ -16,7 +16,6 @@
The length of the current stream, in seconds.
- [b]Note:[/b] For [VideoStreamTheora] streams (the built-in format supported by Godot), this value will always be zero, as getting the stream length is not implemented yet. The feature may be supported by video formats implemented by a GDExtension add-on.
@@ -79,7 +78,6 @@
The current position of the stream, in seconds.
- [b]Note:[/b] Changing this value won't have any effect as seeking is not implemented yet, except in video formats implemented by a GDExtension add-on.
Audio volume as a linear value.
diff --git a/modules/theora/video_stream_theora.cpp b/modules/theora/video_stream_theora.cpp
index 02a1c7f8687..e8eefbacf2e 100644
--- a/modules/theora/video_stream_theora.cpp
+++ b/modules/theora/video_stream_theora.cpp
@@ -41,17 +41,15 @@ int VideoStreamPlaybackTheora::buffer_data() {
uint64_t bytes = file->get_buffer((uint8_t *)buffer, 4096);
ogg_sync_wrote(&oy, bytes);
- return (bytes);
+ return bytes;
}
int VideoStreamPlaybackTheora::queue_page(ogg_page *page) {
- if (theora_p) {
- ogg_stream_pagein(&to, page);
- if (to.e_o_s) {
- theora_eos = true;
- }
+ ogg_stream_pagein(&to, page);
+ if (to.e_o_s) {
+ theora_eos = true;
}
- if (vorbis_p) {
+ if (has_audio) {
ogg_stream_pagein(&vo, page);
if (vo.e_o_s) {
vorbis_eos = true;
@@ -60,6 +58,179 @@ int VideoStreamPlaybackTheora::queue_page(ogg_page *page) {
return 0;
}
+// Fetch the next page from the Ogg sync layer into `page`, refilling the sync buffer from the file whenever no
+// page can be produced yet. Returns the positive ogg_sync_pageout() result on success, or 0 once buffer_data()
+// reports that no more bytes were read.
+int VideoStreamPlaybackTheora::read_page(ogg_page *page) {
+ for (;;) {
+ const int result = ogg_sync_pageout(&oy, page);
+ if (result > 0) {
+ return result;
+ }
+ // No page available yet: pull more data, and stop when the file has nothing left.
+ if (buffer_data() == 0) {
+ return 0;
+ }
+ }
+}
+
+// Convert the granule position of `page` into a time, using the decoder of the stream the page belongs to
+// (matched by serial number). Returns -1 when the page belongs to neither the video stream nor the selected
+// audio stream.
+double VideoStreamPlaybackTheora::get_page_time(ogg_page *page) {
+ // Keep the granule position signed: the same call is read into an int64_t and tested against negative values
+ // when scanning pages, so it must not take a round trip through an unsigned type before reaching the decoders.
+ int64_t granulepos = ogg_page_granulepos(page);
+ int page_serialno = ogg_page_serialno(page);
+ double page_time = -1;
+
+ if (page_serialno == to.serialno) {
+ page_time = th_granule_time(td, granulepos);
+ }
+ // Only consult the Vorbis decoder when an audio stream is in use.
+ if (has_audio && page_serialno == vo.serialno) {
+ page_time = vorbis_granule_time(&vd, granulepos);
+ }
+
+ return page_time;
+}
+
+// Queue every page currently available from the sync layer into the streams. If none is available, read more
+// data from the file and try again until at least one page was queued or the file has no more data.
+// Returns the number of pages queued (0 means nothing more could be read).
+int VideoStreamPlaybackTheora::feed_pages() {
+ ogg_page og;
+ int queued = 0;
+
+ do {
+ // Drain all the pages the sync buffer can produce right now.
+ while (ogg_sync_pageout(&oy, &og) > 0) {
+ queue_page(&og);
+ queued++;
+ }
+ // Only refill when nothing was queued; a refill that reads 0 bytes ends the loop.
+ } while (queued == 0 && buffer_data() != 0);
+
+ return queued;
+}
+
+// Seek the video and audio streams simultaneously to find the granulepos where we should start decoding.
+// It will return the position where we should start reading pages, and the video and audio granulepos.
+// p_time == -1 is a special request that targets the last pages of the file (used to compute the stream length).
+// Returns -1 when the stream data was scanned from its very start and some stream never produced a page with a
+// granulepos to start from.
+int64_t VideoStreamPlaybackTheora::seek_streams(double p_time, int64_t &cur_video_granulepos, int64_t &cur_audio_granulepos) {
+ // Backtracking less than this is probably a waste of time.
+ const int64_t min_seek = 512 * 1024;
+ int64_t target_video_granulepos;
+ int64_t target_audio_granulepos;
+ double target_time = 0;
+ int64_t seek_pos;
+
+ // Make a guess where we should start reading in the file, and scan from there.
+ // We base the guess on the mean bitrate of the streams. It would be theoretically faster to use the bisect method but
+ // in practice there's a lot of linear scanning to do to find the right pages.
+ // We want to catch the previous keyframe to the seek time. Since we only know the max GOP, we use that.
+ if (p_time == -1) { // This is a special case to find the last packets and calculate the video length.
+ seek_pos = MAX(stream_data_size - min_seek, stream_data_offset);
+ target_video_granulepos = INT64_MAX;
+ target_audio_granulepos = INT64_MAX;
+ } else {
+ int64_t video_frame = (int64_t)(p_time / frame_duration);
+ target_video_granulepos = MAX(1LL, video_frame - (1LL << ti.keyframe_granule_shift)) << ti.keyframe_granule_shift;
+ target_audio_granulepos = 0;
+ seek_pos = MAX(((target_video_granulepos >> ti.keyframe_granule_shift) - 1) * frame_duration * stream_data_size / stream_length, stream_data_offset);
+ target_time = th_granule_time(td, target_video_granulepos);
+ if (has_audio) {
+ target_audio_granulepos = video_frame * frame_duration * vi.rate;
+ target_time = MIN(target_time, vorbis_granule_time(&vd, target_audio_granulepos));
+ }
+ }
+
+ int64_t video_seek_pos = seek_pos;
+ int64_t audio_seek_pos = seek_pos;
+ double backtrack_time = 0;
+ bool video_catch = false;
+ bool audio_catch = false;
+ int64_t last_video_granule_seek_pos = seek_pos;
+ int64_t last_audio_granule_seek_pos = seek_pos;
+
+ cur_video_granulepos = -1;
+ cur_audio_granulepos = -1;
+
+ while (!video_catch || (has_audio && !audio_catch)) { // Backtracking loop
+ if (seek_pos < stream_data_offset) {
+ seek_pos = stream_data_offset;
+ }
+ file->seek(seek_pos);
+ ogg_sync_reset(&oy);
+
+ backtrack_time = 0;
+ last_video_granule_seek_pos = seek_pos;
+ last_audio_granule_seek_pos = seek_pos;
+ while (!video_catch || (has_audio && !audio_catch)) { // Page scanning loop
+ ogg_page page;
+ // File offset of the page about to be read: current file position minus the bytes still unread in the sync buffer.
+ uint64_t last_seek_pos = file->get_position() - oy.fill + oy.returned;
+ int ret = read_page(&page);
+ if (ret <= 0) { // End of file.
+ // seek_pos is clamped to stream_data_offset at the top of the backtracking loop, so it can never be
+ // below it here. If this pass already started at the beginning of the stream data, no backtrack was
+ // requested, and a stream still has no reference page, the next pass would read exactly the same
+ // pages again: give up instead of looping forever.
+ bool video_missing = cur_video_granulepos == -1;
+ bool audio_missing = has_audio && cur_audio_granulepos == -1;
+ if (seek_pos <= stream_data_offset && backtrack_time <= 0 && (video_missing || audio_missing)) {
+ return -1;
+ }
+ seek_pos -= min_seek;
+ break;
+ }
+ int64_t cur_granulepos = ogg_page_granulepos(&page);
+ if (cur_granulepos >= 0) {
+ int page_serialno = ogg_page_serialno(&page);
+ if (!video_catch && page_serialno == to.serialno) {
+ if (cur_granulepos >= target_video_granulepos) {
+ video_catch = true;
+ if (cur_video_granulepos < 0) {
+ // Adding 1s helps catching the start of the page and avoids backtrack_time = 0.
+ backtrack_time = MAX(backtrack_time, 1 + th_granule_time(td, cur_granulepos) - target_time);
+ }
+ } else {
+ video_seek_pos = last_video_granule_seek_pos;
+ cur_video_granulepos = cur_granulepos;
+ }
+ last_video_granule_seek_pos = last_seek_pos;
+ }
+ if ((has_audio && !audio_catch) && page_serialno == vo.serialno) {
+ if (cur_granulepos >= target_audio_granulepos) {
+ audio_catch = true;
+ if (cur_audio_granulepos < 0) {
+ // Adding 1s helps catching the start of the page and avoids backtrack_time = 0.
+ backtrack_time = MAX(backtrack_time, 1 + vorbis_granule_time(&vd, cur_granulepos) - target_time);
+ }
+ } else {
+ audio_seek_pos = last_audio_granule_seek_pos;
+ cur_audio_granulepos = cur_granulepos;
+ }
+ last_audio_granule_seek_pos = last_seek_pos;
+ }
+ }
+ }
+ if (backtrack_time > 0) {
+ if (seek_pos <= stream_data_offset) {
+ break;
+ }
+ int64_t delta_seek = MAX(backtrack_time * stream_data_size / stream_length, min_seek);
+ seek_pos -= delta_seek;
+ }
+ // A stream only counts as caught once a page before the target has been recorded for it.
+ video_catch = cur_video_granulepos != -1;
+ audio_catch = cur_audio_granulepos != -1;
+ }
+
+ // No usable earlier video page: start from the beginning of the stream data.
+ if (cur_video_granulepos < (1LL << ti.keyframe_granule_shift)) {
+ video_seek_pos = stream_data_offset;
+ cur_video_granulepos = 1LL << ti.keyframe_granule_shift;
+ }
+ if (has_audio) {
+ if (cur_audio_granulepos == -1) {
+ audio_seek_pos = stream_data_offset;
+ cur_audio_granulepos = 0;
+ }
+ // Start reading at whichever stream needs the earlier position.
+ seek_pos = MIN(video_seek_pos, audio_seek_pos);
+ } else {
+ seek_pos = video_seek_pos;
+ }
+
+ return seek_pos;
+}
+
void VideoStreamPlaybackTheora::video_write(th_ycbcr_buffer yuv) {
uint8_t *w = frame_data.ptrw();
char *dst = (char *)w;
@@ -77,83 +248,53 @@ void VideoStreamPlaybackTheora::video_write(th_ycbcr_buffer yuv) {
Ref img;
img.instantiate(region.size.x, region.size.y, false, Image::FORMAT_RGBA8, frame_data); //zero copy image creation
- texture->update(img); //zero copy send to rendering server
+ texture->update(img); // Zero-copy send to rendering server.
}
+// Release the file and the decoder state of whichever streams are flagged as present (has_audio / has_video),
+// then reset the playback flags. Decoder structures are only touched when the matching flag is set.
void VideoStreamPlaybackTheora::clear() {
- if (file.is_null()) {
- return;
+ if (!file.is_null()) {
+ file.unref();
}
-
- if (vorbis_p) {
- ogg_stream_clear(&vo);
- if (vorbis_p >= 3) {
- vorbis_block_clear(&vb);
- vorbis_dsp_clear(&vd);
- }
+ if (has_audio) {
+ vorbis_block_clear(&vb);
+ vorbis_dsp_clear(&vd);
vorbis_comment_clear(&vc);
vorbis_info_clear(&vi);
- vorbis_p = 0;
+ ogg_stream_clear(&vo);
+ if (audio_buffer_size) {
+ memdelete_arr(audio_buffer);
+ }
}
- if (theora_p) {
- ogg_stream_clear(&to);
+ if (has_video) {
th_decode_free(td);
th_comment_clear(&tc);
th_info_clear(&ti);
- theora_p = 0;
+ ogg_stream_clear(&to);
+ ogg_sync_clear(&oy);
}
- ogg_sync_clear(&oy);
- theora_p = 0;
- vorbis_p = 0;
- next_frame_time = 0;
- current_frame_time = 0;
+ // Reset the pointer and its size together so the size-guarded delete above can never run on a null buffer.
+ audio_buffer = nullptr;
+ audio_buffer_size = 0;
+ playing = false;
+ has_video = false;
+ has_audio = false;
theora_eos = false;
vorbis_eos = false;
- video_ready = false;
- video_done = false;
- audio_done = false;
-
- file.unref();
- playing = false;
}
-void VideoStreamPlaybackTheora::set_file(const String &p_file) {
- ERR_FAIL_COND(playing);
+void VideoStreamPlaybackTheora::find_streams(th_setup_info *&ts) {
+ ogg_stream_state test;
ogg_packet op;
- th_setup_info *ts = nullptr;
-
- file_name = p_file;
- file = FileAccess::open(p_file, FileAccess::READ);
- ERR_FAIL_COND_MSG(file.is_null(), "Cannot open file '" + p_file + "'.");
-
- ogg_sync_init(&oy);
-
- /* init supporting Vorbis structures needed in header parsing */
- vorbis_info_init(&vi);
- vorbis_comment_init(&vc);
-
- /* init supporting Theora structures needed in header parsing */
- th_comment_init(&tc);
- th_info_init(&ti);
-
- theora_eos = false;
- vorbis_eos = false;
-
- /* Ogg file open; parse the headers */
- /* Only interested in Vorbis/Theora streams */
+ ogg_page og;
int stateflag = 0;
-
int audio_track_skip = audio_track;
+ /* Only interested in Vorbis/Theora streams */
while (!stateflag) {
int ret = buffer_data();
- if (ret == 0) {
+ if (!ret) {
break;
}
while (ogg_sync_pageout(&oy, &og) > 0) {
- ogg_stream_state test;
-
/* is this a mandated initial header? If not, stop parsing */
if (!ogg_page_bos(&og)) {
/* don't leak the page; get it into the appropriate stream */
@@ -167,11 +308,11 @@ void VideoStreamPlaybackTheora::set_file(const String &p_file) {
ogg_stream_packetout(&test, &op);
/* identify the codec: try theora */
- if (!theora_p && th_decode_headerin(&ti, &tc, &ts, &op) >= 0) {
+ if (!has_video && th_decode_headerin(&ti, &tc, &ts, &op) >= 0) {
/* it is theora */
memcpy(&to, &test, sizeof(test));
- theora_p = 1;
- } else if (!vorbis_p && vorbis_synthesis_headerin(&vi, &vc, &op) >= 0) {
+ has_video = true;
+ } else if (!has_audio && vorbis_synthesis_headerin(&vi, &vc, &op) >= 0) {
/* it is vorbis */
if (audio_track_skip) {
vorbis_info_clear(&vi);
@@ -179,141 +320,165 @@ void VideoStreamPlaybackTheora::set_file(const String &p_file) {
ogg_stream_clear(&test);
vorbis_info_init(&vi);
vorbis_comment_init(&vc);
-
audio_track_skip--;
} else {
memcpy(&vo, &test, sizeof(test));
- vorbis_p = 1;
+ has_audio = true;
}
} else {
/* whatever it is, we don't care about it */
ogg_stream_clear(&test);
}
}
- /* fall through to non-bos page parsing */
}
+}
+
+// Read the remaining codec header packets of the streams selected by find_streams(). On return has_video is
+// true only if three Theora header packets were parsed, and has_audio only if three Vorbis header packets were
+// parsed and a video stream is present. `ts` receives the Theora setup information.
+void VideoStreamPlaybackTheora::read_headers(th_setup_info *&ts) {
+ ogg_packet op;
+ // The first header packet of each stream was already consumed while identifying the codecs.
+ int theora_header_packets = 1;
+ int vorbis_header_packets = 1;
+
+ // Without a Theora stream the loop below could never complete: it would read and queue the entire file
+ // before giving up. A video stream is mandatory, so bail out early and drop the audio stream as well.
+ if (!has_video) {
+ has_audio = false;
+ return;
+ }
+
/* we're expecting more header packets. */
- while ((theora_p && theora_p < 3) || (vorbis_p && vorbis_p < 3)) {
- int ret = 0;
-
+ while (theora_header_packets < 3 || (has_audio && vorbis_header_packets < 3)) {
/* look for further theora headers */
- if (theora_p && theora_p < 3) {
- ret = ogg_stream_packetout(&to, &op);
- }
- while (theora_p && theora_p < 3 && ret) {
- if (ret < 0) {
- fprintf(stderr, "Error parsing Theora stream headers; corrupt stream?\n");
- clear();
- return;
+ // The API says there can be more than three but only three are mandatory.
+ while (theora_header_packets < 3 && ogg_stream_packetout(&to, &op) > 0) {
+ if (th_decode_headerin(&ti, &tc, &ts, &op) > 0) {
+ theora_header_packets++;
}
- if (!th_decode_headerin(&ti, &tc, &ts, &op)) {
- fprintf(stderr, "Error parsing Theora stream headers; corrupt stream?\n");
- clear();
- return;
- }
- ret = ogg_stream_packetout(&to, &op);
- theora_p++;
}
/* look for more vorbis header packets */
- if (vorbis_p && vorbis_p < 3) {
- ret = ogg_stream_packetout(&vo, &op);
+ while (has_audio && vorbis_header_packets < 3 && ogg_stream_packetout(&vo, &op) > 0) {
+ if (!vorbis_synthesis_headerin(&vi, &vc, &op)) {
+ vorbis_header_packets++;
+ }
}
- while (vorbis_p && vorbis_p < 3 && ret) {
- if (ret < 0) {
- fprintf(stderr, "Error parsing Vorbis stream headers; corrupt stream?\n");
- clear();
- return;
- }
- ret = vorbis_synthesis_headerin(&vi, &vc, &op);
- if (ret) {
- fprintf(stderr, "Error parsing Vorbis stream headers; corrupt stream?\n");
- clear();
- return;
- }
- vorbis_p++;
- if (vorbis_p == 3) {
+
+ /* The header pages/packets will arrive before anything else we care about, or the stream is not obeying spec */
+ if (theora_header_packets < 3 || (has_audio && vorbis_header_packets < 3)) {
+ ogg_page page;
+ if (read_page(&page)) {
+ queue_page(&page);
+ } else {
+ fprintf(stderr, "End of file while searching for codec headers.\n");
break;
}
- ret = ogg_stream_packetout(&vo, &op);
}
+ }
- /* The header pages/packets will arrive before anything else we
- care about, or the stream is not obeying spec */
+ has_video = theora_header_packets == 3;
+ // Audio is only usable alongside a complete video stream; otherwise report it as absent so the caller
+ // releases the Vorbis structures instead of treating them as initialized.
+ has_audio = has_video && vorbis_header_packets == 3;
+}
- if (ogg_sync_pageout(&oy, &og) > 0) {
- queue_page(&og); /* demux into the appropriate stream */
- } else {
- int ret2 = buffer_data(); /* someone needs more data */
- if (ret2 == 0) {
- fprintf(stderr, "End of file while searching for codec headers.\n");
- clear();
- return;
- }
+// Open p_file, parse the Ogg headers, set up the Theora decoder (and the Vorbis decoder when an audio stream is
+// present), measure the stream length from the last pages of the file and rewind to the start.
+// Fails without changes if playback is active; if the file has no usable video stream, everything allocated
+// here is released again and the file is closed.
+void VideoStreamPlaybackTheora::set_file(const String &p_file) {
+ ERR_FAIL_COND(playing);
+ th_setup_info *ts = nullptr;
+
+ clear();
+
+ file = FileAccess::open(p_file, FileAccess::READ);
+ ERR_FAIL_COND_MSG(file.is_null(), "Cannot open file '" + p_file + "'.");
+
+ file_name = p_file;
+
+ ogg_sync_init(&oy);
+
+ /* init supporting Vorbis structures needed in header parsing */
+ vorbis_info_init(&vi);
+ vorbis_comment_init(&vc);
+
+ /* init supporting Theora structures needed in header parsing */
+ th_comment_init(&tc);
+ th_info_init(&ti);
+
+ /* Zero stream state structs so they can be checked later. */
+ memset(&to, 0, sizeof(to));
+ memset(&vo, 0, sizeof(vo));
+
+ /* Ogg file open; parse the headers */
+ find_streams(ts);
+ read_headers(ts);
+
+ // Release the Vorbis structures when there is no audio, and also when there is audio but no video: in that
+ // case the audio decoder is never initialized below, so has_audio must not stay set for clear() to act on.
+ if (!has_audio || !has_video) {
+ vorbis_comment_clear(&vc);
+ vorbis_info_clear(&vi);
+ if (!ogg_stream_check(&vo)) {
+ ogg_stream_clear(&vo);
}
+ has_audio = false;
}
+ // One video stream is mandatory.
+ if (!has_video) {
+ th_setup_free(ts);
+ th_comment_clear(&tc);
+ th_info_clear(&ti);
+ if (!ogg_stream_check(&to)) {
+ ogg_stream_clear(&to);
+ }
+ // clear() only releases the sync state when has_video is set, so free it here to avoid leaking its buffer.
+ ogg_sync_clear(&oy);
+ file.unref();
+ return;
+ }
+
/* And now we have it all. Initialize decoders. */
- if (theora_p) {
- td = th_decode_alloc(&ti, ts);
- px_fmt = ti.pixel_fmt;
- switch (ti.pixel_fmt) {
- case TH_PF_420:
- //printf(" 4:2:0 video\n");
- break;
- case TH_PF_422:
- //printf(" 4:2:2 video\n");
- break;
- case TH_PF_444:
- //printf(" 4:4:4 video\n");
- break;
- case TH_PF_RSVD:
- default:
- printf(" video\n (UNKNOWN Chroma sampling!)\n");
- break;
- }
- th_decode_ctl(td, TH_DECCTL_GET_PPLEVEL_MAX, &pp_level_max,
- sizeof(pp_level_max));
- pp_level = 0;
- th_decode_ctl(td, TH_DECCTL_SET_PPLEVEL, &pp_level, sizeof(pp_level));
- pp_inc = 0;
-
- size.x = ti.frame_width;
- size.y = ti.frame_height;
- region.position.x = ti.pic_x;
- region.position.y = ti.pic_y;
- region.size.x = ti.pic_width;
- region.size.y = ti.pic_height;
-
- Ref img = Image::create_empty(region.size.x, region.size.y, false, Image::FORMAT_RGBA8);
- texture->set_image(img);
- frame_data.resize(region.size.x * region.size.y * 4);
-
- frame_duration = (double)ti.fps_denominator / ti.fps_numerator;
- } else {
- /* tear down the partial theora setup */
- th_info_clear(&ti);
- th_comment_clear(&tc);
- }
-
+ td = th_decode_alloc(&ti, ts);
th_setup_free(ts);
+ px_fmt = ti.pixel_fmt;
+ switch (ti.pixel_fmt) {
+ case TH_PF_420:
+ case TH_PF_422:
+ case TH_PF_444:
+ break;
+ default:
+ WARN_PRINT(" video\n (UNKNOWN Chroma sampling!)\n");
+ break;
+ }
+ th_decode_ctl(td, TH_DECCTL_GET_PPLEVEL_MAX, &pp_level_max, sizeof(pp_level_max));
+ pp_level = 0;
+ th_decode_ctl(td, TH_DECCTL_SET_PPLEVEL, &pp_level, sizeof(pp_level));
+ pp_inc = 0;
- if (vorbis_p) {
+ size.x = ti.frame_width;
+ size.y = ti.frame_height;
+ region.position.x = ti.pic_x;
+ region.position.y = ti.pic_y;
+ region.size.x = ti.pic_width;
+ region.size.y = ti.pic_height;
+
+ Ref img = Image::create_empty(region.size.x, region.size.y, false, Image::FORMAT_RGBA8);
+ texture->set_image(img);
+ frame_data.resize(region.size.x * region.size.y * 4);
+
+ frame_duration = (double)ti.fps_denominator / ti.fps_numerator;
+
+ if (has_audio) {
vorbis_synthesis_init(&vd, &vi);
vorbis_block_init(&vd, &vb);
- //_setup(vi.channels, vi.rate);
- } else {
- /* tear down the partial vorbis setup */
- vorbis_info_clear(&vi);
- vorbis_comment_clear(&vc);
+ audio_buffer_size = MIN(vi.channels, 8) * 1024;
+ audio_buffer = memnew_arr(float, audio_buffer_size);
}
- playing = false;
- buffering = true;
- time = 0;
- video_done = !theora_p;
- audio_done = !vorbis_p;
+ // Stream data starts at the first byte the sync layer has not handed out yet.
+ stream_data_offset = file->get_position() - oy.fill + oy.returned;
+ stream_data_size = file->get_length() - stream_data_offset;
+
+ // Sync to last page to find video length.
+ int64_t seek_pos = MAX(stream_data_offset, (int64_t)file->get_length() - 64 * 1024);
+ int64_t video_granulepos = INT64_MAX;
+ int64_t audio_granulepos = INT64_MAX;
+ file->seek(seek_pos);
+ seek_pos = seek_streams(-1, video_granulepos, audio_granulepos);
+ // seek_streams() reports failure with a negative position (seek() checks for it as well); fall back to
+ // scanning from the start of the stream data rather than seeking to an invalid offset.
+ if (seek_pos < 0) {
+ seek_pos = stream_data_offset;
+ }
+ file->seek(seek_pos);
+ ogg_sync_reset(&oy);
+
+ stream_length = 0;
+ ogg_page page;
+ while (read_page(&page) > 0) {
+ // Use MAX because, even though pages are ordered, page time can be -1
+ // for pages without full frames. Streams could be truncated too.
+ stream_length = MAX(stream_length, get_page_time(&page));
+ }
+
+ seek(0);
}
double VideoStreamPlaybackTheora::get_time() const {
@@ -346,28 +511,32 @@ void VideoStreamPlaybackTheora::update(double p_delta) {
ogg_packet op;
while (!audio_ready && !audio_done) {
+ // Send remaining frames
+ if (!send_audio()) {
+ audio_ready = true;
+ break;
+ }
+
float **pcm;
int ret = vorbis_synthesis_pcmout(&vd, &pcm);
if (ret > 0) {
- const int AUXBUF_LEN = 4096;
- int to_read = ret;
- float aux_buffer[AUXBUF_LEN];
- while (to_read) {
- int m = MIN(AUXBUF_LEN / vi.channels, to_read);
+ int frames_read = 0;
+ while (frames_read < ret) {
+ int m = MIN(audio_buffer_size / vi.channels, ret - frames_read);
int count = 0;
for (int j = 0; j < m; j++) {
for (int i = 0; i < vi.channels; i++) {
- aux_buffer[count++] = pcm[i][j];
+ audio_buffer[count++] = pcm[i][frames_read + j];
}
}
- int mixed = mix_callback(mix_udata, aux_buffer, m);
- to_read -= mixed;
- if (mixed != m) { //could mix no more
+ frames_read += m;
+ audio_ptr_end = m;
+ if (!send_audio()) {
audio_ready = true;
break;
}
}
- vorbis_synthesis_read(&vd, ret - to_read);
+ vorbis_synthesis_read(&vd, frames_read);
} else {
/* no pending audio; is there a pending packet to decode? */
if (ogg_stream_packetout(&vo, &op) > 0) {
@@ -383,17 +552,10 @@ void VideoStreamPlaybackTheora::update(double p_delta) {
while (!video_ready && !video_done) {
if (ogg_stream_packetout(&to, &op) > 0) {
- /*HACK: This should be set after a seek or a gap, but we might not have
- a granulepos for the first packet (we only have them for the last
- packet on a page), so we just set it as often as we get it.
- To do this right, we should back-track from the last packet on the
- page and compute the correct granulepos for the first packet after
- a seek or a gap.*/
if (op.granulepos >= 0) {
- th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &op.granulepos,
- sizeof(op.granulepos));
+ th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &op.granulepos, sizeof(op.granulepos));
}
- ogg_int64_t videobuf_granulepos;
+ int64_t videobuf_granulepos;
int ret = th_decode_packetin(td, &op, &videobuf_granulepos);
if (ret == 0 || ret == TH_DUPFRAME) {
next_frame_time = th_granule_time(td, videobuf_granulepos);
@@ -412,12 +574,8 @@ void VideoStreamPlaybackTheora::update(double p_delta) {
}
if (!video_ready || !audio_ready) {
- int ret = buffer_data();
- if (ret > 0) {
- while (ogg_sync_pageout(&oy, &og) > 0) {
- queue_page(&og);
- }
- } else {
+ int ret = feed_pages();
+ if (ret == 0) {
vorbis_eos = true;
theora_eos = true;
break;
@@ -452,10 +610,8 @@ void VideoStreamPlaybackTheora::update(double p_delta) {
}
void VideoStreamPlaybackTheora::play() {
- if (!playing) {
- time = 0;
- } else {
- stop();
+ if (playing) {
+ return;
}
playing = true;
@@ -464,12 +620,8 @@ void VideoStreamPlaybackTheora::play() {
}
+// Stop playback and rewind by seeking to time 0; the file is no longer closed and reopened to reset it.
void VideoStreamPlaybackTheora::stop() {
- if (playing) {
- clear();
- set_file(file_name); //reset
- }
playing = false;
- time = 0;
+ seek(0);
}
bool VideoStreamPlaybackTheora::is_playing() const {
@@ -485,7 +637,7 @@ bool VideoStreamPlaybackTheora::is_paused() const {
}
+// Return the stream length stored in stream_length (0 until a length has been measured).
double VideoStreamPlaybackTheora::get_length() const {
- return 0;
+ return stream_length;
}
double VideoStreamPlaybackTheora::get_playback_position() const {
@@ -493,7 +645,123 @@ double VideoStreamPlaybackTheora::get_playback_position() const {
}
+// Move playback to p_time: reset the decoding state, locate the file position to read from with seek_streams(),
+// then decode and discard audio and video until p_time is reached. Does nothing when no file is loaded or when
+// p_time is at or past the end of the stream. Negative times are treated as 0.
void VideoStreamPlaybackTheora::seek(double p_time) {
- WARN_PRINT_ONCE("Seeking in Theora videos is not implemented yet (it's only supported for GDExtension-provided video streams).");
+ if (file.is_null()) {
+ return;
+ }
+ // Negative times are not valid positions, and -1 in particular is the value seek_streams() interprets as
+ // "find the end of the stream", so it must never be forwarded from a caller-supplied time.
+ if (p_time < 0) {
+ p_time = 0;
+ }
+ if (p_time >= stream_length) {
+ return;
+ }
+
+ video_ready = false;
+ next_frame_time = 0;
+ current_frame_time = -1;
+ dup_frame = false;
+ video_done = false;
+ audio_done = !has_audio;
+ theora_eos = false;
+ vorbis_eos = false;
+ audio_ptr_start = 0;
+ audio_ptr_end = 0;
+
+ ogg_stream_reset(&to);
+ if (has_audio) {
+ ogg_stream_reset(&vo);
+ vorbis_synthesis_restart(&vd);
+ }
+
+ int64_t seek_pos;
+ int64_t video_granulepos;
+ int64_t audio_granulepos;
+ // Find the granules we need so we can start playing at the seek time.
+ seek_pos = seek_streams(p_time, video_granulepos, audio_granulepos);
+ if (seek_pos < 0) {
+ return;
+ }
+ file->seek(seek_pos);
+ ogg_sync_reset(&oy);
+
+ time = p_time;
+
+ double last_audio_time = 0;
+ double last_video_time = 0;
+ bool first_frame_decoded = false;
+ // These are the values seek_streams() hands back when a stream has to be read from its beginning.
+ bool start_audio = (audio_granulepos == 0);
+ bool start_video = (video_granulepos == (1LL << ti.keyframe_granule_shift));
+ bool keyframe_found = false;
+ uint64_t current_frame = 0;
+
+ // Read from the streams skipping pages until we reach the granules we want. We won't skip pages from both video and
+ // audio streams, only one of them, until decoding of both starts.
+ // video_granulepos and audio_granulepos are guaranteed to be found by checking the granulepos in the packets, no
+ // need to keep track of packets with granulepos == -1 until decoding starts.
+ while ((has_audio && last_audio_time < p_time) || (last_video_time <= p_time)) {
+ ogg_packet op;
+ if (feed_pages() == 0) {
+ break;
+ }
+ while (has_audio && last_audio_time < p_time && ogg_stream_packetout(&vo, &op) > 0) {
+ if (start_audio) {
+ if (vorbis_synthesis(&vb, &op) == 0) { /* test for success! */
+ vorbis_synthesis_blockin(&vd, &vb);
+ float **pcm;
+ // Consume only the samples that come before p_time; the rest stay queued in the decoder.
+ int samples_left = ceil((p_time - last_audio_time) * vi.rate);
+ int samples_read = vorbis_synthesis_pcmout(&vd, &pcm);
+ int samples_consumed = MIN(samples_left, samples_read);
+ vorbis_synthesis_read(&vd, samples_consumed);
+ last_audio_time += (double)samples_consumed / vi.rate;
+ }
+ } else if (op.granulepos >= audio_granulepos) {
+ last_audio_time = vorbis_granule_time(&vd, op.granulepos);
+ // Start tracking audio now. This won't produce any samples but will update the decoder state.
+ if (vorbis_synthesis_trackonly(&vb, &op) == 0) {
+ vorbis_synthesis_blockin(&vd, &vb);
+ }
+ start_audio = true;
+ }
+ }
+ while (last_video_time <= p_time && ogg_stream_packetout(&to, &op) > 0) {
+ if (!start_video && (op.granulepos >= video_granulepos || video_granulepos == (1LL << ti.keyframe_granule_shift))) {
+ if (op.granulepos > 0) {
+ current_frame = th_granule_frame(td, op.granulepos);
+ }
+ start_video = true;
+ }
+ // Don't start decoding until a keyframe is found, but count frames.
+ if (start_video) {
+ if (!keyframe_found && th_packet_iskeyframe(&op)) {
+ keyframe_found = true;
+ // Tell the decoder which granule position this keyframe has, based on the frames counted so far.
+ int64_t cur_granulepos = (current_frame + 1) << ti.keyframe_granule_shift;
+ th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &cur_granulepos, sizeof(cur_granulepos));
+ }
+ if (keyframe_found) {
+ int64_t videobuf_granulepos;
+ if (op.granulepos >= 0) {
+ th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &op.granulepos, sizeof(op.granulepos));
+ }
+ int ret = th_decode_packetin(td, &op, &videobuf_granulepos);
+ if (ret == 0 || ret == TH_DUPFRAME) {
+ last_video_time = th_granule_time(td, videobuf_granulepos);
+ first_frame_decoded = true;
+ }
+ } else {
+ current_frame++;
+ }
+ }
+ }
+ }
+
+ if (first_frame_decoded) {
+ if (is_playing()) {
+ // Draw the current frame.
+ th_ycbcr_buffer yuv;
+ th_decode_ycbcr_out(td, yuv);
+ video_write(yuv);
+ current_frame_time = last_video_time;
+ } else {
+ // Not playing: leave the decoded frame flagged as ready instead of drawing it now.
+ next_frame_time = current_frame_time;
+ video_ready = true;
+ }
+ }
}
int VideoStreamPlaybackTheora::get_channels() const {
diff --git a/modules/theora/video_stream_theora.h b/modules/theora/video_stream_theora.h
index 3e1b33dda1a..46cb88b0505 100644
--- a/modules/theora/video_stream_theora.h
+++ b/modules/theora/video_stream_theora.h
@@ -51,8 +51,19 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
Point2i size;
Rect2i region;
+ float *audio_buffer = nullptr;
+ int audio_buffer_size = 0;
+ int audio_ptr_start = 0;
+ int audio_ptr_end = 0;
+
int buffer_data();
int queue_page(ogg_page *page);
+ int read_page(ogg_page *page);
+ int feed_pages();
+ double get_page_time(ogg_page *page);
+ int64_t seek_streams(double p_time, int64_t &video_granulepos, int64_t &audio_granulepos);
+ void find_streams(th_setup_info *&ts);
+ void read_headers(th_setup_info *&ts);
void video_write(th_ycbcr_buffer yuv);
double get_time() const;
@@ -60,7 +71,6 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
bool vorbis_eos = false;
ogg_sync_state oy;
- ogg_page og;
ogg_stream_state vo;
ogg_stream_state to;
th_info ti;
@@ -71,19 +81,21 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
vorbis_block vb;
vorbis_comment vc;
th_pixel_fmt px_fmt;
- double frame_duration;
+ double frame_duration = 0;
+ double stream_length = 0;
+ int64_t stream_data_offset = 0;
+ int64_t stream_data_size = 0;
- int theora_p = 0;
- int vorbis_p = 0;
int pp_level_max = 0;
int pp_level = 0;
int pp_inc = 0;
bool playing = false;
- bool buffering = false;
bool paused = false;
bool dup_frame = false;
+ bool has_video = false;
+ bool has_audio = false;
bool video_ready = false;
bool video_done = false;
bool audio_done = false;
@@ -100,6 +112,20 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
protected:
void clear();
+ // Hand the frames pending in audio_buffer (between audio_ptr_start and audio_ptr_end) to the mix callback.
+ // Returns true when nothing is pending or everything pending was accepted, false when frames are left over
+ // and must be sent again later.
+ _FORCE_INLINE_ bool send_audio() {
+ if (audio_ptr_end <= 0) {
+ return true;
+ }
+ const int pending_frames = audio_ptr_end - audio_ptr_start;
+ const int mixed = mix_callback(mix_udata, &audio_buffer[audio_ptr_start * vi.channels], pending_frames);
+ audio_ptr_start += mixed;
+ if (audio_ptr_start != audio_ptr_end) {
+ return false;
+ }
+ // Everything was consumed: mark the buffer as empty.
+ audio_ptr_start = 0;
+ audio_ptr_end = 0;
+ return true;
+ }
+
+
public:
virtual void play() override;
virtual void stop() override;
diff --git a/scene/gui/video_stream_player.cpp b/scene/gui/video_stream_player.cpp
index 8e2d522a823..e98f5b254a3 100644
--- a/scene/gui/video_stream_player.cpp
+++ b/scene/gui/video_stream_player.cpp
@@ -339,7 +339,6 @@ void VideoStreamPlayer::play() {
if (playback.is_null()) {
return;
}
- playback->stop();
playback->play();
set_process_internal(true);
last_audio_time = 0;
@@ -468,7 +467,9 @@ double VideoStreamPlayer::get_stream_position() const {
void VideoStreamPlayer::set_stream_position(double p_position) {
if (playback.is_valid()) {
+ resampler.flush();
playback->seek(p_position);
+ last_audio_time = 0;
}
}
diff --git a/servers/audio/audio_rb_resampler.cpp b/servers/audio/audio_rb_resampler.cpp
index 94c3f0dd36c..a3351889d69 100644
--- a/servers/audio/audio_rb_resampler.cpp
+++ b/servers/audio/audio_rb_resampler.cpp
@@ -75,23 +75,37 @@ uint32_t AudioRBResampler::_resample(AudioFrame *p_dest, int p_todo, int32_t p_i
p_dest[i] = AudioFrame(v0, v1);
}
- // This will probably never be used, but added anyway
+ // Downmix to stereo. Apply -3dB to center, and sides, -6dB to rear.
+
+ // four channels - channel order: front left, front right, rear left, rear right
if constexpr (C == 4) {
- float v0 = rb[(pos << 2) + 0];
- float v1 = rb[(pos << 2) + 1];
- float v0n = rb[(pos_next << 2) + 0];
- float v1n = rb[(pos_next << 2) + 1];
+ float v0 = rb[(pos << 2) + 0] + rb[(pos << 2) + 2] / 2;
+ float v1 = rb[(pos << 2) + 1] + rb[(pos << 2) + 3] / 2;
+ float v0n = rb[(pos_next << 2) + 0] + rb[(pos_next << 2) + 2] / 2;
+ float v1n = rb[(pos_next << 2) + 1] + rb[(pos_next << 2) + 3] / 2;
v0 += (v0n - v0) * frac;
v1 += (v1n - v1) * frac;
p_dest[i] = AudioFrame(v0, v1);
}
+ // six channels - channel order: front left, center, front right, rear left, rear right, LFE
if constexpr (C == 6) {
- float v0 = rb[(pos * 6) + 0];
- float v1 = rb[(pos * 6) + 1];
- float v0n = rb[(pos_next * 6) + 0];
- float v1n = rb[(pos_next * 6) + 1];
+ float v0 = rb[(pos * 6) + 0] + rb[(pos * 6) + 1] / Math::SQRT2 + rb[(pos * 6) + 3] / 2;
+ float v1 = rb[(pos * 6) + 2] + rb[(pos * 6) + 1] / Math::SQRT2 + rb[(pos * 6) + 4] / 2;
+ float v0n = rb[(pos_next * 6) + 0] + rb[(pos_next * 6) + 1] / Math::SQRT2 + rb[(pos_next * 6) + 3] / 2;
+ float v1n = rb[(pos_next * 6) + 2] + rb[(pos_next * 6) + 1] / Math::SQRT2 + rb[(pos_next * 6) + 4] / 2;
+ v0 += (v0n - v0) * frac;
+ v1 += (v1n - v1) * frac;
+ p_dest[i] = AudioFrame(v0, v1);
+ }
+ // eight channels - channel order: front left, center, front right, side left, side right, rear left, rear
+ // right, LFE
+ if constexpr (C == 8) {
+ float v0 = rb[(pos << 3) + 0] + rb[(pos << 3) + 1] / Math::SQRT2 + rb[(pos << 3) + 3] / Math::SQRT2 + rb[(pos << 3) + 5] / 2;
+ float v1 = rb[(pos << 3) + 2] + rb[(pos << 3) + 1] / Math::SQRT2 + rb[(pos << 3) + 4] / Math::SQRT2 + rb[(pos << 3) + 6] / 2;
+ float v0n = rb[(pos_next << 3) + 0] + rb[(pos_next << 3) + 1] / Math::SQRT2 + rb[(pos_next << 3) + 3] / Math::SQRT2 + rb[(pos_next << 3) + 5] / 2;
+ float v1n = rb[(pos_next << 3) + 2] + rb[(pos_next << 3) + 1] / Math::SQRT2 + rb[(pos_next << 3) + 4] / Math::SQRT2 + rb[(pos_next << 3) + 6] / 2;
v0 += (v0n - v0) * frac;
v1 += (v1n - v1) * frac;
p_dest[i] = AudioFrame(v0, v1);
@@ -125,6 +139,9 @@ bool AudioRBResampler::mix(AudioFrame *p_dest, int p_frames) {
case 6:
src_read = _resample<6>(p_dest, target_todo, increment);
break;
+ case 8:
+ src_read = _resample<8>(p_dest, target_todo, increment);
+ break;
}
if (src_read > read_space) {
@@ -159,7 +176,7 @@ int AudioRBResampler::get_num_of_ready_frames() {
}
Error AudioRBResampler::setup(int p_channels, int p_src_mix_rate, int p_target_mix_rate, int p_buffer_msec, int p_minbuff_needed) {
- ERR_FAIL_COND_V(p_channels != 1 && p_channels != 2 && p_channels != 4 && p_channels != 6, ERR_INVALID_PARAMETER);
+ ERR_FAIL_COND_V(p_channels != 1 && p_channels != 2 && p_channels != 4 && p_channels != 6 && p_channels != 8, ERR_INVALID_PARAMETER);
int desired_rb_bits = nearest_shift(MAX((p_buffer_msec / 1000.0) * p_src_mix_rate, p_minbuff_needed));
diff --git a/servers/audio/audio_rb_resampler.h b/servers/audio/audio_rb_resampler.h
index dd396c8bbb7..2acbc44097f 100644
--- a/servers/audio/audio_rb_resampler.h
+++ b/servers/audio/audio_rb_resampler.h
@@ -152,6 +152,19 @@ public:
wp = (wp + 1) & rb_mask;
}
} break;
+ case 8: {
+ for (uint32_t i = 0; i < p_frames; i++) {
+ rb[(wp << 3) + 0] = read_buf[(i << 3) + 0];
+ rb[(wp << 3) + 1] = read_buf[(i << 3) + 1];
+ rb[(wp << 3) + 2] = read_buf[(i << 3) + 2];
+ rb[(wp << 3) + 3] = read_buf[(i << 3) + 3];
+ rb[(wp << 3) + 4] = read_buf[(i << 3) + 4];
+ rb[(wp << 3) + 5] = read_buf[(i << 3) + 5];
+ rb[(wp << 3) + 6] = read_buf[(i << 3) + 6];
+ rb[(wp << 3) + 7] = read_buf[(i << 3) + 7];
+ wp = (wp + 1) & rb_mask;
+ }
+ } break;
}
rb_write_pos.set(wp);