diff --git a/doc/classes/VideoStreamPlayer.xml b/doc/classes/VideoStreamPlayer.xml index f903f171d10..46bc4f491d6 100644 --- a/doc/classes/VideoStreamPlayer.xml +++ b/doc/classes/VideoStreamPlayer.xml @@ -16,7 +16,6 @@ The length of the current stream, in seconds. - [b]Note:[/b] For [VideoStreamTheora] streams (the built-in format supported by Godot), this value will always be zero, as getting the stream length is not implemented yet. The feature may be supported by video formats implemented by a GDExtension add-on. @@ -79,7 +78,6 @@ The current position of the stream, in seconds. - [b]Note:[/b] Changing this value won't have any effect as seeking is not implemented yet, except in video formats implemented by a GDExtension add-on. Audio volume as a linear value. diff --git a/modules/theora/video_stream_theora.cpp b/modules/theora/video_stream_theora.cpp index 02a1c7f8687..e8eefbacf2e 100644 --- a/modules/theora/video_stream_theora.cpp +++ b/modules/theora/video_stream_theora.cpp @@ -41,17 +41,15 @@ int VideoStreamPlaybackTheora::buffer_data() { uint64_t bytes = file->get_buffer((uint8_t *)buffer, 4096); ogg_sync_wrote(&oy, bytes); - return (bytes); + return bytes; } int VideoStreamPlaybackTheora::queue_page(ogg_page *page) { - if (theora_p) { - ogg_stream_pagein(&to, page); - if (to.e_o_s) { - theora_eos = true; - } + ogg_stream_pagein(&to, page); + if (to.e_o_s) { + theora_eos = true; } - if (vorbis_p) { + if (has_audio) { ogg_stream_pagein(&vo, page); if (vo.e_o_s) { vorbis_eos = true; @@ -60,6 +58,179 @@ int VideoStreamPlaybackTheora::queue_page(ogg_page *page) { return 0; } +int VideoStreamPlaybackTheora::read_page(ogg_page *page) { + int ret = 0; + + while (ret <= 0) { + ret = ogg_sync_pageout(&oy, page); + if (ret <= 0) { + int bytes = buffer_data(); + if (bytes == 0) { + return 0; + } + } + } + + return ret; +} + +double VideoStreamPlaybackTheora::get_page_time(ogg_page *page) { + uint64_t granulepos = ogg_page_granulepos(page); + int page_serialno = ogg_page_serialno(page); + double page_time = -1; + + if (page_serialno == to.serialno) { + page_time = th_granule_time(td, granulepos); + } + if (has_audio && page_serialno == vo.serialno) { + page_time = vorbis_granule_time(&vd, granulepos); + } + + return page_time; +} + +// Read one buffer worth of pages and feed them to the streams. +int VideoStreamPlaybackTheora::feed_pages() { + int pages = 0; + ogg_page og; + + while (pages == 0) { + while (ogg_sync_pageout(&oy, &og) > 0) { + queue_page(&og); + pages++; + } + if (pages == 0) { + int bytes = buffer_data(); + if (bytes == 0) { + break; + } + } + } + + return pages; +} + +// Seek the video and audio streams simultaneously to find the granulepos where we should start decoding. +// It will return the position where we should start reading pages, and the video and audio granulepos. +int64_t VideoStreamPlaybackTheora::seek_streams(double p_time, int64_t &cur_video_granulepos, int64_t &cur_audio_granulepos) { + // Backtracking less than this is probably a waste of time. + const int64_t min_seek = 512 * 1024; + int64_t target_video_granulepos; + int64_t target_audio_granulepos; + double target_time = 0; + int64_t seek_pos; + + // Make a guess where we should start reading in the file, and scan from there. + // We base the guess on the mean bitrate of the streams. It would be theoretically faster to use the bisect method but + // in practice there's a lot of linear scanning to do to find the right pages. + // We want to catch the previous keyframe to the seek time. Since we only know the max GOP, we use that. + if (p_time == -1) { // This is a special case to find the last packets and calculate the video length. + seek_pos = MAX(stream_data_size - min_seek, stream_data_offset); + target_video_granulepos = INT64_MAX; + target_audio_granulepos = INT64_MAX; + } else { + int64_t video_frame = (int64_t)(p_time / frame_duration); + target_video_granulepos = MAX(1LL, video_frame - (1LL << ti.keyframe_granule_shift)) << ti.keyframe_granule_shift; + target_audio_granulepos = 0; + seek_pos = MAX(((target_video_granulepos >> ti.keyframe_granule_shift) - 1) * frame_duration * stream_data_size / stream_length, stream_data_offset); + target_time = th_granule_time(td, target_video_granulepos); + if (has_audio) { + target_audio_granulepos = video_frame * frame_duration * vi.rate; + target_time = MIN(target_time, vorbis_granule_time(&vd, target_audio_granulepos)); + } + } + + int64_t video_seek_pos = seek_pos; + int64_t audio_seek_pos = seek_pos; + double backtrack_time = 0; + bool video_catch = false; + bool audio_catch = false; + int64_t last_video_granule_seek_pos = seek_pos; + int64_t last_audio_granule_seek_pos = seek_pos; + + cur_video_granulepos = -1; + cur_audio_granulepos = -1; + + while (!video_catch || (has_audio && !audio_catch)) { // Backtracking loop + if (seek_pos < stream_data_offset) { + seek_pos = stream_data_offset; + } + file->seek(seek_pos); + ogg_sync_reset(&oy); + + backtrack_time = 0; + last_video_granule_seek_pos = seek_pos; + last_audio_granule_seek_pos = seek_pos; + while (!video_catch || (has_audio && !audio_catch)) { // Page scanning loop + ogg_page page; + uint64_t last_seek_pos = file->get_position() - oy.fill + oy.returned; + int ret = read_page(&page); + if (ret <= 0) { // End of file. + if (seek_pos < stream_data_offset) { // We've already searched the whole file + return -1; + } + seek_pos -= min_seek; + break; + } + int64_t cur_granulepos = ogg_page_granulepos(&page); + if (cur_granulepos >= 0) { + int page_serialno = ogg_page_serialno(&page); + if (!video_catch && page_serialno == to.serialno) { + if (cur_granulepos >= target_video_granulepos) { + video_catch = true; + if (cur_video_granulepos < 0) { + // Adding 1s helps catching the start of the page and avoids backtrack_time = 0. + backtrack_time = MAX(backtrack_time, 1 + th_granule_time(td, cur_granulepos) - target_time); + } + } else { + video_seek_pos = last_video_granule_seek_pos; + cur_video_granulepos = cur_granulepos; + } + last_video_granule_seek_pos = last_seek_pos; + } + if ((has_audio && !audio_catch) && page_serialno == vo.serialno) { + if (cur_granulepos >= target_audio_granulepos) { + audio_catch = true; + if (cur_audio_granulepos < 0) { + // Adding 1s helps catching the start of the page and avoids backtrack_time = 0. + backtrack_time = MAX(backtrack_time, 1 + vorbis_granule_time(&vd, cur_granulepos) - target_time); + } + } else { + audio_seek_pos = last_audio_granule_seek_pos; + cur_audio_granulepos = cur_granulepos; + } + last_audio_granule_seek_pos = last_seek_pos; + } + } + } + if (backtrack_time > 0) { + if (seek_pos <= stream_data_offset) { + break; + } + int64_t delta_seek = MAX(backtrack_time * stream_data_size / stream_length, min_seek); + seek_pos -= delta_seek; + } + video_catch = cur_video_granulepos != -1; + audio_catch = cur_audio_granulepos != -1; + } + + if (cur_video_granulepos < (1LL << ti.keyframe_granule_shift)) { + video_seek_pos = stream_data_offset; + cur_video_granulepos = 1LL << ti.keyframe_granule_shift; + } + if (has_audio) { + if (cur_audio_granulepos == -1) { + audio_seek_pos = stream_data_offset; + cur_audio_granulepos = 0; + } + seek_pos = MIN(video_seek_pos, audio_seek_pos); + } else { + seek_pos = video_seek_pos; + } + + return seek_pos; +} + void VideoStreamPlaybackTheora::video_write(th_ycbcr_buffer yuv) { uint8_t *w = frame_data.ptrw(); char *dst = (char *)w; @@ -77,83 +248,53 @@ void VideoStreamPlaybackTheora::video_write(th_ycbcr_buffer yuv) { Ref img; img.instantiate(region.size.x, region.size.y, false, Image::FORMAT_RGBA8, frame_data); //zero copy image creation - texture->update(img); //zero copy send to rendering server + texture->update(img); // Zero-copy send to rendering server. } void VideoStreamPlaybackTheora::clear() { - if (file.is_null()) { - return; + if (!file.is_null()) { + file.unref(); } - - if (vorbis_p) { - ogg_stream_clear(&vo); - if (vorbis_p >= 3) { - vorbis_block_clear(&vb); - vorbis_dsp_clear(&vd); - } + if (has_audio) { + vorbis_block_clear(&vb); + vorbis_dsp_clear(&vd); vorbis_comment_clear(&vc); vorbis_info_clear(&vi); - vorbis_p = 0; + ogg_stream_clear(&vo); + if (audio_buffer_size) { + memdelete_arr(audio_buffer); + } } - if (theora_p) { - ogg_stream_clear(&to); + if (has_video) { th_decode_free(td); th_comment_clear(&tc); th_info_clear(&ti); - theora_p = 0; + ogg_stream_clear(&to); + ogg_sync_clear(&oy); } - ogg_sync_clear(&oy); - theora_p = 0; - vorbis_p = 0; - next_frame_time = 0; - current_frame_time = 0; + audio_buffer = nullptr; + playing = false; + has_video = false; + has_audio = false; theora_eos = false; vorbis_eos = false; - video_ready = false; - video_done = false; - audio_done = false; - - file.unref(); - playing = false; } -void VideoStreamPlaybackTheora::set_file(const String &p_file) { - ERR_FAIL_COND(playing); +void VideoStreamPlaybackTheora::find_streams(th_setup_info *&ts) { + ogg_stream_state test; ogg_packet op; - th_setup_info *ts = nullptr; - - file_name = p_file; - file = FileAccess::open(p_file, FileAccess::READ); - ERR_FAIL_COND_MSG(file.is_null(), "Cannot open file '" + p_file + "'."); - - ogg_sync_init(&oy); - - /* init supporting Vorbis structures needed in header parsing */ - vorbis_info_init(&vi); - vorbis_comment_init(&vc); - - /* init supporting Theora structures needed in header parsing */ - th_comment_init(&tc); - th_info_init(&ti); - - theora_eos = false; - vorbis_eos = false; - - /* Ogg file open; parse the headers */ - /* Only interested in Vorbis/Theora streams */ + ogg_page og; int stateflag = 0; - int audio_track_skip = audio_track; + /* Only interested in Vorbis/Theora streams */ while (!stateflag) { int ret = buffer_data(); - if (ret == 0) { + if (!ret) { break; } while (ogg_sync_pageout(&oy, &og) > 0) { - ogg_stream_state test; - /* is this a mandated initial header? If not, stop parsing */ if (!ogg_page_bos(&og)) { /* don't leak the page; get it into the appropriate stream */ @@ -167,11 +308,11 @@ void VideoStreamPlaybackTheora::set_file(const String &p_file) { ogg_stream_packetout(&test, &op); /* identify the codec: try theora */ - if (!theora_p && th_decode_headerin(&ti, &tc, &ts, &op) >= 0) { + if (!has_video && th_decode_headerin(&ti, &tc, &ts, &op) >= 0) { /* it is theora */ memcpy(&to, &test, sizeof(test)); - theora_p = 1; - } else if (!vorbis_p && vorbis_synthesis_headerin(&vi, &vc, &op) >= 0) { + has_video = true; + } else if (!has_audio && vorbis_synthesis_headerin(&vi, &vc, &op) >= 0) { /* it is vorbis */ if (audio_track_skip) { vorbis_info_clear(&vi); @@ -179,141 +320,165 @@ void VideoStreamPlaybackTheora::set_file(const String &p_file) { ogg_stream_clear(&test); vorbis_info_init(&vi); vorbis_comment_init(&vc); - audio_track_skip--; } else { memcpy(&vo, &test, sizeof(test)); - vorbis_p = 1; + has_audio = true; } } else { /* whatever it is, we don't care about it */ ogg_stream_clear(&test); } } - /* fall through to non-bos page parsing */ } +} + +void VideoStreamPlaybackTheora::read_headers(th_setup_info *&ts) { + ogg_packet op; + int theora_header_packets = 1; + int vorbis_header_packets = 1; /* we're expecting more header packets. */ - while ((theora_p && theora_p < 3) || (vorbis_p && vorbis_p < 3)) { - int ret = 0; - + while (theora_header_packets < 3 || (has_audio && vorbis_header_packets < 3)) { /* look for further theora headers */ - if (theora_p && theora_p < 3) { - ret = ogg_stream_packetout(&to, &op); - } - while (theora_p && theora_p < 3 && ret) { - if (ret < 0) { - fprintf(stderr, "Error parsing Theora stream headers; corrupt stream?\n"); - clear(); - return; + // The API says there can be more than three but only three are mandatory. + while (theora_header_packets < 3 && ogg_stream_packetout(&to, &op) > 0) { + if (th_decode_headerin(&ti, &tc, &ts, &op) > 0) { + theora_header_packets++; } - if (!th_decode_headerin(&ti, &tc, &ts, &op)) { - fprintf(stderr, "Error parsing Theora stream headers; corrupt stream?\n"); - clear(); - return; - } - ret = ogg_stream_packetout(&to, &op); - theora_p++; } /* look for more vorbis header packets */ - if (vorbis_p && vorbis_p < 3) { - ret = ogg_stream_packetout(&vo, &op); + while (has_audio && vorbis_header_packets < 3 && ogg_stream_packetout(&vo, &op) > 0) { + if (!vorbis_synthesis_headerin(&vi, &vc, &op)) { + vorbis_header_packets++; + } } - while (vorbis_p && vorbis_p < 3 && ret) { - if (ret < 0) { - fprintf(stderr, "Error parsing Vorbis stream headers; corrupt stream?\n"); - clear(); - return; - } - ret = vorbis_synthesis_headerin(&vi, &vc, &op); - if (ret) { - fprintf(stderr, "Error parsing Vorbis stream headers; corrupt stream?\n"); - clear(); - return; - } - vorbis_p++; - if (vorbis_p == 3) { + + /* The header pages/packets will arrive before anything else we care about, or the stream is not obeying spec */ + if (theora_header_packets < 3 || (has_audio && vorbis_header_packets < 3)) { + ogg_page page; + if (read_page(&page)) { + queue_page(&page); + } else { + fprintf(stderr, "End of file while searching for codec headers.\n"); break; } - ret = ogg_stream_packetout(&vo, &op); } + } - /* The header pages/packets will arrive before anything else we - care about, or the stream is not obeying spec */ + has_video = theora_header_packets == 3; + has_audio = vorbis_header_packets == 3; +} - if (ogg_sync_pageout(&oy, &og) > 0) { - queue_page(&og); /* demux into the appropriate stream */ - } else { - int ret2 = buffer_data(); /* someone needs more data */ - if (ret2 == 0) { - fprintf(stderr, "End of file while searching for codec headers.\n"); - clear(); - return; - } +void VideoStreamPlaybackTheora::set_file(const String &p_file) { + ERR_FAIL_COND(playing); + th_setup_info *ts = nullptr; + + clear(); + + file = FileAccess::open(p_file, FileAccess::READ); + ERR_FAIL_COND_MSG(file.is_null(), "Cannot open file '" + p_file + "'."); + + file_name = p_file; + + ogg_sync_init(&oy); + + /* init supporting Vorbis structures needed in header parsing */ + vorbis_info_init(&vi); + vorbis_comment_init(&vc); + + /* init supporting Theora structures needed in header parsing */ + th_comment_init(&tc); + th_info_init(&ti); + + /* Zero stream state structs so they can be checked later. */ + memset(&to, 0, sizeof(to)); + memset(&vo, 0, sizeof(vo)); + + /* Ogg file open; parse the headers */ + find_streams(ts); + read_headers(ts); + + if (!has_audio) { + vorbis_comment_clear(&vc); + vorbis_info_clear(&vi); + if (!ogg_stream_check(&vo)) { + ogg_stream_clear(&vo); } } + // One video stream is mandatory. + if (!has_video) { + th_setup_free(ts); + th_comment_clear(&tc); + th_info_clear(&ti); + if (!ogg_stream_check(&to)) { + ogg_stream_clear(&to); + } + file.unref(); + return; + } + /* And now we have it all. Initialize decoders. */ - if (theora_p) { - td = th_decode_alloc(&ti, ts); - px_fmt = ti.pixel_fmt; - switch (ti.pixel_fmt) { - case TH_PF_420: - //printf(" 4:2:0 video\n"); - break; - case TH_PF_422: - //printf(" 4:2:2 video\n"); - break; - case TH_PF_444: - //printf(" 4:4:4 video\n"); - break; - case TH_PF_RSVD: - default: - printf(" video\n (UNKNOWN Chroma sampling!)\n"); - break; - } - th_decode_ctl(td, TH_DECCTL_GET_PPLEVEL_MAX, &pp_level_max, - sizeof(pp_level_max)); - pp_level = 0; - th_decode_ctl(td, TH_DECCTL_SET_PPLEVEL, &pp_level, sizeof(pp_level)); - pp_inc = 0; - - size.x = ti.frame_width; - size.y = ti.frame_height; - region.position.x = ti.pic_x; - region.position.y = ti.pic_y; - region.size.x = ti.pic_width; - region.size.y = ti.pic_height; - - Ref img = Image::create_empty(region.size.x, region.size.y, false, Image::FORMAT_RGBA8); - texture->set_image(img); - frame_data.resize(region.size.x * region.size.y * 4); - - frame_duration = (double)ti.fps_denominator / ti.fps_numerator; - } else { - /* tear down the partial theora setup */ - th_info_clear(&ti); - th_comment_clear(&tc); - } - + td = th_decode_alloc(&ti, ts); th_setup_free(ts); + px_fmt = ti.pixel_fmt; + switch (ti.pixel_fmt) { + case TH_PF_420: + case TH_PF_422: + case TH_PF_444: + break; + default: + WARN_PRINT(" video\n (UNKNOWN Chroma sampling!)\n"); + break; + } + th_decode_ctl(td, TH_DECCTL_GET_PPLEVEL_MAX, &pp_level_max, sizeof(pp_level_max)); + pp_level = 0; + th_decode_ctl(td, TH_DECCTL_SET_PPLEVEL, &pp_level, sizeof(pp_level)); + pp_inc = 0; - if (vorbis_p) { + size.x = ti.frame_width; + size.y = ti.frame_height; + region.position.x = ti.pic_x; + region.position.y = ti.pic_y; + region.size.x = ti.pic_width; + region.size.y = ti.pic_height; + + Ref img = Image::create_empty(region.size.x, region.size.y, false, Image::FORMAT_RGBA8); + texture->set_image(img); + frame_data.resize(region.size.x * region.size.y * 4); + + frame_duration = (double)ti.fps_denominator / ti.fps_numerator; + + if (has_audio) { vorbis_synthesis_init(&vd, &vi); vorbis_block_init(&vd, &vb); - //_setup(vi.channels, vi.rate); - } else { - /* tear down the partial vorbis setup */ - vorbis_info_clear(&vi); - vorbis_comment_clear(&vc); + audio_buffer_size = MIN(vi.channels, 8) * 1024; + audio_buffer = memnew_arr(float, audio_buffer_size); } - playing = false; - buffering = true; - time = 0; - video_done = !theora_p; - audio_done = !vorbis_p; + stream_data_offset = file->get_position() - oy.fill + oy.returned; + stream_data_size = file->get_length() - stream_data_offset; + + // Sync to last page to find video length. + int64_t seek_pos = MAX(stream_data_offset, (int64_t)file->get_length() - 64 * 1024); + int64_t video_granulepos = INT64_MAX; + int64_t audio_granulepos = INT64_MAX; + file->seek(seek_pos); + seek_pos = seek_streams(-1, video_granulepos, audio_granulepos); + file->seek(seek_pos); + ogg_sync_reset(&oy); + + stream_length = 0; + ogg_page page; + while (read_page(&page) > 0) { + // Use MAX because, even though pages are ordered, page time can be -1 + // for pages without full frames. Streams could be truncated too. + stream_length = MAX(stream_length, get_page_time(&page)); + } + + seek(0); } double VideoStreamPlaybackTheora::get_time() const { @@ -346,28 +511,32 @@ void VideoStreamPlaybackTheora::update(double p_delta) { ogg_packet op; while (!audio_ready && !audio_done) { + // Send remaining frames + if (!send_audio()) { + audio_ready = true; + break; + } + float **pcm; int ret = vorbis_synthesis_pcmout(&vd, &pcm); if (ret > 0) { - const int AUXBUF_LEN = 4096; - int to_read = ret; - float aux_buffer[AUXBUF_LEN]; - while (to_read) { - int m = MIN(AUXBUF_LEN / vi.channels, to_read); + int frames_read = 0; + while (frames_read < ret) { + int m = MIN(audio_buffer_size / vi.channels, ret - frames_read); int count = 0; for (int j = 0; j < m; j++) { for (int i = 0; i < vi.channels; i++) { - aux_buffer[count++] = pcm[i][j]; + audio_buffer[count++] = pcm[i][frames_read + j]; } } - int mixed = mix_callback(mix_udata, aux_buffer, m); - to_read -= mixed; - if (mixed != m) { //could mix no more + frames_read += m; + audio_ptr_end = m; + if (!send_audio()) { audio_ready = true; break; } } - vorbis_synthesis_read(&vd, ret - to_read); + vorbis_synthesis_read(&vd, frames_read); } else { /* no pending audio; is there a pending packet to decode? */ if (ogg_stream_packetout(&vo, &op) > 0) { @@ -383,17 +552,10 @@ void VideoStreamPlaybackTheora::update(double p_delta) { while (!video_ready && !video_done) { if (ogg_stream_packetout(&to, &op) > 0) { - /*HACK: This should be set after a seek or a gap, but we might not have - a granulepos for the first packet (we only have them for the last - packet on a page), so we just set it as often as we get it. - To do this right, we should back-track from the last packet on the - page and compute the correct granulepos for the first packet after - a seek or a gap.*/ if (op.granulepos >= 0) { - th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &op.granulepos, - sizeof(op.granulepos)); + th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &op.granulepos, sizeof(op.granulepos)); } - ogg_int64_t videobuf_granulepos; + int64_t videobuf_granulepos; int ret = th_decode_packetin(td, &op, &videobuf_granulepos); if (ret == 0 || ret == TH_DUPFRAME) { next_frame_time = th_granule_time(td, videobuf_granulepos); @@ -412,12 +574,8 @@ void VideoStreamPlaybackTheora::update(double p_delta) { } if (!video_ready || !audio_ready) { - int ret = buffer_data(); - if (ret > 0) { - while (ogg_sync_pageout(&oy, &og) > 0) { - queue_page(&og); - } - } else { + int ret = feed_pages(); + if (ret == 0) { vorbis_eos = true; theora_eos = true; break; @@ -452,10 +610,8 @@ void VideoStreamPlaybackTheora::update(double p_delta) { } void VideoStreamPlaybackTheora::play() { - if (!playing) { - time = 0; - } else { - stop(); + if (playing) { + return; } playing = true; @@ -464,12 +620,8 @@ void VideoStreamPlaybackTheora::play() { } void VideoStreamPlaybackTheora::stop() { - if (playing) { - clear(); - set_file(file_name); //reset - } playing = false; - time = 0; + seek(0); } bool VideoStreamPlaybackTheora::is_playing() const { @@ -485,7 +637,7 @@ bool VideoStreamPlaybackTheora::is_paused() const { } double VideoStreamPlaybackTheora::get_length() const { - return 0; + return stream_length; } double VideoStreamPlaybackTheora::get_playback_position() const { @@ -493,7 +645,123 @@ double VideoStreamPlaybackTheora::get_playback_position() const { } void VideoStreamPlaybackTheora::seek(double p_time) { - WARN_PRINT_ONCE("Seeking in Theora videos is not implemented yet (it's only supported for GDExtension-provided video streams)."); + if (file.is_null()) { + return; + } + if (p_time >= stream_length) { + return; + } + + video_ready = false; + next_frame_time = 0; + current_frame_time = -1; + dup_frame = false; + video_done = false; + audio_done = !has_audio; + theora_eos = false; + vorbis_eos = false; + audio_ptr_start = 0; + audio_ptr_end = 0; + + ogg_stream_reset(&to); + if (has_audio) { + ogg_stream_reset(&vo); + vorbis_synthesis_restart(&vd); + } + + int64_t seek_pos; + int64_t video_granulepos; + int64_t audio_granulepos; + // Find the granules we need so we can start playing at the seek time. + seek_pos = seek_streams(p_time, video_granulepos, audio_granulepos); + if (seek_pos < 0) { + return; + } + file->seek(seek_pos); + ogg_sync_reset(&oy); + + time = p_time; + + double last_audio_time = 0; + double last_video_time = 0; + bool first_frame_decoded = false; + bool start_audio = (audio_granulepos == 0); + bool start_video = (video_granulepos == (1LL << ti.keyframe_granule_shift)); + bool keyframe_found = false; + uint64_t current_frame = 0; + + // Read from the streams skipping pages until we reach the granules we want. We won't skip pages from both video and + // audio streams, only one of them, until decoding of both starts. + // video_granulepos and audio_granulepos are guaranteed to be found by checking the granulepos in the packets, no + // need to keep track of packets with granulepos == -1 until decoding starts. + while ((has_audio && last_audio_time < p_time) || (last_video_time <= p_time)) { + ogg_packet op; + if (feed_pages() == 0) { + break; + } + while (has_audio && last_audio_time < p_time && ogg_stream_packetout(&vo, &op) > 0) { + if (start_audio) { + if (vorbis_synthesis(&vb, &op) == 0) { /* test for success! */ + vorbis_synthesis_blockin(&vd, &vb); + float **pcm; + int samples_left = ceil((p_time - last_audio_time) * vi.rate); + int samples_read = vorbis_synthesis_pcmout(&vd, &pcm); + int samples_consumed = MIN(samples_left, samples_read); + vorbis_synthesis_read(&vd, samples_consumed); + last_audio_time += (double)samples_consumed / vi.rate; + } + } else if (op.granulepos >= audio_granulepos) { + last_audio_time = vorbis_granule_time(&vd, op.granulepos); + // Start tracking audio now. This won't produce any samples but will update the decoder state. + if (vorbis_synthesis_trackonly(&vb, &op) == 0) { + vorbis_synthesis_blockin(&vd, &vb); + } + start_audio = true; + } + } + while (last_video_time <= p_time && ogg_stream_packetout(&to, &op) > 0) { + if (!start_video && (op.granulepos >= video_granulepos || video_granulepos == (1LL << ti.keyframe_granule_shift))) { + if (op.granulepos > 0) { + current_frame = th_granule_frame(td, op.granulepos); + } + start_video = true; + } + // Don't start decoding until a keyframe is found, but count frames. + if (start_video) { + if (!keyframe_found && th_packet_iskeyframe(&op)) { + keyframe_found = true; + int64_t cur_granulepos = (current_frame + 1) << ti.keyframe_granule_shift; + th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &cur_granulepos, sizeof(cur_granulepos)); + } + if (keyframe_found) { + int64_t videobuf_granulepos; + if (op.granulepos >= 0) { + th_decode_ctl(td, TH_DECCTL_SET_GRANPOS, &op.granulepos, sizeof(op.granulepos)); + } + int ret = th_decode_packetin(td, &op, &videobuf_granulepos); + if (ret == 0 || ret == TH_DUPFRAME) { + last_video_time = th_granule_time(td, videobuf_granulepos); + first_frame_decoded = true; + } + } else { + current_frame++; + } + } + } + } + + if (first_frame_decoded) { + if (is_playing()) { + // Draw the current frame. + th_ycbcr_buffer yuv; + th_decode_ycbcr_out(td, yuv); + video_write(yuv); + current_frame_time = last_video_time; + } else { + next_frame_time = current_frame_time; + video_ready = true; + } + } } int VideoStreamPlaybackTheora::get_channels() const { diff --git a/modules/theora/video_stream_theora.h b/modules/theora/video_stream_theora.h index 3e1b33dda1a..46cb88b0505 100644 --- a/modules/theora/video_stream_theora.h +++ b/modules/theora/video_stream_theora.h @@ -51,8 +51,19 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback { Point2i size; Rect2i region; + float *audio_buffer = nullptr; + int audio_buffer_size = 0; + int audio_ptr_start = 0; + int audio_ptr_end = 0; + int buffer_data(); int queue_page(ogg_page *page); + int read_page(ogg_page *page); + int feed_pages(); + double get_page_time(ogg_page *page); + int64_t seek_streams(double p_time, int64_t &video_granulepos, int64_t &audio_granulepos); + void find_streams(th_setup_info *&ts); + void read_headers(th_setup_info *&ts); void video_write(th_ycbcr_buffer yuv); double get_time() const; @@ -60,7 +71,6 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback { bool vorbis_eos = false; ogg_sync_state oy; - ogg_page og; ogg_stream_state vo; ogg_stream_state to; th_info ti; @@ -71,19 +81,21 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback { vorbis_block vb; vorbis_comment vc; th_pixel_fmt px_fmt; - double frame_duration; + double frame_duration = 0; + double stream_length = 0; + int64_t stream_data_offset = 0; + int64_t stream_data_size = 0; - int theora_p = 0; - int vorbis_p = 0; int pp_level_max = 0; int pp_level = 0; int pp_inc = 0; bool playing = false; - bool buffering = false; bool paused = false; bool dup_frame = false; + bool has_video = false; + bool has_audio = false; bool video_ready = false; bool video_done = false; bool audio_done = false; @@ -100,6 +112,20 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback { protected: void clear(); + _FORCE_INLINE_ bool send_audio() { + if (audio_ptr_end > 0) { + int mixed = mix_callback(mix_udata, &audio_buffer[audio_ptr_start * vi.channels], audio_ptr_end - audio_ptr_start); + audio_ptr_start += mixed; + if (audio_ptr_start == audio_ptr_end) { + audio_ptr_start = 0; + audio_ptr_end = 0; + } else { + return false; + } + } + return true; + } + public: virtual void play() override; virtual void stop() override; diff --git a/scene/gui/video_stream_player.cpp b/scene/gui/video_stream_player.cpp index 8e2d522a823..e98f5b254a3 100644 --- a/scene/gui/video_stream_player.cpp +++ b/scene/gui/video_stream_player.cpp @@ -339,7 +339,6 @@ void VideoStreamPlayer::play() { if (playback.is_null()) { return; } - playback->stop(); playback->play(); set_process_internal(true); last_audio_time = 0; @@ -468,7 +467,9 @@ double VideoStreamPlayer::get_stream_position() const { void VideoStreamPlayer::set_stream_position(double p_position) { if (playback.is_valid()) { + resampler.flush(); playback->seek(p_position); + last_audio_time = 0; } } diff --git a/servers/audio/audio_rb_resampler.cpp b/servers/audio/audio_rb_resampler.cpp index 94c3f0dd36c..a3351889d69 100644 --- a/servers/audio/audio_rb_resampler.cpp +++ b/servers/audio/audio_rb_resampler.cpp @@ -75,23 +75,37 @@ uint32_t AudioRBResampler::_resample(AudioFrame *p_dest, int p_todo, int32_t p_i p_dest[i] = AudioFrame(v0, v1); } - // This will probably never be used, but added anyway + // Downmix to stereo. Apply -3dB to center, and sides, -6dB to rear. + + // four channels - channel order: front left, front right, rear left, rear right if constexpr (C == 4) { - float v0 = rb[(pos << 2) + 0]; - float v1 = rb[(pos << 2) + 1]; - float v0n = rb[(pos_next << 2) + 0]; - float v1n = rb[(pos_next << 2) + 1]; + float v0 = rb[(pos << 2) + 0] + rb[(pos << 2) + 2] / 2; + float v1 = rb[(pos << 2) + 1] + rb[(pos << 2) + 3] / 2; + float v0n = rb[(pos_next << 2) + 0] + rb[(pos_next << 2) + 2] / 2; + float v1n = rb[(pos_next << 2) + 1] + rb[(pos_next << 2) + 3] / 2; v0 += (v0n - v0) * frac; v1 += (v1n - v1) * frac; p_dest[i] = AudioFrame(v0, v1); } + // six channels - channel order: front left, center, front right, rear left, rear right, LFE if constexpr (C == 6) { - float v0 = rb[(pos * 6) + 0]; - float v1 = rb[(pos * 6) + 1]; - float v0n = rb[(pos_next * 6) + 0]; - float v1n = rb[(pos_next * 6) + 1]; + float v0 = rb[(pos * 6) + 0] + rb[(pos * 6) + 1] / Math::SQRT2 + rb[(pos * 6) + 3] / 2; + float v1 = rb[(pos * 6) + 2] + rb[(pos * 6) + 1] / Math::SQRT2 + rb[(pos * 6) + 4] / 2; + float v0n = rb[(pos_next * 6) + 0] + rb[(pos_next * 6) + 1] / Math::SQRT2 + rb[(pos_next * 6) + 3] / 2; + float v1n = rb[(pos_next * 6) + 2] + rb[(pos_next * 6) + 1] / Math::SQRT2 + rb[(pos_next * 6) + 4] / 2; + v0 += (v0n - v0) * frac; + v1 += (v1n - v1) * frac; + p_dest[i] = AudioFrame(v0, v1); + } + // eight channels - channel order: front left, center, front right, side left, side right, rear left, rear + // right, LFE + if constexpr (C == 8) { + float v0 = rb[(pos << 3) + 0] + rb[(pos << 3) + 1] / Math::SQRT2 + rb[(pos << 3) + 3] / Math::SQRT2 + rb[(pos << 3) + 5] / 2; + float v1 = rb[(pos << 3) + 2] + rb[(pos << 3) + 1] / Math::SQRT2 + rb[(pos << 3) + 4] / Math::SQRT2 + rb[(pos << 3) + 6] / 2; + float v0n = rb[(pos_next << 3) + 0] + rb[(pos_next << 3) + 1] / Math::SQRT2 + rb[(pos_next << 3) + 3] / Math::SQRT2 + rb[(pos_next << 3) + 5] / 2; + float v1n = rb[(pos_next << 3) + 2] + rb[(pos_next << 3) + 1] / Math::SQRT2 + rb[(pos_next << 3) + 4] / Math::SQRT2 + rb[(pos_next << 3) + 6] / 2; v0 += (v0n - v0) * frac; v1 += (v1n - v1) * frac; p_dest[i] = AudioFrame(v0, v1); @@ -125,6 +139,9 @@ bool AudioRBResampler::mix(AudioFrame *p_dest, int p_frames) { case 6: src_read = _resample<6>(p_dest, target_todo, increment); break; + case 8: + src_read = _resample<8>(p_dest, target_todo, increment); + break; } if (src_read > read_space) { @@ -159,7 +176,7 @@ int AudioRBResampler::get_num_of_ready_frames() { } Error AudioRBResampler::setup(int p_channels, int p_src_mix_rate, int p_target_mix_rate, int p_buffer_msec, int p_minbuff_needed) { - ERR_FAIL_COND_V(p_channels != 1 && p_channels != 2 && p_channels != 4 && p_channels != 6, ERR_INVALID_PARAMETER); + ERR_FAIL_COND_V(p_channels != 1 && p_channels != 2 && p_channels != 4 && p_channels != 6 && p_channels != 8, ERR_INVALID_PARAMETER); int desired_rb_bits = nearest_shift(MAX((p_buffer_msec / 1000.0) * p_src_mix_rate, p_minbuff_needed)); diff --git a/servers/audio/audio_rb_resampler.h b/servers/audio/audio_rb_resampler.h index dd396c8bbb7..2acbc44097f 100644 --- a/servers/audio/audio_rb_resampler.h +++ b/servers/audio/audio_rb_resampler.h @@ -152,6 +152,19 @@ public: wp = (wp + 1) & rb_mask; } } break; + case 8: { + for (uint32_t i = 0; i < p_frames; i++) { + rb[(wp << 3) + 0] = read_buf[(i << 3) + 0]; + rb[(wp << 3) + 1] = read_buf[(i << 3) + 1]; + rb[(wp << 3) + 2] = read_buf[(i << 3) + 2]; + rb[(wp << 3) + 3] = read_buf[(i << 3) + 3]; + rb[(wp << 3) + 4] = read_buf[(i << 3) + 4]; + rb[(wp << 3) + 5] = read_buf[(i << 3) + 5]; + rb[(wp << 3) + 6] = read_buf[(i << 3) + 6]; + rb[(wp << 3) + 7] = read_buf[(i << 3) + 7]; + wp = (wp + 1) & rb_mask; + } + } break; } rb_write_pos.set(wp);