From afe36454574f173599de4644dc61a2bd7341d354 Mon Sep 17 00:00:00 2001
From: arch1t3cht <arch1t3cht@gmail.com>
Date: Sat, 6 Apr 2024 22:08:20 +0200
Subject: [PATCH] Don't default to UseDTS for mpegts

Presentation timestamps seem to work fine for most files in mpeg and
mpegts containers (and replacing PTS with DTS can break predictions),
so wait until we encounter a packet with no PTS (which does happen
sometimes) before reverting to UseDTS.

On the other hand, seeking by PTS is unreliable in mpegts (since
it's just lavf's generic internal seek function), so default to
SeekByPos there.

I don't have an mpegtsraw test file on hand, but as far as I can see
it should get the same treatment. For nuv I don't really know, so I'll
leave it as it is.
---
 src/core/indexing.cpp    | 14 ++++++++++----
 src/core/track.cpp       | 16 ++++++++++++----
 src/core/track.h         |  7 +++++--
 src/core/videosource.cpp |  2 ++
 4 files changed, 29 insertions(+), 10 deletions(-)
diff --git a/src/core/indexing.cpp b/src/core/indexing.cpp
index 79c0e2a344..1ac07861e6 100644
--- a/src/core/indexing.cpp
+++ b/src/core/indexing.cpp
@@ -71,6 +71,10 @@ void FFMS_Index::Finalize(std::vector<SharedAVContext> const& video_contexts, co
     for (size_t i = 0, end = size(); i != end; ++i) {
         FFMS_Track& track = (*this)[i];
 
+        if (!strcmp(Format, "mpeg") || !strcmp(Format, "mpegts") || !strcmp(Format, "mpegtsraw"))
+            if (std::any_of(track.begin(), track.end(), [](FrameInfo F) { return F.PTS == AV_NOPTS_VALUE; }))
+                track.RevertToDTS();
+
         // Some audio tracks are simply insane junk (seen with als) and will have a single(?) super long packet and
         // apart from that look legit and be chosen instead of usable audio. This hopefully rejects some of it.
         // Caused by sample in https://github.com/FFMS/ffms2/issues/351
@@ -85,6 +89,7 @@ void FFMS_Index::Finalize(std::vector<SharedAVContext> const& video_contexts, co
         // but may also have valid, split packets, with pos equal to the previous pos.
         if (video_contexts[i].CodecContext && video_contexts[i].CodecContext->codec_id == AV_CODEC_ID_H264 && !!strcmp(Format, "asf"))
             track.MaybeHideFrames();
+
         track.FinalizeTrack();
 
         if (track.TT != FFMS_TYPE_VIDEO) continue;
@@ -430,7 +435,8 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
     std::vector<SharedAVContext> AVContexts(FormatContext->nb_streams);
 
     auto TrackIndices = std::unique_ptr<FFMS_Index>(new FFMS_Index(Filesize, Digest, ErrorHandling, LAVFOpts));
-    bool UseDTS = !strcmp(FormatContext->iformat->name, "mpeg") || !strcmp(FormatContext->iformat->name, "mpegts") || !strcmp(FormatContext->iformat->name, "mpegtsraw") || !strcmp(FormatContext->iformat->name, "nuv");
+    bool UseDTS = !strcmp(FormatContext->iformat->name, "nuv");
+    bool IsMpegLike = !strcmp(FormatContext->iformat->name, "mpeg") || !strcmp(FormatContext->iformat->name, "mpegts") || !strcmp(FormatContext->iformat->name, "mpegtsraw");
 
     for (unsigned int i = 0; i < FormatContext->nb_streams; i++) {
         TrackIndices->emplace_back((int64_t)FormatContext->streams[i]->time_base.num * 1000,
@@ -527,7 +533,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
 
         if (FormatContext->streams[Track]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
             int64_t PTS = TrackInfo.UseDTS ? Packet->dts : Packet->pts;
-            if (PTS == AV_NOPTS_VALUE) {
+            if (PTS == AV_NOPTS_VALUE && !IsMpegLike) {
                 // VPx alt-refs are output as packets which lack timestmps or durations, since
                 // they are invisible. Currently, the timestamp mangling code in libavformat
                 // will sometimes add a bogus timestamp and duration, if the webm in question
@@ -558,7 +564,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
             bool SecondField = false;
             ParseVideoPacket(AVContexts[Track], Packet, &RepeatPict, &FrameType, &Invisible, &SecondField, &LastPicStruct);
 
-            TrackInfo.AddVideoFrame(PTS, RepeatPict, KeyFrame,
+            TrackInfo.AddVideoFrame(PTS, Packet->dts, RepeatPict, KeyFrame,
                 FrameType, Packet->pos, Invisible, SecondField);
         } else if (FormatContext->streams[Track]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
             // For video seeking timestamps are used only if all packets have
@@ -571,7 +577,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
             uint32_t SampleCount = IndexAudioPacket(Track, Packet, AVContexts[Track], *TrackIndices);
             TrackInfo.SampleRate = AVContexts[Track].CodecContext->sample_rate;
 
-            TrackInfo.AddAudioFrame(LastValidTS[Track],
+            TrackInfo.AddAudioFrame(LastValidTS[Track], Packet->dts,
                 StartSample, SampleCount, KeyFrame, Packet->pos, Packet->flags & AV_PKT_FLAG_DISCARD);
         }
 
diff --git a/src/core/track.cpp b/src/core/track.cpp
index 0e1a85b93d..604e1493df 100644
--- a/src/core/track.cpp
+++ b/src/core/track.cpp
@@ -131,14 +131,14 @@ void FFMS_Track::Write(ZipFile &stream) const {
         WriteFrame(stream, Frames[i], i == 0 ? temp : Frames[i - 1], TT);
 }
 
-void FFMS_Track::AddVideoFrame(int64_t PTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos, bool MarkedHidden, bool SecondField) {
-    Data->Frames.push_back({ PTS, 0, FilePos, 0, 0, 0, 0, FrameType, RepeatPict, KeyFrame, MarkedHidden, SecondField });
+void FFMS_Track::AddVideoFrame(int64_t PTS, int64_t DTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos, bool MarkedHidden, bool SecondField) {
+    Data->Frames.push_back({ PTS, 0, FilePos, 0, 0, 0, 0, FrameType, RepeatPict, KeyFrame, MarkedHidden, SecondField, DTS }); // DTS goes last: it is the final data member of FrameInfo
 }
 
-void FFMS_Track::AddAudioFrame(int64_t PTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos, bool MarkedHidden) {
+void FFMS_Track::AddAudioFrame(int64_t PTS, int64_t DTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos, bool MarkedHidden) {
     if (SampleCount > 0) {
         Data->Frames.push_back({ PTS, 0, FilePos, SampleStart, SampleCount,
-            0, 0, 0, 0, KeyFrame, MarkedHidden, false });
+            0, 0, 0, 0, KeyFrame, MarkedHidden, false, DTS }); // false fills the SecondField slot; DTS is the trailing FrameInfo member
     }
 }
 
@@ -248,6 +248,14 @@ void FFMS_Track::MaybeReorderFrames() {
     }
 }
 
+void FFMS_Track::RevertToDTS() {
+    // Overwrite each frame's PTS with its DTS, then mark the track as DTS-based.
+    for (FrameInfo &Frame : Data->Frames)
+        Frame.PTS = Frame.DTS;
+
+    UseDTS = true;
+}
+
 void FFMS_Track::MaybeHideFrames() {
     frame_vec &Frames = Data->Frames;
     // Awful handling for interlaced H.264: each frame is output twice, so hide
diff --git a/src/core/track.h b/src/core/track.h
index 3e2111a252..dde1321881 100644
--- a/src/core/track.h
+++ b/src/core/track.h
@@ -43,6 +43,8 @@ struct FrameInfo {
     bool MarkedHidden;
     bool SecondField;
 
+    int64_t DTS;        // Only used during indexing and not stored in the index file. (If UseDTS is true, the PTS values will be DTS)
+
     // If true, no frame corresponding to this packet will be output
     constexpr bool Skipped() const { return MarkedHidden || SecondField; }
 };
@@ -71,9 +73,10 @@ struct FFMS_Track {
     int64_t LastDuration = 0;
     int SampleRate = 0; // not persisted
 
-    void AddVideoFrame(int64_t PTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos = 0, bool Invisible = false, bool SecondField = false);
-    void AddAudioFrame(int64_t PTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos = 0, bool Invisible = false);
+    void AddVideoFrame(int64_t PTS, int64_t DTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos = 0, bool Invisible = false, bool SecondField = false);
+    void AddAudioFrame(int64_t PTS, int64_t DTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos = 0, bool Invisible = false);
 
+    void RevertToDTS();
     void MaybeHideFrames();
     void FinalizeTrack();
     void FillAudioGaps();
diff --git a/src/core/videosource.cpp b/src/core/videosource.cpp
index f148a944e8..ee60c8030c 100644
--- a/src/core/videosource.cpp
+++ b/src/core/videosource.cpp
@@ -279,6 +279,8 @@ FFMS_VideoSource::FFMS_VideoSource(const char *SourceFile, FFMS_Index &Index, in
                 Delay.ThreadDelay = CodecContext->thread_count - 1;
         }
 
+        SeekByPos = !strcmp(FormatContext->iformat->name, "mpeg") || !strcmp(FormatContext->iformat->name, "mpegts") || !strcmp(FormatContext->iformat->name, "mpegtsraw");
+
         // Always try to decode a frame to make sure all required parameters are known
         int64_t DummyPTS = 0, DummyPos = 0;
         DecodeNextFrame(DummyPTS, DummyPos);