From b5f2934ad6b56bf903156a34d00a699b3cf50ecc Mon Sep 17 00:00:00 2001 From: Quentin Renard Date: Sun, 1 Dec 2024 16:18:53 +0100 Subject: [PATCH] Added software resample context --- README.md | 3 +- astiav_test.go | 127 ++++----- class_test.go | 4 +- examples/resampling_audio/main.go | 295 ++++++++++++++++++++ examples/{scaling => scaling_video}/main.go | 0 software_resample_context.go | 58 ++++ software_resample_context_test.go | 45 +++ 7 files changed, 467 insertions(+), 65 deletions(-) create mode 100644 examples/resampling_audio/main.go rename examples/{scaling => scaling_video}/main.go (100%) create mode 100644 software_resample_context.go create mode 100644 software_resample_context_test.go diff --git a/README.md b/README.md index 3321da3..d0fe124 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,8 @@ Examples are located in the [examples](examples) directory and mirror as much as |Hardware Decoding|[see](examples/hardware_decoding/main.go)|[see](https://github.com/FFmpeg/FFmpeg/blob/n7.0/doc/examples/hw_decode.c) |Hardware Encoding|[see](examples/hardware_encoding/main.go)|[see](https://github.com/FFmpeg/FFmpeg/blob/n7.0/doc/examples/vaapi_encode.c) |Remuxing|[see](examples/remuxing/main.go)|[see](https://github.com/FFmpeg/FFmpeg/blob/n7.0/doc/examples/remuxing.c) -|Scaling|[see](examples/scaling/main.go)|[see](https://github.com/FFmpeg/FFmpeg/blob/n7.0/doc/examples/scaling_video.c) +|Resampling audio|[see](examples/resampling_audio/main.go)|[see](https://github.com/FFmpeg/FFmpeg/blob/n7.0/doc/examples/resample_audio.c) +|Scaling video|[see](examples/scaling_video/main.go)|[see](https://github.com/FFmpeg/FFmpeg/blob/n7.0/doc/examples/scaling_video.c) |Transcoding|[see](examples/transcoding/main.go)|[see](https://github.com/FFmpeg/FFmpeg/blob/n7.0/doc/examples/transcoding.c) *Tip: you can use the video sample located in the `testdata` directory for your tests* diff --git a/astiav_test.go b/astiav_test.go index 64f7cb3..a613d0e 100644 --- a/astiav_test.go +++ b/astiav_test.go @@ -47,7 +47,11 @@ func (h *helper) close() { type helperInput struct { firstPkt *Packet formatContext *FormatContext - lastFrame *Frame + lastFrames map[MediaType]*Frame +} + +func newHelperInput() *helperInput { + return &helperInput{lastFrames: make(map[MediaType]*Frame)} } func (h *helper) inputFormatContext(name string, ifmt *InputFormat) (fc *FormatContext, err error) { @@ -78,7 +82,7 @@ func (h *helper) inputFormatContext(name string, ifmt *InputFormat) (fc *FormatC h.m.Lock() if _, ok := h.inputs[name]; !ok { - h.inputs[name] = &helperInput{} + h.inputs[name] = newHelperInput() } h.inputs[name].formatContext = fc h.m.Unlock() @@ -118,127 +122,124 @@ func (h *helper) inputFirstPacket(name string) (pkt *Packet, err error) { return } -func (h *helper) inputLastFrame(name string, mediaType MediaType, ifmt *InputFormat) (f *Frame, err error) { +func (h *helper) inputLastFrame(name string, mediaType MediaType, ifmt *InputFormat) (*Frame, error) { h.m.Lock() - i, ok := h.inputs[name] - if ok && i.lastFrame != nil { - h.m.Unlock() - return i.lastFrame, nil + if i, ok := h.inputs[name]; ok { + if len(i.lastFrames) > 0 { + f, ok := i.lastFrames[mediaType] + h.m.Unlock() + if ok { + return f, nil + } + return nil, fmt.Errorf("astiav_test: no last frame for media type %s", mediaType) + } } h.m.Unlock() - var fc *FormatContext - if fc, err = h.inputFormatContext(name, ifmt); err != nil { - err = fmt.Errorf("astiav_test: getting input format context failed: %w", err) - return + fc, err := h.inputFormatContext(name, ifmt) + if err != nil { + return nil, fmt.Errorf("astiav_test: getting input format context failed: %w", err) } - var cc *CodecContext - var cs *Stream - for _, s := range fc.Streams() { - if s.CodecParameters().MediaType() != mediaType { - continue + type stream struct { + cc *CodecContext + s *Stream + } + streams := make(map[int]*stream) + mediaTypeFound := false + for _, v := range fc.Streams() { + s := &stream{s: v} + streams[v.Index()] = s + + c := FindDecoder(v.CodecParameters().CodecID()) + if c == nil { + return nil, errors.New("astiav_test: no codec") } - cs = s + s.cc = AllocCodecContext(c) + if s.cc == nil { + return nil, errors.New("astiav_test: no codec context") + } + h.closer.Add(s.cc.Free) - c := FindDecoder(s.CodecParameters().CodecID()) - if c == nil { - err = errors.New("astiav_test: no codec") - return + if err = s.s.CodecParameters().ToCodecContext(s.cc); err != nil { + return nil, fmt.Errorf("astiav_test: updating codec context failed: %w", err) } - cc = AllocCodecContext(c) - if cc == nil { - err = errors.New("astiav_test: no codec context") - return + if err = s.cc.Open(c, nil); err != nil { + return nil, fmt.Errorf("astiav_test: opening codec context failed: %w", err) } - h.closer.Add(cc.Free) - if err = cs.CodecParameters().ToCodecContext(cc); err != nil { - err = fmt.Errorf("astiav_test: updating codec context failed: %w", err) - return + if _, ok := h.inputs[name].lastFrames[s.cc.MediaType()]; !ok { + h.inputs[name].lastFrames[s.cc.MediaType()] = AllocFrame() + h.closer.Add(h.inputs[name].lastFrames[s.cc.MediaType()].Free) } - if err = cc.Open(c, nil); err != nil { - err = fmt.Errorf("astiav_test: opening codec context failed: %w", err) - return + if s.cc.MediaType() == mediaType { + mediaTypeFound = true } - break } - if cs == nil { - err = errors.New("astiav_test: no valid video stream") - return + if !mediaTypeFound { + return nil, fmt.Errorf("astiav_test: no stream for media type %s", mediaType) } var pkt1 *Packet if pkt1, err = h.inputFirstPacket(name); err != nil { - err = fmt.Errorf("astiav_test: getting input first packet failed: %w", err) - return + return nil, fmt.Errorf("astiav_test: getting input first packet failed: %w", err) } pkt2 := AllocPacket() h.closer.Add(pkt2.Free) - f = AllocFrame() + f := AllocFrame() h.closer.Add(f.Free) - lastFrame := AllocFrame() - h.closer.Add(lastFrame.Free) - pkts := []*Packet{pkt1} for { if err = fc.ReadFrame(pkt2); err != nil { if errors.Is(err, ErrEof) || errors.Is(err, ErrEagain) { if len(pkts) == 0 { - if err = f.Ref(lastFrame); err != nil { - err = fmt.Errorf("astiav_test: last refing frame failed: %w", err) - return - } err = nil break } } else { - err = fmt.Errorf("astiav_test: reading frame failed: %w", err) - return + return nil, fmt.Errorf("astiav_test: reading frame failed: %w", err) } } else { pkts = append(pkts, pkt2) } for _, pkt := range pkts { - if pkt.StreamIndex() != cs.Index() { + s, ok := streams[pkt.StreamIndex()] + if !ok { continue } - if err = cc.SendPacket(pkt); err != nil { - err = fmt.Errorf("astiav_test: sending packet failed: %w", err) - return + if err = s.cc.SendPacket(pkt); err != nil { + return nil, fmt.Errorf("astiav_test: sending packet failed: %w", err) } for { - if err = cc.ReceiveFrame(f); err != nil { + if err = s.cc.ReceiveFrame(f); err != nil { if errors.Is(err, ErrEof) || errors.Is(err, ErrEagain) { err = nil break } - err = fmt.Errorf("astiav_test: receiving frame failed: %w", err) - return + return nil, fmt.Errorf("astiav_test: receiving frame failed: %w", err) } - if err = lastFrame.Ref(f); err != nil { - err = fmt.Errorf("astiav_test: refing frame failed: %w", err) - return + h.m.Lock() + h.inputs[name].lastFrames[s.cc.MediaType()].Unref() + err = h.inputs[name].lastFrames[s.cc.MediaType()].Ref(f) + h.m.Unlock() + if err != nil { + return nil, fmt.Errorf("astiav_test: refing frame failed: %w", err) } } } pkts = []*Packet{} } - - h.m.Lock() - h.inputs[name].lastFrame = f - h.m.Unlock() - return + return h.inputs[name].lastFrames[mediaType], nil } diff --git a/class_test.go b/class_test.go index 2187085..d462407 100644 --- a/class_test.go +++ b/class_test.go @@ -53,10 +53,11 @@ func TestClassers(t *testing.T) { defer os.RemoveAll(path) ic2, err := AllocIOContext(1, true, nil, nil, nil) require.NoError(t, err) + src := AllocSoftwareResampleContext() ssc, err := CreateSoftwareScaleContext(1, 1, PixelFormatRgba, 2, 2, PixelFormatRgba, NewSoftwareScaleContextFlags()) require.NoError(t, err) - require.Equal(t, cl+12, len(classers.p)) + require.Equal(t, cl+13, len(classers.p)) v, ok := classers.get(unsafe.Pointer(f1.c)) require.True(t, ok) require.Equal(t, f1, v) @@ -70,6 +71,7 @@ func TestClassers(t *testing.T) { fmc2.CloseInput() require.NoError(t, ic1.Close()) ic2.Free() + src.Free() ssc.Free() require.Equal(t, cl, len(classers.p)) } diff --git a/examples/resampling_audio/main.go b/examples/resampling_audio/main.go new file mode 100644 index 0000000..1a1eebe --- /dev/null +++ b/examples/resampling_audio/main.go @@ -0,0 +1,295 @@ +package main + +import ( + "errors" + "flag" + "fmt" + "log" + "strings" + + "github.com/asticode/go-astiav" +) + +var ( + input = flag.String("i", "", "the input path") +) + +var ( + af *astiav.AudioFifo + finalFrame *astiav.Frame + resampledFrame *astiav.Frame + src *astiav.SoftwareResampleContext + decodedFrame *astiav.Frame +) + +func main() { + // Handle ffmpeg logs + astiav.SetLogLevel(astiav.LogLevelDebug) + astiav.SetLogCallback(func(c astiav.Classer, l astiav.LogLevel, fmt, msg string) { + var cs string + if c != nil { + if cl := c.Class(); cl != nil { + cs = " - class: " + cl.String() + } + } + log.Printf("ffmpeg log: %s%s - level: %d\n", strings.TrimSpace(msg), cs, l) + }) + + // Parse flags + flag.Parse() + + // Usage + if *input == "" { + log.Println("Usage: -i ") + return + } + + // Allocate input format context + inputFormatContext := astiav.AllocFormatContext() + if inputFormatContext == nil { + log.Fatal(errors.New("main: input format context is nil")) + } + defer inputFormatContext.Free() + + // Open input + if err := inputFormatContext.OpenInput(*input, nil, nil); err != nil { + log.Fatal(fmt.Errorf("main: opening input failed: %w", err)) + } + defer inputFormatContext.CloseInput() + + // Find stream info + if err := inputFormatContext.FindStreamInfo(nil); err != nil { + log.Fatal(fmt.Errorf("main: finding stream info failed: %w", err)) + } + + // Loop through streams + var s *astiav.Stream + var cc *astiav.CodecContext + for _, is := range inputFormatContext.Streams() { + // Only process audio + if is.CodecParameters().MediaType() != astiav.MediaTypeAudio { + continue + } + + // Store stream + s = is + + // Find decoder + c := astiav.FindDecoder(is.CodecParameters().CodecID()) + if c == nil { + log.Fatal(errors.New("main: codec is nil")) + } + + // Allocate codec context + if cc = astiav.AllocCodecContext(c); cc == nil { + log.Fatal(errors.New("main: codec context is nil")) + } + defer cc.Free() + + // Update codec context + if err := is.CodecParameters().ToCodecContext(cc); err != nil { + log.Fatal(fmt.Errorf("main: updating codec context failed: %w", err)) + } + + // Open codec context + if err := cc.Open(c, nil); err != nil { + log.Fatal(fmt.Errorf("main: opening codec context failed: %w", err)) + } + break + } + + // No stream + if s == nil { + log.Fatal("main: no audio stream found") + } + + // Alloc resample context + src = astiav.AllocSoftwareResampleContext() + defer src.Free() + + // Allocate packet + pkt := astiav.AllocPacket() + defer pkt.Free() + + // Allocate decoded frame + decodedFrame = astiav.AllocFrame() + defer decodedFrame.Free() + + // Allocate resampled frame + resampledFrame = astiav.AllocFrame() + defer resampledFrame.Free() + + // For the resampled frame we need to setup mandatory information + resampledFrame.SetChannelLayout(astiav.ChannelLayoutStereo) + resampledFrame.SetSampleFormat(astiav.SampleFormatFltp) + resampledFrame.SetSampleRate(24000) + + // Do this only if you want to make sure the resampled frame's number of samples doesn't get + // bigger than a custom value ("200" in our case) + resampledFrame.SetNbSamples(200) + const align = 0 + if err := resampledFrame.AllocBuffer(align); err != nil { + log.Fatal(fmt.Errorf("main: allocating buffer failed: %w", err)) + } + if err := resampledFrame.AllocSamples(align); err != nil { + log.Fatal(fmt.Errorf("main: allocating samples failed: %w", err)) + } + + // Do this only if you want to make sure final frames have an exact constant number of samples + // In that case we use an audio FIFO + finalFrame = astiav.AllocFrame() + defer finalFrame.Free() + finalFrame.SetChannelLayout(resampledFrame.ChannelLayout()) + finalFrame.SetNbSamples(resampledFrame.NbSamples()) + finalFrame.SetSampleFormat(resampledFrame.SampleFormat()) + finalFrame.SetSampleRate(resampledFrame.SampleRate()) + if err := finalFrame.AllocBuffer(align); err != nil { + log.Fatal(fmt.Errorf("main: allocating buffer failed: %w", err)) + } + if err := finalFrame.AllocSamples(align); err != nil { + log.Fatal(fmt.Errorf("main: allocating samples failed: %w", err)) + } + af = astiav.AllocAudioFifo(finalFrame.SampleFormat(), finalFrame.ChannelLayout().Channels(), finalFrame.NbSamples()) + defer af.Free() + + // Loop + for { + // We use a closure to ease unreferencing the packet + if stop := func() bool { + // Read frame + if err := inputFormatContext.ReadFrame(pkt); err != nil { + if errors.Is(err, astiav.ErrEof) { + return true + } + log.Fatal(fmt.Errorf("main: reading frame failed: %w", err)) + } + + // Make sure to unreference the packet + defer pkt.Unref() + + // Invalid stream + if pkt.StreamIndex() != s.Index() { + return false + } + + // Send packet + if err := cc.SendPacket(pkt); err != nil { + log.Fatal(fmt.Errorf("main: sending packet failed: %w", err)) + } + + // Loop + for { + // We use a closure to ease unreferencing the frame + if stop := func() bool { + // Receive frame + if err := cc.ReceiveFrame(decodedFrame); err != nil { + if errors.Is(err, astiav.ErrEof) || errors.Is(err, astiav.ErrEagain) { + return true + } + log.Fatal(fmt.Errorf("main: receiving frame failed: %w", err)) + } + + // Make sure to unreference the frame + defer decodedFrame.Unref() + + // Log + log.Printf("new decoded frame: nb samples: %d", decodedFrame.NbSamples()) + + // Resample decoded frame + if err := src.ConvertFrame(decodedFrame, resampledFrame); err != nil { + log.Fatal(fmt.Errorf("main: resampling decoded frame failed: %w", err)) + } + + // Something was resampled + if nbSamples := resampledFrame.NbSamples(); nbSamples > 0 { + // Log + log.Printf("new resampled frame: nb samples: %d", nbSamples) + + // Add resampled frame to audio fifo + if err := addResampledFrameToAudioFIFO(false); err != nil { + log.Fatal(fmt.Errorf("main: adding resampled frame to audio fifo failed: %w", err)) + } + + // Flush software resample context + if err := flushSoftwareResampleContext(false); err != nil { + log.Fatal(fmt.Errorf("main: flushing software resample context failed: %w", err)) + } + } + return false + }(); stop { + break + } + } + return false + }(); stop { + break + } + } + + // Flush software resample context + if err := flushSoftwareResampleContext(true); err != nil { + log.Fatal(fmt.Errorf("main: flushing software resample context failed: %w", err)) + } + + // Success + log.Println("success") +} + +func flushSoftwareResampleContext(finalFlush bool) error { + // Loop + for { + // We're making the final flush or there's enough data to flush the resampler + if finalFlush || src.Delay(int64(resampledFrame.SampleRate())) >= int64(resampledFrame.NbSamples()) { + // Flush resampler + if err := src.ConvertFrame(nil, resampledFrame); err != nil { + log.Fatal(fmt.Errorf("main: flushing resampler failed: %w", err)) + } + + // Log + if resampledFrame.NbSamples() > 0 { + log.Printf("new resampled frame: nb samples: %d", resampledFrame.NbSamples()) + } + + // Add resampled frame to audio fifo + if err := addResampledFrameToAudioFIFO(finalFlush); err != nil { + log.Fatal(fmt.Errorf("main: adding resampled frame to audio fifo failed: %w", err)) + } + + // Final flush is done + if finalFlush && resampledFrame.NbSamples() == 0 { + break + } + continue + } + break + } + return nil +} + +func addResampledFrameToAudioFIFO(flush bool) error { + // Write + if resampledFrame.NbSamples() > 0 { + if _, err := af.Write(resampledFrame); err != nil { + return fmt.Errorf("main: writing failed: %w", err) + } + } + + // Loop + for { + // We're flushing or there's enough data to read + if (flush && af.Size() > 0) || (!flush && af.Size() >= finalFrame.NbSamples()) { + // Read + n, err := af.Read(finalFrame) + if err != nil { + return fmt.Errorf("main: reading failed: %w", err) + } + finalFrame.SetNbSamples(n) + + // Log + log.Printf("new final frame: nb samples: %d", finalFrame.NbSamples()) + continue + } + break + } + return nil +} diff --git a/examples/scaling/main.go b/examples/scaling_video/main.go similarity index 100% rename from examples/scaling/main.go rename to examples/scaling_video/main.go diff --git a/software_resample_context.go b/software_resample_context.go new file mode 100644 index 0000000..048eae1 --- /dev/null +++ b/software_resample_context.go @@ -0,0 +1,58 @@ +package astiav + +//#include +import "C" +import "unsafe" + +// https://ffmpeg.org/doxygen/7.0/structSwrContext.html +type SoftwareResampleContext struct { + c *C.SwrContext +} + +func newSoftwareResampleContextFromC(c *C.SwrContext) *SoftwareResampleContext { + if c == nil { + return nil + } + src := &SoftwareResampleContext{c: c} + classers.set(src) + return src +} + +// https://ffmpeg.org/doxygen/7.0/group__lswr.html#gaf58c4ff10f73d74bdab8e5aa7193147c +func AllocSoftwareResampleContext() *SoftwareResampleContext { + return newSoftwareResampleContextFromC(C.swr_alloc()) +} + +// https://ffmpeg.org/doxygen/7.0/group__lswr.html#ga818f7d78b1ad7d8d5b70de374b668c34 +func (src *SoftwareResampleContext) Free() { + // Make sure to clone the classer before freeing the object since + // the C free method may reset the pointer + c := newClonedClasser(src) + C.swr_free(&src.c) + // Make sure to remove from classers after freeing the object since + // the C free method may use methods needing the classer + if c != nil { + classers.del(c) + } +} + +var _ Classer = (*SoftwareResampleContext)(nil) + +// https://ffmpeg.org/doxygen/7.0/structSwrContext.html#a7e13adcdcbc11bcc933cb7d0b9f839a0 +func (src *SoftwareResampleContext) Class() *Class { + return newClassFromC(unsafe.Pointer(src.c)) +} + +// https://ffmpeg.org/doxygen/7.0/group__lswr.html#gac482028c01d95580106183aa84b0930c +func (src_ *SoftwareResampleContext) ConvertFrame(src, dst *Frame) error { + var csrc *C.AVFrame + if src != nil { + csrc = src.c + } + return newError(C.swr_convert_frame(src_.c, dst.c, csrc)) +} + +// https://ffmpeg.org/doxygen/7.0/group__lswr.html#ga5121a5a7890a2d23b72dc871dd0ebb06 +func (src_ *SoftwareResampleContext) Delay(base int64) int64 { + return int64(C.swr_get_delay(src_.c, C.int64_t(base))) +} diff --git a/software_resample_context_test.go b/software_resample_context_test.go new file mode 100644 index 0000000..8943466 --- /dev/null +++ b/software_resample_context_test.go @@ -0,0 +1,45 @@ +package astiav + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestSoftwareResampleContext(t *testing.T) { + src := AllocSoftwareResampleContext() + defer src.Free() + + f1, err := globalHelper.inputLastFrame("video.mp4", MediaTypeAudio, nil) + require.NoError(t, err) + + f2 := AllocFrame() + defer f2.Free() + f2.SetChannelLayout(ChannelLayoutMono) + f2.SetNbSamples(300) + f2.SetSampleFormat(SampleFormatS16) + f2.SetSampleRate(24000) + require.NoError(t, f2.AllocBuffer(0)) + require.NoError(t, f2.AllocSamples(0)) + + for _, v := range []struct { + expectedDelay int64 + expectedNbSamples int + f *Frame + }{ + { + expectedDelay: 212, + expectedNbSamples: 300, + f: f1, + }, + { + expectedDelay: 17, + expectedNbSamples: 212, + }, + {expectedDelay: 17}, + } { + require.NoError(t, src.ConvertFrame(v.f, f2)) + require.Equal(t, v.expectedNbSamples, f2.NbSamples()) + require.Equal(t, v.expectedDelay, src.Delay(int64(f2.SampleRate()))) + } +}