Skip to content

Setting vcodec on input container #2243

@schmarcel02

Description

@schmarcel02

PyAV version: 17.0.0

I have a USB webcam that offers both mjpeg and h264 encoding, with mjpeg being the default. In FFmpeg with dshow, I can specify which encoding to use by setting the input vcodec option to h264. PyAV, however, does not seem to pass the vcodec (or c:v) option to dshow. Other input options like video_size and framerate work as expected.

import av


# Minimal reproduction: open the webcam through dshow, request h264 via the
# input options, and print what the opened video stream actually reports.
dshow_options = {
    "video_size": "1920x1080",
    "framerate": "30",
    "vcodec": "h264",
    # The "c:v" spelling of the same option was also tried:
    # "c:v": "h264",
}
input_container = av.open("video=HD USB Camera", format="dshow", options=dshow_options)

in_stream = input_container.streams.video[0]

# `!s` keeps the same str() rendering that print() applies to its arguments.
print(f"PyAV version: {av.__version__!s}")
print(f"Input codec: {in_stream.codec_context.name!s}")
print(
    f"Input resolution: {in_stream.codec_context.width!s}"
    f" x {in_stream.codec_context.height!s}"
)
print(f"Input average rate: {in_stream.average_rate!s}")
print(f"Input time_base: {in_stream.time_base!s}")

Output:

PyAV version: 17.0.0
Input codec: mjpeg
Input resolution: 1920 x 1080
Input average rate: 10000000/333333
Input time_base: 1/10000000

Using ChatGPT, I was able to make a small patch (3 lines) for PyAV to hardcode the input codec as a proof of concept. That was enough to successfully change the input codec to h264, and I also verified that the output stream contained valid h264 encoded video.

New output:

PyAV version: 17.0.0
Input codec: h264
Input resolution: 1920 x 1080
Input average rate: 10000000/333333
Input time_base: 1/10000000

Here are the changes I made:

Added video_codec_id to AVFormatContext struct in include/avformat.pxd:

    # Cython declaration of the AVFormatContext fields used from Python-level
    # code. Per the issue text, the only change in this block is the added
    # `video_codec_id` line.
    # NOTE(review): presumably this sits inside a `cdef extern from` block in
    # include/avformat.pxd, so field order/completeness here need not mirror
    # the C header -- confirm against the full file.
    cdef struct AVFormatContext:
        unsigned int nb_streams
        AVStream **streams
        unsigned int nb_chapters
        AVChapter **chapters
        # Read by Container.__cinit__ to build the container format when no
        # format name was given.
        const AVInputFormat *iformat
        const AVOutputFormat *oformat
        # Set to the PyIOFile's iocontext when a non-string file object is used.
        AVIOContext *pb
        # Container.__cinit__ installs interrupt_cb here when a timeout is set.
        AVIOInterruptCB interrupt_callback
        AVDictionary *metadata
        char filename
        int64_t start_time
        int64_t duration
        int bit_rate
        # OR-ed with AVFMT_FLAG_GENPTS / AVFMT_FLAG_CUSTOM_IO in __cinit__.
        int flags
        # Assigned in Container.__cinit__ for input containers (the patch).
        AVCodecID video_codec_id  # <-------- Added this
        # Assigned in Container.__cinit__ when format_name carries ":<acodec>".
        AVCodecID audio_codec_id
        # Holds a pointer back to the owning Container object.
        void *opaque
        # Custom open/close hooks; set to pyav_io_open / pyav_io_close when an
        # io_open callable is supplied.
        int (*io_open)(
            AVFormatContext *s,
            AVIOContext **pb,
            const char *url,
            int flags,
            AVDictionary **options
        )
        int (*io_close2)(AVFormatContext *s, AVIOContext *pb)

Added the h264 codec id to the AVCodecID enum in include/avcodec.pxd:

    # Codec identifiers made visible to Cython code.
    # NOTE(review): presumably declared inside a `cdef extern from` block, in
    # which case the numeric values come from the C header and the order of
    # the names below is irrelevant. If this were a plain (non-extern) enum,
    # members would be numbered by position and would not match FFmpeg's
    # values -- confirm against the full include/avcodec.pxd.
    cdef enum AVCodecID:
        AV_CODEC_ID_NONE
        AV_CODEC_ID_MPEG2VIDEO
        AV_CODEC_ID_MPEG1VIDEO
        AV_CODEC_ID_PCM_ALAW
        AV_CODEC_ID_PCM_BLURAY
        AV_CODEC_ID_PCM_DVD
        AV_CODEC_ID_PCM_F16LE
        AV_CODEC_ID_PCM_F24LE
        AV_CODEC_ID_PCM_F32BE
        AV_CODEC_ID_PCM_F32LE
        AV_CODEC_ID_PCM_F64BE
        AV_CODEC_ID_PCM_F64LE
        AV_CODEC_ID_PCM_LXF
        AV_CODEC_ID_PCM_MULAW
        AV_CODEC_ID_PCM_S16BE
        AV_CODEC_ID_PCM_S16BE_PLANAR
        AV_CODEC_ID_PCM_S16LE
        AV_CODEC_ID_PCM_S16LE_PLANAR
        AV_CODEC_ID_PCM_S24BE
        AV_CODEC_ID_PCM_S24DAUD
        AV_CODEC_ID_PCM_S24LE
        AV_CODEC_ID_PCM_S24LE_PLANAR
        AV_CODEC_ID_PCM_S32BE
        AV_CODEC_ID_PCM_S32LE
        AV_CODEC_ID_PCM_S32LE_PLANAR
        AV_CODEC_ID_PCM_S64BE
        AV_CODEC_ID_PCM_S64LE
        AV_CODEC_ID_PCM_S8
        AV_CODEC_ID_PCM_S8_PLANAR
        AV_CODEC_ID_PCM_U16BE
        AV_CODEC_ID_PCM_U16LE
        AV_CODEC_ID_PCM_U24BE
        AV_CODEC_ID_PCM_U24LE
        AV_CODEC_ID_PCM_U32BE
        AV_CODEC_ID_PCM_U32LE
        AV_CODEC_ID_PCM_U8
        AV_CODEC_ID_PCM_VIDC
        # Referenced as lib.AV_CODEC_ID_H264 by the patched Container.__cinit__.
        AV_CODEC_ID_H264   # <------ Added this

Hardcoded the codec id in Container cinit in av/container/core.py:

@cython.cclass
class Container:
    # Low-level constructor shared by input and output containers.
    #
    # It stores the caller's settings on the instance, allocates the format
    # context (`self.ptr`), wires up optional Python-level I/O and timeouts,
    # and, for input containers, opens the input.
    #
    # Raises RuntimeError when `sentinel` is not `_cinit_sentinel` or when the
    # instance is neither an InputContainer nor an OutputContainer, and
    # ValueError when no output format can be determined. Error codes returned
    # by the library calls are passed to `self.err_check`.
    def __cinit__(
        self,
        sentinel,
        file_,
        format_name,
        options,
        container_options,
        stream_options,
        hwaccel,
        metadata_encoding,
        metadata_errors,
        buffer_size,
        open_timeout,
        read_timeout,
        io_open,
    ):
        # Only callers holding the module-private sentinel may construct this.
        if sentinel is not _cinit_sentinel:
            raise RuntimeError("cannot construct base Container")

        # The direction is decided by the concrete subclass being built.
        writeable: cython.bint = isinstance(self, OutputContainer)
        if not writeable and not isinstance(self, InputContainer):
            raise RuntimeError("Container cannot be directly extended.")

        # A string is used as the name directly; otherwise fall back to the
        # object's `name` attribute, or "<none>" if it has none.
        if isinstance(file_, str):
            self.name = file_
        else:
            self.name = str(getattr(file_, "name", "<none>"))

        # Copy the option mappings so later changes by the caller do not leak in.
        self.options = dict(options or ())
        self.container_options = dict(container_options or ())
        self.stream_options = [dict(x) for x in stream_options or ()]

        self.hwaccel = hwaccel

        self.metadata_encoding = metadata_encoding
        self.metadata_errors = metadata_errors

        self.open_timeout = open_timeout
        self.read_timeout = read_timeout

        self.buffer_size = buffer_size
        self.io_open = io_open

        acodec = None  # no audio codec specified
        if format_name is not None:
            # "format:acodec" selects an audio codec alongside the format.
            # NOTE(review): the two-name unpacking raises ValueError if
            # format_name contains more than one ":".
            if ":" in format_name:
                format_name, acodec = format_name.split(":")
            self.format = ContainerFormat(format_name)

        res: cython.int
        # Keep the bytes object alive in `name_obj`; `name` points into it.
        name_obj: bytes = os.fsencode(self.name)
        name: cython.p_char = name_obj
        ofmt: cython.pointer[cython.const[lib.AVOutputFormat]]

        if writeable:
            self._myflag |= 1  # enum.writeable = True
            # Use the explicit format if given, else guess one from the name.
            ofmt = (
                self.format.optr
                if self.format
                else lib.av_guess_format(cython.NULL, name, cython.NULL)
            )
            if ofmt == cython.NULL:
                raise ValueError("Could not determine output format")

            with cython.nogil:
                # This does not actually open the file.
                res = lib.avformat_alloc_output_context2(
                    cython.address(self.ptr),
                    ofmt,
                    cython.NULL,
                    name,
                )
            self.err_check(res)
        else:
            # We need the context before we open the input AND setup Python IO.
            self.ptr = lib.avformat_alloc_context()

            # Setup interrupt callback
            if self.open_timeout is not None or self.read_timeout is not None:
                self.ptr.interrupt_callback.callback = interrupt_cb
                self.ptr.interrupt_callback.opaque = cython.address(
                    self.interrupt_callback_info
                )

            # The audio codec named after ":" is looked up on AudioCodec.
            if acodec is not None:
                self.ptr.audio_codec_id = getattr(AudioCodec, acodec)

            # NOTE(review): proof-of-concept only. This assignment is
            # unconditional, so every input container gets H264 as its video
            # codec id, and the "vcodec"/"c:v" entries in `options` are never
            # consulted. Per the issue, this was enough for the dshow webcam
            # to report h264 instead of mjpeg.
            self.ptr.video_codec_id = lib.AV_CODEC_ID_H264  # <-------- Added this

        self.ptr.flags |= lib.AVFMT_FLAG_GENPTS
        # Store a pointer back to this object on the context.
        self.ptr.opaque = cython.cast(cython.p_void, self)

        # Setup Python IO.
        self.open_files = {}
        # NOTE(review): `basestring` is not a Python 3 builtin; presumably it
        # is defined or imported elsewhere in this module -- confirm.
        if not isinstance(file_, basestring):
            self.file = PyIOFile(file_, buffer_size, writeable)
            self.ptr.pb = self.file.iocontext

        # A user-supplied io_open callable switches the context to custom I/O.
        if io_open is not None:
            self.ptr.io_open = pyav_io_open
            self.ptr.io_close2 = pyav_io_close
            self.ptr.flags |= lib.AVFMT_FLAG_CUSTOM_IO

        ifmt: cython.pointer[cython.const[lib.AVInputFormat]]
        c_options: Dictionary
        if not writeable:
            # NULL lets the library pick the input format itself.
            ifmt = self.format.iptr if self.format else cython.NULL
            c_options = Dictionary(self.options, self.container_options)

            # The open timeout applies only while the input is being opened.
            self.set_timeout(self.open_timeout)
            self.start_timeout()
            with cython.nogil:
                res = lib.avformat_open_input(
                    cython.address(self.ptr), name, ifmt, cython.address(c_options.ptr)
                )
            self.set_timeout(None)
            self.err_check(res)
            self._myflag |= 2  # enum.input_was_opened = True

        # No format was requested: derive it from what the context ended up with.
        if format_name is None:
            self.format = build_container_format(self.ptr.iformat, self.ptr.oformat)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions