PyAV version: 17.0.0
I have a USB webcam that offers both mjpeg and h264 encoding, mjpeg being the default. In FFmpeg with dshow, i can specify which encoding to use by setting the input vcodec option to h264. PyAV however, does not seem to pass the vcodec (or c:v) option to dshow. Other input options like video_size and framerate work as expected.
import av
input_container = av.open(
"video=HD USB Camera",
format="dshow",
options={
"video_size": "1920x1080",
"framerate": "30",
"vcodec": "h264",
#"c:v": "h264",
}
)
in_stream = input_container.streams.video[0]
print("PyAV version:", av.__version__)
print("Input codec:", in_stream.codec_context.name)
print("Input resolution:", in_stream.codec_context.width, "x", in_stream.codec_context.height)
print("Input average rate:", in_stream.average_rate)
print("Input time_base:", in_stream.time_base)
Output:
PyAV version: 17.0.0
Input codec: mjpeg
Input resolution: 1920 x 1080
Input average rate: 10000000/333333
Input time_base: 1/10000000
Using ChatGPT i was able to make a small patch (3 lines) for PyAV to hardcode the input codec as a proof of concept. That was enough to successfully change the input codec to h264, and i also verified that the output stream contained valid h264 encoded video.
New output:
PyAV version: 17.0.0
Input codec: h264
Input resolution: 1920 x 1080
Input average rate: 10000000/333333
Input time_base: 1/10000000
Here's the changes i made:
Added video_codec_id to AVFormatContext struct in include/avformat.pxd:
cdef struct AVFormatContext:
unsigned int nb_streams
AVStream **streams
unsigned int nb_chapters
AVChapter **chapters
const AVInputFormat *iformat
const AVOutputFormat *oformat
AVIOContext *pb
AVIOInterruptCB interrupt_callback
AVDictionary *metadata
char filename
int64_t start_time
int64_t duration
int bit_rate
int flags
AVCodecID video_codec_id # <-------- Added this
AVCodecID audio_codec_id
void *opaque
int (*io_open)(
AVFormatContext *s,
AVIOContext **pb,
const char *url,
int flags,
AVDictionary **options
)
int (*io_close2)(AVFormatContext *s, AVIOContext *pb)
Added the h264 codec id to the AVCodecID enum in include/avcodec.pxd:
cdef enum AVCodecID:
AV_CODEC_ID_NONE
AV_CODEC_ID_MPEG2VIDEO
AV_CODEC_ID_MPEG1VIDEO
AV_CODEC_ID_PCM_ALAW
AV_CODEC_ID_PCM_BLURAY
AV_CODEC_ID_PCM_DVD
AV_CODEC_ID_PCM_F16LE
AV_CODEC_ID_PCM_F24LE
AV_CODEC_ID_PCM_F32BE
AV_CODEC_ID_PCM_F32LE
AV_CODEC_ID_PCM_F64BE
AV_CODEC_ID_PCM_F64LE
AV_CODEC_ID_PCM_LXF
AV_CODEC_ID_PCM_MULAW
AV_CODEC_ID_PCM_S16BE
AV_CODEC_ID_PCM_S16BE_PLANAR
AV_CODEC_ID_PCM_S16LE
AV_CODEC_ID_PCM_S16LE_PLANAR
AV_CODEC_ID_PCM_S24BE
AV_CODEC_ID_PCM_S24DAUD
AV_CODEC_ID_PCM_S24LE
AV_CODEC_ID_PCM_S24LE_PLANAR
AV_CODEC_ID_PCM_S32BE
AV_CODEC_ID_PCM_S32LE
AV_CODEC_ID_PCM_S32LE_PLANAR
AV_CODEC_ID_PCM_S64BE
AV_CODEC_ID_PCM_S64LE
AV_CODEC_ID_PCM_S8
AV_CODEC_ID_PCM_S8_PLANAR
AV_CODEC_ID_PCM_U16BE
AV_CODEC_ID_PCM_U16LE
AV_CODEC_ID_PCM_U24BE
AV_CODEC_ID_PCM_U24LE
AV_CODEC_ID_PCM_U32BE
AV_CODEC_ID_PCM_U32LE
AV_CODEC_ID_PCM_U8
AV_CODEC_ID_PCM_VIDC
AV_CODEC_ID_H264 # <------ Added this
Hardcoded the codec id in Container cinit in av/container/core.py:
@cython.cclass
class Container:
def __cinit__(
self,
sentinel,
file_,
format_name,
options,
container_options,
stream_options,
hwaccel,
metadata_encoding,
metadata_errors,
buffer_size,
open_timeout,
read_timeout,
io_open,
):
if sentinel is not _cinit_sentinel:
raise RuntimeError("cannot construct base Container")
writeable: cython.bint = isinstance(self, OutputContainer)
if not writeable and not isinstance(self, InputContainer):
raise RuntimeError("Container cannot be directly extended.")
if isinstance(file_, str):
self.name = file_
else:
self.name = str(getattr(file_, "name", "<none>"))
self.options = dict(options or ())
self.container_options = dict(container_options or ())
self.stream_options = [dict(x) for x in stream_options or ()]
self.hwaccel = hwaccel
self.metadata_encoding = metadata_encoding
self.metadata_errors = metadata_errors
self.open_timeout = open_timeout
self.read_timeout = read_timeout
self.buffer_size = buffer_size
self.io_open = io_open
acodec = None # no audio codec specified
if format_name is not None:
if ":" in format_name:
format_name, acodec = format_name.split(":")
self.format = ContainerFormat(format_name)
res: cython.int
name_obj: bytes = os.fsencode(self.name)
name: cython.p_char = name_obj
ofmt: cython.pointer[cython.const[lib.AVOutputFormat]]
if writeable:
self._myflag |= 1 # enum.writeable = True
ofmt = (
self.format.optr
if self.format
else lib.av_guess_format(cython.NULL, name, cython.NULL)
)
if ofmt == cython.NULL:
raise ValueError("Could not determine output format")
with cython.nogil:
# This does not actually open the file.
res = lib.avformat_alloc_output_context2(
cython.address(self.ptr),
ofmt,
cython.NULL,
name,
)
self.err_check(res)
else:
# We need the context before we open the input AND setup Python IO.
self.ptr = lib.avformat_alloc_context()
# Setup interrupt callback
if self.open_timeout is not None or self.read_timeout is not None:
self.ptr.interrupt_callback.callback = interrupt_cb
self.ptr.interrupt_callback.opaque = cython.address(
self.interrupt_callback_info
)
if acodec is not None:
self.ptr.audio_codec_id = getattr(AudioCodec, acodec)
self.ptr.video_codec_id = lib.AV_CODEC_ID_H264 # <-------- Added this
self.ptr.flags |= lib.AVFMT_FLAG_GENPTS
self.ptr.opaque = cython.cast(cython.p_void, self)
# Setup Python IO.
self.open_files = {}
if not isinstance(file_, basestring):
self.file = PyIOFile(file_, buffer_size, writeable)
self.ptr.pb = self.file.iocontext
if io_open is not None:
self.ptr.io_open = pyav_io_open
self.ptr.io_close2 = pyav_io_close
self.ptr.flags |= lib.AVFMT_FLAG_CUSTOM_IO
ifmt: cython.pointer[cython.const[lib.AVInputFormat]]
c_options: Dictionary
if not writeable:
ifmt = self.format.iptr if self.format else cython.NULL
c_options = Dictionary(self.options, self.container_options)
self.set_timeout(self.open_timeout)
self.start_timeout()
with cython.nogil:
res = lib.avformat_open_input(
cython.address(self.ptr), name, ifmt, cython.address(c_options.ptr)
)
self.set_timeout(None)
self.err_check(res)
self._myflag |= 2 # enum.input_was_opened = True
if format_name is None:
self.format = build_container_format(self.ptr.iformat, self.ptr.oformat)
PyAV version: 17.0.0
I have a USB webcam that offers both mjpeg and h264 encoding, mjpeg being the default. In FFmpeg with dshow, i can specify which encoding to use by setting the input vcodec option to h264. PyAV however, does not seem to pass the vcodec (or c:v) option to dshow. Other input options like video_size and framerate work as expected.
Output:
Using ChatGPT i was able to make a small patch (3 lines) for PyAV to hardcode the input codec as a proof of concept. That was enough to successfully change the input codec to h264, and i also verified that the output stream contained valid h264 encoded video.
New output:
Here's the changes i made:
Added video_codec_id to AVFormatContext struct in include/avformat.pxd:
Added the h264 codec id to the AVCodecID enum in include/avcodec.pxd:
Hardcoded the codec id in Container cinit in av/container/core.py: