Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions fastdeploy/input/ernie4_5_vl_processor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@

from .ernie4_5_vl_processor import Ernie4_5_VLProcessor
from .process import DataProcessor, fancy_print
from .process_video import read_video_decord
from .process_video import read_video_paddlecodec

This comment was marked as outdated.

from .utils.video_utils import VideoReaderWrapper

__all__ = [
"DataProcessor",
"fancy_print",
"VideoReaderWrapper",
"read_video_decord",
"read_video_paddlecodec",
"Ernie4_5_VLProcessor",
]
6 changes: 3 additions & 3 deletions fastdeploy/input/ernie4_5_vl_processor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from fastdeploy.utils import data_processor_logger

from .image_preprocessor.image_preprocessor_adaptive import AdaptiveImageProcessor
from .process_video import read_frames_decord, read_video_decord
from .process_video import read_frames_paddlecodec, read_video_paddlecodec
from .utils.render_timestamp import render_frame_timestamp


Expand Down Expand Up @@ -630,7 +630,7 @@ def _extract_labels(self, outputs: Dict, tgts: List[str]) -> None:
outputs["labels"] = labels

def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]:
reader, meta, path = read_video_decord(url, save_to_disk=False)
reader, meta, path = read_video_paddlecodec(url, save_to_disk=False)

video_frame_args = dict()
video_frame_args["fps"] = item.get("fps", self.fps)
Expand All @@ -641,7 +641,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]:

video_frame_args = self._set_video_frame_args(video_frame_args, meta)

frames_data, _, timestamps = read_frames_decord(
frames_data, _, timestamps = read_frames_paddlecodec(
path,
reader,
meta,
Expand Down
8 changes: 4 additions & 4 deletions fastdeploy/input/ernie4_5_vl_processor/process_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
from .utils.video_utils import VideoReaderWrapper


def read_video_decord(video_path, save_to_disk):
"""get reader and meta by decord"""
def read_video_paddlecodec(video_path, save_to_disk):

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Bug Ernie 这条读取链路同样沿用了为 decord 准备的 bytes -> io.BytesIO 适配方式;切换到 paddlecodec 后,非 GIF 的视频字节会以 BytesIO 的形式传入 VideoDecoder,而默认固定的 paddlecodec 0.1 后端无法识别这种输入。

建议修复方式:这里直接保留原始 bytes 并传给 VideoReaderWrapper,把 GIF/非 GIF 的 bytes 处理统一放到 wrapper 中。修复后补一个非 GIF bytes 输入的测试,其中 fake decoder 需要校验收到的是 bytes 或真实的临时文件路径,而不是 BytesIO。

"""get reader and meta by paddlecodec"""
# video_path = get_downloadable(video_path, save_to_disk=save_to_disk)
if isinstance(video_path, VideoReaderWrapper):
video_reader = video_path
Expand Down Expand Up @@ -113,7 +113,7 @@ def get_frame_indices(
return frame_indices


def read_frames_decord(
def read_frames_paddlecodec(
video_path,
video_reader,
video_meta,
Expand All @@ -126,7 +126,7 @@ def read_frames_decord(
frame_indices=None,
tol=10,
):
"""get frames by decord"""
"""get frames by paddlecodec"""

if frame_indices is None:
frame_indices = get_frame_indices(
Expand Down
82 changes: 68 additions & 14 deletions fastdeploy/input/ernie4_5_vl_processor/utils/video_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
import os
from tempfile import NamedTemporaryFile as ntf

import decord
import numpy as np
import paddle

from fastdeploy.utils import get_logger

try:
# moviepy 1.0
Expand All @@ -27,6 +30,8 @@
# moviepy 2.0
import moviepy as mp

logger = get_logger("video_utils")


def is_gif(data: bytes) -> bool:
"""
Expand All @@ -35,19 +40,24 @@ def is_gif(data: bytes) -> bool:
return data[:6] in (b"GIF87a", b"GIF89a")


class VideoReaderWrapper(decord.VideoReader):
"""
Solving memory leak bug
class _NumpyFrame:
"""Wrapper so that frame[idx].asnumpy() keeps working with paddlecodec."""

def __init__(self, array):
self._array = array

def asnumpy(self):
return self._array

https://github.com/dmlc/decord/issues/208
"""

class VideoReaderWrapper:
"""paddlecodec VideoDecoder wrapper with GIF support."""

def __init__(self, video_path, *args, **kwargs):
with ntf(delete=True, suffix=".gif") as gif_file:
gif_input = None
self.original_file = None
if isinstance(video_path, str):
self.original_file = video_path
if video_path.lower().endswith(".gif"):
gif_input = video_path
elif isinstance(video_path, bytes):
Expand All @@ -70,14 +80,58 @@ def __init__(self, video_path, *args, **kwargs):
video_path = mp4_file.name
self.original_file = video_path

super().__init__(video_path, *args, **kwargs)
self.seek(0)
with paddle.use_compat_guard(enable=True, scope={"torchcodec"}):
try:
import sys

from torchcodec.decoders import VideoDecoder

sys.modules["torchcodec"] = None

This comment was marked as outdated.

except (ImportError, RuntimeError) as e:
logger.error(
f"Failed to load 'torchcodec' backend via Paddle proxy.\n"
f" - Common Causes:\n"
f" 1. Conflict with official 'torch' or 'torchcodec' packages.\n"
f" 2. Missing FFmpeg libraries or System library mismatch (CXXABI).\n"
f" - Recommended Fix Steps:\n"
f" 1. Install dependencies: `conda install ffmpeg -c conda-forge` or `apt-get update && apt-get install ffmpeg` \n"
f" 2. Uninstall conflicts: `pip uninstall torchcodec paddlecodec -y`\n"
f" 3. Reinstall packages: `pip install paddlecodec --force-reinstall`\n"
f" - If you encounter 'CXXABI' or 'libstdc++' errors, your system libraries might be outdated.\n"
f" Try prioritizing Conda libraries by running: `LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH python your_script.py`\n"
f" - Original Error: {e}"
)
raise
PADDLECODEC_NUM_THREADS = int(os.environ.get("PADDLECODEC_NUM_THREADS", 0))
self._decoder = VideoDecoder(
video_path,
seek_mode="exact",

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Bug Ernie 专用 wrapper 也传入了 seek_mode,而默认依赖固定到 paddlecodec==0.1.0 时该参数不可用;read_video_paddlecodec() 会在创建 VideoDecoder 时直接失败。

建议修复方式:和共享 fastdeploy.input.video_utils 保持一致,删除该参数,或同步提升并固定所有 requirements 中的 paddlecodec 版本到支持 seek_mode 的版本,同时让单测使用会拒绝未知 kwargs 的 fake decoder。

num_ffmpeg_threads=PADDLECODEC_NUM_THREADS,
device=kwargs.get("device", "cpu"),
dimension_order="NHWC",
)

def __len__(self):
return self._decoder.metadata.num_frames

def __getitem__(self, key):
frames = super().__getitem__(key)
self.seek(0)
return frames
if isinstance(key, (int, np.integer)):
frame = self._decoder.get_frames_at(indices=[int(key)]).data[0]
return _NumpyFrame(frame.numpy())
if isinstance(key, slice):
indices = list(range(*key.indices(len(self))))
else:
indices = list(key) if not isinstance(key, list) else key
frames = self._decoder.get_frames_at(indices=indices).data
return _NumpyFrame(frames.numpy())

def get_avg_fps(self):
return self._decoder.metadata.average_fps

def __del__(self):
if self.original_file and os.path.exists(self.original_file):
os.remove(self.original_file)
original_file = getattr(self, "original_file", None)
if original_file and os.path.exists(original_file):
try:
os.remove(original_file)
except OSError:
pass
4 changes: 2 additions & 2 deletions fastdeploy/input/paddleocr_vl_processor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from fastdeploy.entrypoints.chat_utils import parse_chat_messages
from fastdeploy.input.mm_data_processor import MMBaseDataProcessor
from fastdeploy.input.utils import IDS_TYPE_FLAG
from fastdeploy.input.video_utils import read_video_decord
from fastdeploy.input.video_utils import read_video_paddlecodec
from fastdeploy.input.video_utils import sample_frames_paddleocr as sample_frames
from fastdeploy.multimodal.hasher import MultimodalHasher
from fastdeploy.utils import data_processor_logger
Expand Down Expand Up @@ -530,7 +530,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> Tuple[np.ndarray, Dic
- frames: Processed video frames as numpy array
- metadata: Updated video metadata dictionary
"""
reader, meta, _ = read_video_decord(url, save_to_disk=False)
reader, meta, _ = read_video_paddlecodec(url, save_to_disk=False)

# Apply frame sampling if fps or target_frames specified
fps = item.get("fps", self.fps)
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/input/qwen3_vl_processor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from fastdeploy.entrypoints.chat_utils import parse_chat_messages
from fastdeploy.input.mm_data_processor import MMBaseDataProcessor
from fastdeploy.input.utils import IDS_TYPE_FLAG
from fastdeploy.input.video_utils import read_video_decord
from fastdeploy.input.video_utils import read_video_paddlecodec
from fastdeploy.input.video_utils import sample_frames_qwen as sample_frames
from fastdeploy.multimodal.hasher import MultimodalHasher
from fastdeploy.utils import data_processor_logger
Expand Down Expand Up @@ -681,7 +681,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> Tuple[np.ndarray, Dic
- frames: Processed video frames as numpy array
- metadata: Updated video metadata dictionary
"""
reader, meta, _ = read_video_decord(url, save_to_disk=False)
reader, meta, _ = read_video_paddlecodec(url, save_to_disk=False)

# Apply frame sampling if fps or target_frames specified
fps = item.get("fps", self.fps)
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/input/qwen_vl_processor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from fastdeploy.entrypoints.chat_utils import parse_chat_messages
from fastdeploy.input.mm_data_processor import MMBaseDataProcessor
from fastdeploy.input.utils import IDS_TYPE_FLAG
from fastdeploy.input.video_utils import read_video_decord
from fastdeploy.input.video_utils import read_video_paddlecodec
from fastdeploy.input.video_utils import sample_frames_qwen as sample_frames
from fastdeploy.multimodal.hasher import MultimodalHasher
from fastdeploy.utils import data_processor_logger
Expand Down Expand Up @@ -531,7 +531,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> Tuple[np.ndarray, Dic
- frames: Processed video frames as numpy array
- metadata: Updated video metadata dictionary
"""
reader, meta, _ = read_video_decord(url, save_to_disk=False)
reader, meta, _ = read_video_paddlecodec(url, save_to_disk=False)

# Apply frame sampling if fps or target_frames specified
fps = item.get("fps", self.fps)
Expand Down
81 changes: 60 additions & 21 deletions fastdeploy/input/video_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Shared video utilities: VideoReaderWrapper, read_video_decord, and sample_frames."""
"""Shared video utilities: VideoReaderWrapper, read_video_paddlecodec, and sample_frames."""

import io
import math
Expand All @@ -21,13 +21,16 @@
from typing import Optional, Union

import numpy as np
import paddle

from fastdeploy.input.image_processors.common import ceil_by_factor, floor_by_factor
from fastdeploy.utils import data_processor_logger
from fastdeploy.utils import data_processor_logger, get_logger

logger = get_logger("video_utils")

__all__ = [
"VideoReaderWrapper",
"read_video_decord",
"read_video_paddlecodec",

This comment was marked as outdated.

"sample_frames",
"sample_frames_qwen",
"sample_frames_paddleocr",
Expand All @@ -44,15 +47,20 @@ def _is_gif(data: bytes) -> bool:
return data[:6] in (b"GIF87a", b"GIF89a")


class VideoReaderWrapper:
"""decord.VideoReader wrapper that fixes a memory leak and adds GIF support.
class _NumpyFrame:
"""Wrapper so that frame[idx].asnumpy() keeps working with paddlecodec."""

Reference: https://github.com/dmlc/decord/issues/208
"""
def __init__(self, array):
self._array = array

def asnumpy(self):
return self._array

def __init__(self, video_path, *args, **kwargs):
import decord

class VideoReaderWrapper:
"""paddlecodec VideoDecoder wrapper with GIF support."""

def __init__(self, video_path, *args, **kwargs):
try:
# moviepy 1.0
import moviepy.editor as mp
Expand Down Expand Up @@ -91,22 +99,53 @@ def __init__(self, video_path, *args, **kwargs):
video_path = mp4_path
self.original_file = video_path # temp mp4, cleaned up in __del__

self._reader = decord.VideoReader(video_path, *args, **kwargs)
self._reader.seek(0)
with paddle.use_compat_guard(enable=True, scope={"torchcodec"}):
try:
import sys

from torchcodec.decoders import VideoDecoder

sys.modules["torchcodec"] = None

This comment was marked as outdated.

except (ImportError, RuntimeError) as e:
logger.error(
f"Failed to load 'torchcodec' backend via Paddle proxy.\n"
f" - Common Causes:\n"
f" 1. Conflict with official 'torch' or 'torchcodec' packages.\n"
f" 2. Missing FFmpeg libraries or System library mismatch (CXXABI).\n"
f" - Recommended Fix Steps:\n"
f" 1. Install dependencies: `conda install ffmpeg -c conda-forge` or `apt-get update && apt-get install ffmpeg` \n"
f" 2. Uninstall conflicts: `pip uninstall torchcodec paddlecodec -y`\n"
f" 3. Reinstall packages: `pip install paddlecodec --force-reinstall`\n"
f" - If you encounter 'CXXABI' or 'libstdc++' errors, your system libraries might be outdated.\n"
f" Try prioritizing Conda libraries by running: `LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH python your_script.py`\n"
f" - Original Error: {e}"
)
raise
PADDLECODEC_NUM_THREADS = int(os.environ.get("PADDLECODEC_NUM_THREADS", 0))
self._decoder = VideoDecoder(
video_path,
seek_mode="exact",

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Bug requirements.txt 当前固定 paddlecodec==0.1.0,但这个版本通过 torchcodec.decoders.VideoDecoder 暴露的构造参数不包含 seek_mode,因此这里会在所有共享视频读取路径初始化时直接抛出 TypeError。

建议修复方式:要么去掉 seek_mode="exact" 并按 0.1.0 支持的参数构造 VideoDecoder,要么把 requirements*.txt 统一提升/固定到支持该参数的 paddlecodec 版本,并补一个真实构造 smoke test,避免 mock 吞掉非法参数。

num_ffmpeg_threads=PADDLECODEC_NUM_THREADS,
device=kwargs.get("device", "cpu"),
dimension_order="NHWC",
)

def __len__(self):
return len(self._reader)
return self._decoder.metadata.num_frames

def __getitem__(self, key):
frames = self._reader[key]
self._reader.seek(0)
return frames
if isinstance(key, (int, np.integer)):
frame = self._decoder.get_frames_at(indices=[int(key)]).data[0]
return _NumpyFrame(frame.numpy())
if isinstance(key, slice):
indices = list(range(*key.indices(len(self))))
else:
indices = list(key) if not isinstance(key, list) else key
frames = self._decoder.get_frames_at(indices=indices).data
return _NumpyFrame(frames.numpy())

def get_avg_fps(self):
return self._reader.get_avg_fps()

def seek(self, pos):
return self._reader.seek(pos)
return self._decoder.metadata.average_fps

def __del__(self):
original_file = getattr(self, "original_file", None)
Expand All @@ -118,11 +157,11 @@ def __del__(self):


# ---------------------------------------------------------------------------
# read_video_decord
# read_video_paddlecodec
# ---------------------------------------------------------------------------


def read_video_decord(video_path, save_to_disk: bool = False):
def read_video_paddlecodec(video_path, save_to_disk: bool = False):

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Bug 这个函数保留了旧的 bytes -> io.BytesIO 归一化,但新后端不再是 decord;默认固定的 paddlecodec 0.1 VideoDecoder 支持原始 bytes,不支持把非 GIF 视频作为 BytesIO 传入。所以上层传入视频字节流时会在 wrapper 内构造 decoder 失败。

建议修复方式:不要在这里把普通 bytes 转成 BytesIO;直接把原始 bytes 交给 VideoReaderWrapper,并在 wrapper 内只对 GIF bytes/BytesIO 做临时文件转码。非 GIF 的 BytesIO 也应先读回原始 bytes,或落盘为临时文件后,再调用 VideoDecoder。

"""Load a video file and return (video_reader, video_meta, video_path).

video_meta contains keys: "fps", "duration", "num_of_frame".
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ paddleformers>=1.1.1
redis
etcd3
httpx
tool_helpers
fast_dataindex
cupy-cuda12x
pybind11[global]
tabulate
Expand All @@ -23,7 +23,7 @@ xlwt
visualdl
setuptools-scm>=8
prometheus-client
decord
paddlecodec
moviepy
triton
crcmod
Expand Down
Loading
Loading