我想要每天分享一點「LLM 從底層堆疊的技術」,並且將每篇文章的閱讀時間控制在三分鐘以內,讓大家不會壓力太大,但又能夠每天成長一點。
def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
    """Pick ``clip_len`` frame indices from a randomly placed window of a video.

    A window of ``int(clip_len * frame_sample_rate)`` consecutive frames is
    placed at a random position within the first ``seg_len`` frames, and
    ``clip_len`` indices are spread evenly across that window.

    Args:
        clip_len: Number of frame indices to return.
        frame_sample_rate: Spacing factor between sampled frames; the window
            spans ``int(clip_len * frame_sample_rate)`` frames.
        seg_len: Total number of frames available to sample from.

    Returns:
        A 1-D ``np.int64`` array of ``clip_len`` non-decreasing indices taken
        from the chosen window.

    Raises:
        ValueError: If ``seg_len`` is smaller than the window length.
    """
    converted_len = int(clip_len * frame_sample_rate)

    if seg_len < converted_len:
        raise ValueError(
            f"Cannot sample a window of {converted_len} frames "
            f"(clip_len={clip_len}, frame_sample_rate={frame_sample_rate}) "
            f"from a video with only {seg_len} frames."
        )

    if seg_len == converted_len:
        # The window covers the whole video, so there is exactly one placement.
        # np.random.randint(low, high) rejects low == high, so skip the draw.
        end_idx = converted_len
    else:
        end_idx = np.random.randint(converted_len, seg_len)
    start_idx = end_idx - converted_len

    indices = np.linspace(start_idx, end_idx, num=clip_len)
    # linspace includes end_idx itself, which lies one past the window;
    # clip it back so every index stays inside [start_idx, end_idx - 1].
    return np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
執行取樣,從影片中取出 8 個 Frame:
# Fetch the demo clip from the "nielsr/video-demo" dataset repo and open it.
file_path = hf_hub_download(
    repo_id="nielsr/video-demo",
    filename="eating_spaghetti.mp4",
    repo_type="dataset",
)
container = av.open(file_path)

# Sample 8 frame indices, using the frame count reported by the first video
# stream as the upper bound.
total_frames = container.streams.video[0].frames
indices = sample_frame_indices(
    clip_len=8,
    frame_sample_rate=1,
    seg_len=total_frames,
)

# read_video_pyav is defined elsewhere; presumably it decodes the frames at
# the sampled indices -- confirm against its definition.
video = read_video_pyav(container, indices)

















