註冊 OpenAI 的 API，並取得 SECRET KEY，然後填到程式裡面的 openai.api_key 裡。
基本的 Python 知識以及 Debug 能力。
import openai
from pydub import AudioSegment
import os
import codecs
import tempfile
# Set your OpenAI API key here.
# The OPENAI_API_KEY environment variable is preferred so the secret key does
# not have to be stored in the source file; the placeholder below is only the
# fallback and can still be replaced by hand.
openai.api_key = os.environ.get('OPENAI_API_KEY') or 'your_openai_api_key'
def transcribe_audio_with_whisper(audio_file_path):
    """
    Transcribe an audio file using OpenAI's Whisper API.

    Args:
    - audio_file_path: Path to the audio file to transcribe.

    Returns:
    - The transcribed text as a string.
    """
    # The file must stay open while the request is sent, so the API call
    # lives inside the ``with`` block.
    with open(audio_file_path, "rb") as audio_file:
        response = openai.Audio.transcribe('whisper-1', audio_file)
    # The transcription response exposes the transcript as a top-level
    # "text" field; there is no enclosing "data" object.
    return response['text']
def split_and_transcribe_audio(file_path, segment_length_seconds=30):
    """
    Split an audio file into fixed-length segments, transcribe each segment
    and write the timestamped transcripts to a text file.

    The output file is written to the current working directory and is named
    after the input file with its extension replaced by ``.txt``. Each line
    has the form ``[MM:SS] text``, where the timestamp is the start offset of
    the segment within the original audio.

    Args:
    - file_path: Path to the audio file to split and transcribe.
    - segment_length_seconds: Length of each segment in seconds. Fractional
      values are accepted and truncated to whole milliseconds.

    Raises:
    - ValueError: If the segment length is shorter than one millisecond.
    - Exception: If the audio file cannot be loaded (the original error is
      attached as the cause).
    """
    # Work in integer milliseconds: range() needs an int step.
    segment_length_ms = int(segment_length_seconds * 1000)
    if segment_length_ms <= 0:
        raise ValueError("segment_length_seconds must be greater than 0")

    try:
        song = AudioSegment.from_file(file_path)
    except Exception as e:
        # Keep the original exception as the cause so the traceback still
        # shows why loading failed.
        raise Exception(f"Error loading audio file: {e}") from e

    transcripts = []
    with tempfile.TemporaryDirectory() as temp_dir:
        # Slice one segment at a time instead of building a list of every
        # segment up front.
        for i, start_ms in enumerate(range(0, len(song), segment_length_ms)):
            segment = song[start_ms:start_ms + segment_length_ms]
            segment_file_path = os.path.join(temp_dir, f"segment_{i}.mp3")
            segment.export(segment_file_path, format="mp3")
            transcript = transcribe_audio_with_whisper(segment_file_path)
            # Derive the timestamp from the segment's start offset so it
            # remains an int even for fractional segment lengths.
            time_in_seconds = start_ms // 1000
            timestamp = f"[{time_in_seconds // 60:02d}:{time_in_seconds % 60:02d}]"
            transcripts.append(timestamp + " " + transcript)

    output_file_name = os.path.splitext(os.path.basename(file_path))[0] + '.txt'
    with codecs.open(output_file_name, 'w', encoding='utf-8') as f:  # Using UTF-8 encoding
        f.write("\n".join(transcripts))
# Example usage: runs only when this file is executed as a script, so that
# importing the module does not start a transcription.
if __name__ == "__main__":
    split_and_transcribe_audio("test.mp3")
範例用法：程式最後展示了如何使用 split_and_transcribe_audio
函數來轉寫名為 "test.mp3" 的音訊檔案。