#!/usr/bin/env python3
"""
Meeting Audio Summarizer

Transcribes audio files using local Whisper and summarizes using
OpenAI-compatible API
"""

import argparse
import os
import sys
from pathlib import Path
from typing import Optional

import whisper
from openai import OpenAI


class MeetingSummarizer:
    """Handles audio transcription and summarization of meetings"""

    def __init__(
        self,
        whisper_model: str = "base",
        api_base_url: str = "https://api.openai.com/v1",
        api_key: Optional[str] = None,
        model_name: str = "gpt-4",
        output_language: str = "english",
    ):
        """
        Initialize the meeting summarizer

        Args:
            whisper_model: Whisper model size (tiny, base, small, medium, large)
            api_base_url: Base URL for OpenAI-compatible API
            api_key: API key (will use OPENAI_API_KEY env var if not provided)
            model_name: Name of the LLM model to use
            output_language: Language for the summary output
                (e.g., "english", "german", "spanish")

        Raises:
            ValueError: If no API key is passed and OPENAI_API_KEY is unset
        """
        # Validate credentials before loading Whisper: loading the model is
        # the slow step, so a missing key should fail before it starts.
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError(
                "API key not provided. Set OPENAI_API_KEY environment variable "
                "or pass api_key parameter"
            )

        self.output_language = output_language
        self.model_name = model_name
        self.client = OpenAI(
            api_key=self.api_key,
            base_url=api_base_url,
        )

        print(f"Loading Whisper model '{whisper_model}'...")
        self.whisper_model = whisper.load_model(whisper_model)

    def transcribe_audio(self, audio_path: str) -> dict:
        """
        Transcribe audio file using Whisper

        Args:
            audio_path: Path to audio file (mp3, wav, m4a, etc.)

        Returns:
            Dictionary with transcription results; the "text" key holds the
            full transcript

        Raises:
            FileNotFoundError: If audio_path does not point to a regular file
        """
        print(f"Transcribing audio file: {audio_path}")

        # is_file() rather than exists(): a directory path must be rejected
        # here instead of failing later inside the transcription call.
        if not Path(audio_path).is_file():
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        result = self.whisper_model.transcribe(
            str(audio_path),
            language=None,  # Auto-detect language
            verbose=False,
        )

        print(f"Transcription complete. Length: {len(result['text'])} characters")
        return result

    def summarize_text(self, text: str) -> str:
        """
        Summarize transcribed text using LLM

        Args:
            text: Transcribed text to summarize

        Returns:
            Summary text

        Raises:
            ValueError: If text is empty or contains only whitespace
            RuntimeError: If the API response carries no summary content
        """
        # Refuse blank input: there is nothing to summarize, and asking the
        # model anyway would only produce an invented summary.
        if not text or not text.strip():
            raise ValueError("Transcript is empty; nothing to summarize")

        print("Generating summary using LLM...")

        language = self.output_language
        system_prompt = (
            "You are an assistant that summarizes meeting transcripts.\n"
            f"Create a structured summary in {language} with the following points:\n"
            "\n"
            "1. **Main Topics**: The most important topics discussed\n"
            "2. **Decisions**: Decisions that were made\n"
            "3. **Action Items**: Tasks and responsibilities\n"
            "4. **Next Steps**: Planned next steps\n"
            "\n"
            f"Be precise and concrete. Write your entire response in {language}."
        )

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": f"Please summarize this meeting transcript:\n\n{text}",
                },
            ],
            temperature=0.3,
            max_tokens=2000,
        )

        # The choices list can be empty and message.content can be None;
        # report that explicitly instead of returning None from a -> str method.
        summary = response.choices[0].message.content if response.choices else None
        if not summary:
            raise RuntimeError("LLM returned an empty response; no summary generated")

        print("Summary generated successfully")
        return summary

    def process_meeting(
        self,
        audio_path: str,
        output_dir: Optional[str] = None,
        save_transcript: bool = True,
    ) -> tuple[str, str]:
        """
        Complete pipeline: transcribe and summarize meeting audio

        Files are written when output_dir is given or save_transcript is
        True. With save_transcript=False and no output_dir, nothing is
        written to disk and the results are only returned.

        Args:
            audio_path: Path to audio file
            output_dir: Directory to save outputs (default: same as audio file)
            save_transcript: Whether to save the full transcript

        Returns:
            Tuple of (transcript, summary)
        """
        # Transcribe
        result = self.transcribe_audio(audio_path)
        transcript = result["text"]

        should_save = bool(output_dir) or save_transcript
        summary_file = None

        if should_save:
            audio_file = Path(audio_path)
            output_path = Path(output_dir) if output_dir else audio_file.parent
            output_path.mkdir(parents=True, exist_ok=True)

            base_name = audio_file.stem
            summary_file = output_path / f"{base_name}_summary.txt"

            # Persist the transcript before calling the LLM so that a failed
            # API call does not throw away the finished transcription.
            if save_transcript:
                transcript_file = output_path / f"{base_name}_transcript.txt"
                transcript_file.write_text(transcript, encoding="utf-8")
                print(f"Transcript saved to: {transcript_file}")

        # Generate summary
        summary = self.summarize_text(transcript)

        if summary_file is not None:
            summary_file.write_text(summary, encoding="utf-8")
            print(f"Summary saved to: {summary_file}")

        return transcript, summary


def main(argv: Optional[list[str]] = None) -> int:
    """
    Command-line entry point

    Args:
        argv: Argument list to parse (default: sys.argv[1:])

    Returns:
        Process exit code: 0 on success, 1 if any step raised an exception
    """
    parser = argparse.ArgumentParser(
        description="Transcribe and summarize meeting audio files"
    )
    parser.add_argument(
        "audio_file",
        help="Path to audio file (mp3, wav, m4a, etc.)"
    )
    parser.add_argument(
        "--whisper-model",
        default="base",
        choices=["tiny", "base", "small", "medium", "large"],
        help="Whisper model size (default: base)"
    )
    parser.add_argument(
        "--api-base",
        default="https://api.openai.com/v1",
        help="Base URL for OpenAI-compatible API"
    )
    parser.add_argument(
        "--api-key",
        help="API key (defaults to OPENAI_API_KEY env var)"
    )
    parser.add_argument(
        "--model",
        default="gpt-4",
        help="LLM model name (default: gpt-4)"
    )
    parser.add_argument(
        "--language",
        default="english",
        help="Output language for the summary (e.g., english, german, spanish) (default: english)"
    )
    parser.add_argument(
        "--output-dir",
        help="Output directory for transcript and summary"
    )
    parser.add_argument(
        "--no-transcript",
        action="store_true",
        help="Don't save the full transcript"
    )

    args = parser.parse_args(argv)

    # Top-level boundary: report any failure as a message plus a non-zero
    # exit code instead of a traceback.
    try:
        summarizer = MeetingSummarizer(
            whisper_model=args.whisper_model,
            api_base_url=args.api_base,
            api_key=args.api_key,
            model_name=args.model,
            output_language=args.language,
        )

        _, summary = summarizer.process_meeting(
            audio_path=args.audio_file,
            output_dir=args.output_dir,
            save_transcript=not args.no_transcript,
        )

        print("\n" + "=" * 80)
        print("SUMMARY")
        print("=" * 80)
        print(summary)

    except Exception as e:
        # Diagnostics go to stderr so stdout carries only the results.
        print(f"Error: {e}", file=sys.stderr)
        return 1

    return 0


if __name__ == "__main__":
    # sys.exit rather than the bare exit() helper, which the site module
    # installs and which is absent under `python -S`.
    sys.exit(main())