initial commit

2025-11-04 20:54:13 +01:00
parent 214d0a2a77
commit 95fb5c7020
3 changed files with 468 additions and 0 deletions
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+"""
+Meeting Audio Summarizer
+Transcribes audio files using local Whisper and summarizes using OpenAI-compatible API
+"""
+
+import argparse
+import os
+from pathlib import Path
+from typing import Optional
+import whisper
+from openai import OpenAI
+
+
+class MeetingSummarizer:
+    """
+    Handles audio transcription and summarization of meetings
+
+    Transcription runs through a locally loaded Whisper model; summarization
+    is delegated to a chat-completions endpoint of an OpenAI-compatible API.
+    """
+
+    def __init__(
+        self,
+        whisper_model: str = "base",
+        api_base_url: str = "https://api.openai.com/v1",
+        api_key: Optional[str] = None,
+        model_name: str = "gpt-4",
+        output_language: str = "english"
+    ):
+        """
+        Initialize the meeting summarizer
+
+        Args:
+            whisper_model: Whisper model size (tiny, base, small, medium, large)
+            api_base_url: Base URL for OpenAI-compatible API
+            api_key: API key (will use OPENAI_API_KEY env var if not provided)
+            model_name: Name of the LLM model to use
+            output_language: Language for the summary output (e.g., "english", "german", "spanish")
+
+        Raises:
+            ValueError: If no API key is passed and OPENAI_API_KEY is unset or empty
+        """
+        # Validate the cheap precondition first: loading the Whisper model is
+        # slow, so a missing key should be reported before that work starts.
+        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
+        if not self.api_key:
+            raise ValueError(
+                "API key not provided. Set OPENAI_API_KEY environment variable "
+                "or pass api_key parameter"
+            )
+
+        self.output_language = output_language
+        self.model_name = model_name
+
+        print(f"Loading Whisper model '{whisper_model}'...")
+        self.whisper_model = whisper.load_model(whisper_model)
+
+        self.client = OpenAI(
+            api_key=self.api_key,
+            base_url=api_base_url
+        )
+
+    def transcribe_audio(self, audio_path: str) -> dict:
+        """
+        Transcribe audio file using Whisper
+
+        Args:
+            audio_path: Path to audio file (mp3, wav, m4a, etc.)
+
+        Returns:
+            Dictionary with transcription results including text and segments
+
+        Raises:
+            FileNotFoundError: If audio_path does not point to an existing regular file
+        """
+        print(f"Transcribing audio file: {audio_path}")
+
+        # is_file() rather than exists(): a directory "exists" too, but cannot
+        # be transcribed and would only fail later with a less helpful error.
+        if not Path(audio_path).is_file():
+            raise FileNotFoundError(f"Audio file not found: {audio_path}")
+
+        result = self.whisper_model.transcribe(
+            audio_path,
+            language=None,  # Auto-detect language
+            verbose=False
+        )
+
+        print(f"Transcription complete. Length: {len(result['text'])} characters")
+        return result
+
+    def summarize_text(self, text: str) -> str:
+        """
+        Summarize transcribed text using LLM
+
+        Args:
+            text: Transcribed text to summarize
+
+        Returns:
+            Summary text returned by the model
+
+        Raises:
+            ValueError: If text is empty or contains only whitespace
+            RuntimeError: If the API response carries no message content
+        """
+        # An empty transcript (e.g. a silent recording) would only invite the
+        # model to invent meeting content, so refuse before making an API call.
+        if not text or not text.strip():
+            raise ValueError("Transcript is empty; nothing to summarize")
+
+        print("Generating summary using LLM...")
+
+        system_prompt = f"""You are an assistant that summarizes meeting transcripts.
+Create a structured summary in {self.output_language} with the following points:
+
+1. **Main Topics**: The most important topics discussed
+2. **Decisions**: Decisions that were made
+3. **Action Items**: Tasks and responsibilities
+4. **Next Steps**: Planned next steps
+
+Be precise and concrete. Write your entire response in {self.output_language}."""
+
+        response = self.client.chat.completions.create(
+            model=self.model_name,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": f"Please summarize this meeting transcript:\n\n{text}"}
+            ],
+            temperature=0.3,
+            max_tokens=2000
+        )
+
+        summary = response.choices[0].message.content
+        # The content field may be missing/empty; fail here with a clear message
+        # instead of returning a non-string that breaks callers later.
+        if not summary:
+            raise RuntimeError("LLM returned an empty summary")
+
+        print("Summary generated successfully")
+        return summary
+
+    @staticmethod
+    def _save_text(directory: Path, filename: str, content: str, label: str) -> Path:
+        """Write content as UTF-8 to directory/filename, creating the directory if needed."""
+        directory.mkdir(parents=True, exist_ok=True)
+        target = directory / filename
+        target.write_text(content, encoding="utf-8")
+        print(f"{label} saved to: {target}")
+        return target
+
+    def process_meeting(
+        self,
+        audio_path: str,
+        output_dir: Optional[str] = None,
+        save_transcript: bool = True
+    ) -> tuple[str, str]:
+        """
+        Complete pipeline: transcribe and summarize meeting audio
+
+        The transcript is written to disk before the summary is requested, so
+        a failed API call does not discard a finished transcription.
+
+        Args:
+            audio_path: Path to audio file
+            output_dir: Directory to save outputs (default: same as audio file)
+            save_transcript: Whether to save the full transcript
+
+        Returns:
+            Tuple of (transcript, summary)
+
+        Note:
+            The summary file is written only when output_dir is given or
+            save_transcript is true; with neither, nothing is written to disk.
+        """
+        # Transcribe
+        result = self.transcribe_audio(audio_path)
+        transcript = result["text"]
+
+        audio_file = Path(audio_path)
+        base_name = audio_file.stem
+        output_path = Path(output_dir) if output_dir else audio_file.parent
+
+        # Persist the expensive transcription result first
+        if save_transcript:
+            self._save_text(
+                output_path, f"{base_name}_transcript.txt", transcript, "Transcript"
+            )
+
+        # Generate summary
+        summary = self.summarize_text(transcript)
+
+        # Save the summary if any output was requested
+        if output_dir or save_transcript:
+            self._save_text(
+                output_path, f"{base_name}_summary.txt", summary, "Summary"
+            )
+
+        return transcript, summary
+
+
+def main() -> int:
+    """
+    Command-line entry point: parse arguments, run the pipeline, print the summary
+
+    Returns:
+        Process exit code: 0 on success, 1 if any step raised an exception,
+        130 if the run was interrupted from the keyboard
+    """
+    # Imported here so diagnostics can be sent to stderr without touching the
+    # module-level import block.
+    import sys
+
+    parser = argparse.ArgumentParser(
+        description="Transcribe and summarize meeting audio files"
+    )
+    parser.add_argument(
+        "audio_file",
+        help="Path to audio file (mp3, wav, m4a, etc.)"
+    )
+    parser.add_argument(
+        "--whisper-model",
+        default="base",
+        choices=["tiny", "base", "small", "medium", "large"],
+        help="Whisper model size (default: base)"
+    )
+    parser.add_argument(
+        "--api-base",
+        default="https://api.openai.com/v1",
+        help="Base URL for OpenAI-compatible API"
+    )
+    parser.add_argument(
+        "--api-key",
+        help="API key (defaults to OPENAI_API_KEY env var)"
+    )
+    parser.add_argument(
+        "--model",
+        default="gpt-4",
+        help="LLM model name (default: gpt-4)"
+    )
+    parser.add_argument(
+        "--language",
+        default="english",
+        help="Output language for the summary (e.g., english, german, spanish) (default: english)"
+    )
+    parser.add_argument(
+        "--output-dir",
+        help="Output directory for transcript and summary"
+    )
+    parser.add_argument(
+        "--no-transcript",
+        action="store_true",
+        help="Don't save the full transcript"
+    )
+
+    args = parser.parse_args()
+
+    try:
+        summarizer = MeetingSummarizer(
+            whisper_model=args.whisper_model,
+            api_base_url=args.api_base,
+            api_key=args.api_key,
+            model_name=args.model,
+            output_language=args.language
+        )
+
+        _transcript, summary = summarizer.process_meeting(
+            audio_path=args.audio_file,
+            output_dir=args.output_dir,
+            save_transcript=not args.no_transcript
+        )
+    except KeyboardInterrupt:
+        # Transcription can run for a long time; exit quietly on Ctrl-C
+        # instead of dumping a traceback.
+        print("Interrupted", file=sys.stderr)
+        return 130
+    except Exception as e:
+        # Top-level boundary: report the failure on stderr so it does not mix
+        # with the summary output on stdout, and signal it via the exit code.
+        print(f"Error: {e}", file=sys.stderr)
+        return 1
+
+    print("\n" + "=" * 80)
+    print("SUMMARY")
+    print("=" * 80)
+    print(summary)
+
+    return 0
+
+
+if __name__ == "__main__":
+    # Raise SystemExit directly: the bare exit() helper is installed by the
+    # site module for interactive use and is not guaranteed to exist in
+    # every run mode (e.g. python -S).
+    raise SystemExit(main())