Source code for documentation_generator

#!/usr/bin/env python3
"""
Documentation Generator - Creates comprehensive documentation and regeneration scripts
"""

import os
import json
import subprocess
import platform
from pathlib import Path
from typing import Dict, Any, List, Optional
from datetime import datetime



[docs]
class DocumentationGenerator:
    """Generates documentation and regeneration scripts for job application generation"""
    

[docs]
    def __init__(self, base_dir: str = "."):
        self.base_dir = Path(base_dir)

        

[docs]
    def generate_documentation(self, output_dir: Path, generation_info: Dict[str, Any], 
                             ai_content: Dict[str, str], profile_file: Path, 
                             job_file: Path) -> Dict[str, Path]:
        """Generate complete documentation package"""
        
        generated_docs = {}
        
        # Generate README.md
        readme_path = output_dir / "README.md"
        readme_content = self._generate_readme(generation_info, ai_content, profile_file, job_file)
        readme_path.write_text(readme_content, encoding='utf-8')
        generated_docs['README.md'] = readme_path
        
        # Generate regeneration scripts
        if os.getenv("GENERATE_REGENERATION_SCRIPTS", "true").lower() == "true":
            # Linux/macOS script
            script_sh = output_dir / "regenerate.sh"
            script_content_sh = self._generate_regeneration_script_unix(generation_info)
            script_sh.write_text(script_content_sh, encoding='utf-8')
            script_sh.chmod(0o755)  # Make executable
            generated_docs['regenerate.sh'] = script_sh
            
            # Windows script
            script_bat = output_dir / "regenerate.bat"
            script_content_bat = self._generate_regeneration_script_windows(generation_info)
            script_bat.write_text(script_content_bat, encoding='utf-8')
            generated_docs['regenerate.bat'] = script_bat
        
        return generated_docs

    
    def _generate_readme(self, generation_info: Dict[str, Any], ai_content: Dict[str, str], 
                        profile_file: Path, job_file: Path) -> str:
        """Generate comprehensive README.md content"""
        
        gen_info = generation_info.get("generation_info", {})
        ai_stats = generation_info.get("ai_client_stats", {})
        content_info = generation_info.get("generated_content", {})
        
        # Calculate content statistics
        total_chars = sum(content_info.get(f"{key}_length", 0) for key in ai_content.keys())
        total_words = sum(content_info.get(f"{key}_words", 0) for key in ai_content.keys())
        
        # Get system information
        system_info = self._get_system_info()
        
        readme_content = f"""# Job Application Generation Report

**Generated on:** {gen_info.get('timestamp', 'Unknown')}  
**AI Provider:** {ai_stats.get('provider', 'Unknown')} ({ai_stats.get('model', 'Unknown')})  
**Output Folder:** `{gen_info.get('client_folder', 'Unknown')}`

---

## 📁 Generated Documents

This folder contains a complete German job application generated using AI:

- **📄 anschreiben.md** - Cover letter (Anschreiben)
- **📄 lebenslauf.md** - CV/Resume (Lebenslauf)  
- **📄 anlagen.md** - Attachments list (Anlagen)
- **📁 pdf/** - PDF versions of all documents
- **📊 generation_info.json** - Technical metadata
- **📜 generation.log** - Detailed generation log

---

## 🔄 Reproduction Instructions

To regenerate this exact application with the same configuration:

### Quick Start
```bash
# Linux/macOS
./regenerate.sh

# Windows
regenerate.bat
```

### Manual Reproduction
```bash
# Set environment variables
export AI_PROVIDER="{ai_stats.get('provider', 'auto')}"
export OUTPUT_STRUCTURE="by_model"
{self._get_env_vars_for_readme(gen_info)}

# Navigate to project root and run generation
cd {self.base_dir.resolve()}
make generate
```

---

## 📊 Input Files Used

- **Profile:** `{profile_file.name}`
- **Job Description:** `{job_file.name}`
- **Generation Method:** {self._get_generation_method(ai_stats)}

---

## 🤖 AI Content Analysis

| Content Type | Characters | Words | Description |
|--------------|------------|-------|-------------|
{self._generate_content_table(ai_content, content_info)}
| **Total** | **{total_chars:,}** | **{total_words:,}** | **Complete application** |

### Content Quality Indicators
- **AI Provider Available:** {'✅ Yes' if ai_stats.get('available', False) else '❌ No (using fallback)'}
- **Content Cached:** {'✅ Yes (from previous generation)' if self._is_cached_content(ai_stats) else '❌ No (freshly generated)'}
- **Generation Time:** {content_info.get('generation_time', 'Unknown')}

---

## ⚙️ System Requirements

### Required Dependencies
```bash
# Python packages
pip install -r requirements.txt

# System dependencies (for PDF generation)
{self._get_system_dependencies()}
```

### AI Provider Setup
{self._get_ai_provider_setup(ai_stats.get('provider', 'unknown'))}

---

## 🖥️ System Information

- **Operating System:** {system_info.get('os', 'Unknown')}
- **Python Version:** {system_info.get('python_version', 'Unknown')}
- **Generation Host:** {system_info.get('hostname', 'Unknown')}
- **Working Directory:** `{system_info.get('cwd', 'Unknown')}`

---

## 🔧 Troubleshooting

### Common Issues

**AI Provider Not Available**
```bash
# Check AI provider status
make test-providers

# Install missing dependencies
pip install anthropic  # For Claude
# OR install Ollama for local Llama models
```

**PDF Generation Failed**
```bash
# Install system dependencies
# macOS: brew install pango
# Ubuntu: sudo apt-get install libpango-1.0-0 libharfbuzz0b libpangoft2-1.0-0
```

**Permission Denied on Scripts**
```bash
# Make regeneration script executable
chmod +x regenerate.sh
```

### Validation
To verify this generation matches the original:
```bash
# Run regeneration test
python tests/test_regeneration.py --target="{gen_info.get('client_folder', 'unknown')}"
```

---

## 📈 Quality Metrics

- **Content Completeness:** {self._calculate_completeness_score(ai_content)}%
- **Template Coverage:** {len(ai_content)}/5 sections generated
- **File Generation:** {'✅ Success' if len(ai_content) > 0 else '❌ Failed'}

---

*Generated by [Bewerbung Generator](https://github.com/thsetz/Bewerbung) v1.0*  
*Documentation auto-generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*
"""
        
        return readme_content
    
    def _generate_regeneration_script_unix(self, generation_info: Dict[str, Any]) -> str:
        """Generate Unix/Linux regeneration script"""
        
        gen_info = generation_info.get("generation_info", {})
        ai_stats = generation_info.get("ai_client_stats", {})
        
        script_content = f"""#!/bin/bash
# Auto-generated regeneration script for job application
# Created: {gen_info.get('timestamp', 'Unknown')}
# AI Provider: {ai_stats.get('provider', 'Unknown')} ({ai_stats.get('model', 'Unknown')})

set -e  # Exit on any error

echo "🔄 Regenerating job application with same configuration..."
echo "📊 Original generation: {gen_info.get('timestamp', 'Unknown')}"
echo "🤖 AI Provider: {ai_stats.get('provider', 'Unknown')} ({ai_stats.get('model', 'Unknown')})"

# Color output
RED='\\033[0;31m'
GREEN='\\033[0;32m'
YELLOW='\\033[1;33m'
NC='\\033[0m' # No Color

# Check if we're in the right directory
if [ ! -f "Makefile" ] || [ ! -d "src" ]; then
    echo "${{RED}}❌ Error: Not in project root directory${{NC}}"
    echo "Please run this script from the Bewerbung project root"
    exit 1
fi

# Check dependencies
echo "🔍 Checking dependencies..."

# Check Python
if ! command -v python3 &> /dev/null; then
    echo "${{RED}}❌ Python3 not found${{NC}}"
    exit 1
fi

# Check virtual environment
if [ ! -d ".venv" ]; then
    echo "${{YELLOW}}⚠️  Virtual environment not found, creating one...${{NC}}"
    python3 -m venv .venv
fi

# Activate virtual environment
source .venv/bin/activate

# Install dependencies
echo "📦 Installing dependencies..."
pip install -r requirements.txt

# Set environment variables for exact reproduction
{self._generate_env_vars_section(generation_info)}

# Check AI provider availability
echo "🤖 Checking AI provider availability..."
make test-providers || echo "${{YELLOW}}⚠️  Some AI providers may not be available${{NC}}"

# Run generation
echo "🚀 Starting generation..."
make generate

echo "${{GREEN}}✅ Regeneration completed successfully!${{NC}}"
echo "📁 Check output in: Ausgabe/"
echo "🔍 Compare with original using: python tests/test_regeneration.py"
"""
        
        return script_content
    
    def _generate_regeneration_script_windows(self, generation_info: Dict[str, Any]) -> str:
        """Generate Windows batch regeneration script"""
        
        gen_info = generation_info.get("generation_info", {})
        ai_stats = generation_info.get("ai_client_stats", {})
        
        script_content = f"""@echo off
REM Auto-generated regeneration script for job application
REM Created: {gen_info.get('timestamp', 'Unknown')}
REM AI Provider: {ai_stats.get('provider', 'Unknown')} ({ai_stats.get('model', 'Unknown')})

echo 🔄 Regenerating job application with same configuration...
echo 📊 Original generation: {gen_info.get('timestamp', 'Unknown')}
echo 🤖 AI Provider: {ai_stats.get('provider', 'Unknown')} ({ai_stats.get('model', 'Unknown')})

REM Check if we're in the right directory
if not exist "Makefile" (
    echo ❌ Error: Not in project root directory
    echo Please run this script from the Bewerbung project root
    exit /b 1
)

REM Check Python
python --version >nul 2>&1
if errorlevel 1 (
    echo ❌ Python not found
    exit /b 1
)

REM Check virtual environment
if not exist ".venv" (
    echo ⚠️  Virtual environment not found, creating one...
    python -m venv .venv
)

REM Activate virtual environment
call .venv\\Scripts\\activate.bat

REM Install dependencies
echo 📦 Installing dependencies...
pip install -r requirements.txt

REM Set environment variables for exact reproduction
{self._generate_env_vars_section_windows(generation_info)}

REM Run generation
echo 🚀 Starting generation...
make generate

echo ✅ Regeneration completed successfully!
echo 📁 Check output in: Ausgabe/
echo 🔍 Compare with original using: python tests/test_regeneration.py

pause
"""
        
        return script_content
    
    def _generate_env_vars_section(self, generation_info: Dict[str, Any]) -> str:
        """Generate environment variables section for Unix script"""
        gen_info = generation_info.get("generation_info", {})
        
        provider = gen_info.get("ai_provider", "auto")
        model = gen_info.get("ai_model", "")
        
        env_vars = [
            f'export AI_PROVIDER="{provider}"',
            'export OUTPUT_STRUCTURE="by_model"',
            'export INCLUDE_GENERATION_METADATA="true"'
        ]
        
        # Add provider-specific variables
        if provider == "llama":
            env_vars.append(f'export LLAMA_MODEL="{model}"')
        
        return "\n".join(env_vars)
    
    def _generate_env_vars_section_windows(self, generation_info: Dict[str, Any]) -> str:
        """Generate environment variables section for Windows script"""
        gen_info = generation_info.get("generation_info", {})
        
        provider = gen_info.get("ai_provider", "auto")
        model = gen_info.get("ai_model", "")
        
        env_vars = [
            f'set AI_PROVIDER={provider}',
            'set OUTPUT_STRUCTURE=by_model',
            'set INCLUDE_GENERATION_METADATA=true'
        ]
        
        # Add provider-specific variables
        if provider == "llama":
            env_vars.append(f'set LLAMA_MODEL={model}')
        
        return "\n".join(env_vars)
    
    def _generate_content_table(self, ai_content: Dict[str, str], content_info: Dict[str, Any]) -> str:
        """Generate markdown table of content analysis"""
        
        content_descriptions = {
            'einstiegstext': 'Opening paragraph introducing interest',
            'fachliche_passung': 'Technical qualifications and experience',
            'motivationstext': 'Motivation and enthusiasm for role',
            'mehrwert': 'Value proposition and achievements',
            'abschlusstext': 'Professional closing and call to action'
        }
        
        table_rows = []
        for key, content in ai_content.items():
            chars = content_info.get(f"{key}_length", len(content))
            words = content_info.get(f"{key}_words", len(content.split()))
            description = content_descriptions.get(key, 'Generated content section')
            
            table_rows.append(f"| {key.replace('_', ' ').title()} | {chars:,} | {words:,} | {description} |")
        
        return "\n".join(table_rows)
    
    def _get_system_info(self) -> Dict[str, str]:
        """Get system information for documentation"""
        import socket
        import sys
        
        return {
            'os': f"{platform.system()} {platform.release()}",
            'python_version': f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
            'hostname': socket.gethostname(),
            'cwd': str(Path.cwd())
        }
    
    def _get_generation_method(self, ai_stats: Dict[str, Any]) -> str:
        """Determine how content was generated"""
        if not ai_stats.get('available', False):
            return "Sample content (AI not available)"
        elif ai_stats.get('cache_enabled', False):
            return "AI generated (may use cache)"
        else:
            return "AI generated (fresh)"
    
    def _is_cached_content(self, ai_stats: Dict[str, Any]) -> bool:
        """Check if content was likely cached"""
        return ai_stats.get('cache_enabled', False)
    
    def _get_system_dependencies(self) -> str:
        """Get system dependencies based on platform"""
        system = platform.system()
        
        if system == "Darwin":  # macOS
            return "brew install pango"
        elif system == "Linux":
            return "sudo apt-get install libpango-1.0-0 libharfbuzz0b libpangoft2-1.0-0"
        else:  # Windows
            return "# Windows: Dependencies included with WeasyPrint"
    
    def _get_ai_provider_setup(self, provider: str) -> str:
        """Get setup instructions for specific AI provider"""
        
        setups = {
            'claude': """**Claude API Setup:**
```bash
# Get API key from https://console.anthropic.com/
# Add to .env.local:
echo "ANTHROPIC_API_KEY=your_api_key_here" >> .env.local
```""",
            
            'llama': """**Ollama Setup:**
```bash
# Install Ollama
curl -fsSL https://ollama.ai/install.sh | sh

# Start Ollama service
ollama serve

# Pull recommended model
ollama pull llama3.2:latest
```""",
            
            'sample': """**Sample Content:**
No additional setup required. Uses built-in sample content."""
        }
        
        return setups.get(provider, "Unknown AI provider")
    
    def _get_env_vars_for_readme(self, gen_info: Dict[str, Any]) -> str:
        """Get environment variables for README reproduction section"""
        env_vars = []
        
        # Add model-specific variables if needed
        model = gen_info.get('ai_model', '')
        if 'llama' in gen_info.get('ai_provider', '').lower():
            env_vars.append(f'export LLAMA_MODEL="{model}"')
        
        if env_vars:
            return "\n".join(env_vars)
        else:
            return "# No additional environment variables needed"
    
    def _calculate_completeness_score(self, ai_content: Dict[str, str]) -> int:
        """Calculate content completeness percentage"""
        expected_sections = ['einstiegstext', 'fachliche_passung', 'motivationstext', 'mehrwert', 'abschlusstext']
        completed_sections = len([key for key in expected_sections if key in ai_content and ai_content[key].strip()])
        
        return int((completed_sections / len(expected_sections)) * 100)