#!/usr/bin/env python3
"""
Documentation Generator - Creates comprehensive documentation and regeneration scripts
"""
import os
import json
import subprocess
import platform
from pathlib import Path
from typing import Dict, Any, List, Optional
from datetime import datetime
class DocumentationGenerator:
    """Generates documentation and regeneration scripts for job application generation.

    Produces a README.md describing a generated German job application plus
    platform-specific shell/batch scripts that reproduce the generation run.
    """

    def __init__(self, base_dir: str = "."):
        """Remember the project root; it is embedded into reproduction instructions.

        Args:
            base_dir: Path to the project root (defaults to the current directory).
        """
        self.base_dir = Path(base_dir)

    def generate_documentation(self, output_dir: Path, generation_info: Dict[str, Any],
                               ai_content: Dict[str, str], profile_file: Path,
                               job_file: Path) -> Dict[str, Path]:
        """Generate the complete documentation package into *output_dir*.

        Args:
            output_dir: Directory that already contains the generated application.
            generation_info: Metadata dict with "generation_info", "ai_client_stats"
                and "generated_content" sub-dicts (missing keys are tolerated).
            ai_content: Mapping of section key -> generated text.
            profile_file: Profile input file (only its name is documented).
            job_file: Job-description input file (only its name is documented).

        Returns:
            Mapping of generated file name -> written Path.
        """
        generated_docs = {}

        # Generate README.md
        readme_path = output_dir / "README.md"
        readme_content = self._generate_readme(generation_info, ai_content, profile_file, job_file)
        readme_path.write_text(readme_content, encoding='utf-8')
        generated_docs['README.md'] = readme_path

        # Generate regeneration scripts (opt-out via environment variable)
        if os.getenv("GENERATE_REGENERATION_SCRIPTS", "true").lower() == "true":
            # Linux/macOS script
            script_sh = output_dir / "regenerate.sh"
            script_content_sh = self._generate_regeneration_script_unix(generation_info)
            script_sh.write_text(script_content_sh, encoding='utf-8')
            script_sh.chmod(0o755)  # Make executable
            generated_docs['regenerate.sh'] = script_sh

            # Windows script
            script_bat = output_dir / "regenerate.bat"
            script_content_bat = self._generate_regeneration_script_windows(generation_info)
            script_bat.write_text(script_content_bat, encoding='utf-8')
            generated_docs['regenerate.bat'] = script_bat

        return generated_docs

    def _generate_readme(self, generation_info: Dict[str, Any], ai_content: Dict[str, str],
                         profile_file: Path, job_file: Path) -> str:
        """Generate comprehensive README.md content.

        Returns the full markdown document as a single string.
        """
        gen_info = generation_info.get("generation_info", {})
        ai_stats = generation_info.get("ai_client_stats", {})
        content_info = generation_info.get("generated_content", {})

        # Calculate content statistics.
        # NOTE: the fallbacks mirror _generate_content_table (len(content) /
        # word count) so the "Total" row always equals the sum of the table
        # rows, even when content_info lacks the *_length / *_words keys.
        total_chars = sum(content_info.get(f"{key}_length", len(content))
                          for key, content in ai_content.items())
        total_words = sum(content_info.get(f"{key}_words", len(content.split()))
                          for key, content in ai_content.items())

        # Get system information
        system_info = self._get_system_info()

        readme_content = f"""# Job Application Generation Report
**Generated on:** {gen_info.get('timestamp', 'Unknown')}
**AI Provider:** {ai_stats.get('provider', 'Unknown')} ({ai_stats.get('model', 'Unknown')})
**Output Folder:** `{gen_info.get('client_folder', 'Unknown')}`
---
## 📁 Generated Documents
This folder contains a complete German job application generated using AI:
- **📄 anschreiben.md** - Cover letter (Anschreiben)
- **📄 lebenslauf.md** - CV/Resume (Lebenslauf)
- **📄 anlagen.md** - Attachments list (Anlagen)
- **📁 pdf/** - PDF versions of all documents
- **📊 generation_info.json** - Technical metadata
- **📜 generation.log** - Detailed generation log
---
## 🔄 Reproduction Instructions
To regenerate this exact application with the same configuration:
### Quick Start
```bash
# Linux/macOS
./regenerate.sh
# Windows
regenerate.bat
```
### Manual Reproduction
```bash
# Set environment variables
export AI_PROVIDER="{ai_stats.get('provider', 'auto')}"
export OUTPUT_STRUCTURE="by_model"
{self._get_env_vars_for_readme(gen_info)}
# Navigate to project root and run generation
cd {self.base_dir.resolve()}
make generate
```
---
## 📊 Input Files Used
- **Profile:** `{profile_file.name}`
- **Job Description:** `{job_file.name}`
- **Generation Method:** {self._get_generation_method(ai_stats)}
---
## 🤖 AI Content Analysis
| Content Type | Characters | Words | Description |
|--------------|------------|-------|-------------|
{self._generate_content_table(ai_content, content_info)}
| **Total** | **{total_chars:,}** | **{total_words:,}** | **Complete application** |
### Content Quality Indicators
- **AI Provider Available:** {'✅ Yes' if ai_stats.get('available', False) else '❌ No (using fallback)'}
- **Content Cached:** {'✅ Yes (from previous generation)' if self._is_cached_content(ai_stats) else '❌ No (freshly generated)'}
- **Generation Time:** {content_info.get('generation_time', 'Unknown')}
---
## ⚙️ System Requirements
### Required Dependencies
```bash
# Python packages
pip install -r requirements.txt
# System dependencies (for PDF generation)
{self._get_system_dependencies()}
```
### AI Provider Setup
{self._get_ai_provider_setup(ai_stats.get('provider', 'unknown'))}
---
## 🖥️ System Information
- **Operating System:** {system_info.get('os', 'Unknown')}
- **Python Version:** {system_info.get('python_version', 'Unknown')}
- **Generation Host:** {system_info.get('hostname', 'Unknown')}
- **Working Directory:** `{system_info.get('cwd', 'Unknown')}`
---
## 🔧 Troubleshooting
### Common Issues
**AI Provider Not Available**
```bash
# Check AI provider status
make test-providers
# Install missing dependencies
pip install anthropic # For Claude
# OR install Ollama for local Llama models
```
**PDF Generation Failed**
```bash
# Install system dependencies
# macOS: brew install pango
# Ubuntu: sudo apt-get install libpango-1.0-0 libharfbuzz0b libpangoft2-1.0-0
```
**Permission Denied on Scripts**
```bash
# Make regeneration script executable
chmod +x regenerate.sh
```
### Validation
To verify this generation matches the original:
```bash
# Run regeneration test
python tests/test_regeneration.py --target="{gen_info.get('client_folder', 'unknown')}"
```
---
## 📈 Quality Metrics
- **Content Completeness:** {self._calculate_completeness_score(ai_content)}%
- **Template Coverage:** {len(ai_content)}/5 sections generated
- **File Generation:** {'✅ Success' if len(ai_content) > 0 else '❌ Failed'}
---
*Generated by [Bewerbung Generator](https://github.com/thsetz/Bewerbung) v1.0*
*Documentation auto-generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*
"""
        return readme_content

    def _generate_regeneration_script_unix(self, generation_info: Dict[str, Any]) -> str:
        """Generate Unix/Linux regeneration script (bash, caller makes it executable)."""
        gen_info = generation_info.get("generation_info", {})
        ai_stats = generation_info.get("ai_client_stats", {})

        script_content = f"""#!/bin/bash
# Auto-generated regeneration script for job application
# Created: {gen_info.get('timestamp', 'Unknown')}
# AI Provider: {ai_stats.get('provider', 'Unknown')} ({ai_stats.get('model', 'Unknown')})
set -e # Exit on any error
echo "🔄 Regenerating job application with same configuration..."
echo "📊 Original generation: {gen_info.get('timestamp', 'Unknown')}"
echo "🤖 AI Provider: {ai_stats.get('provider', 'Unknown')} ({ai_stats.get('model', 'Unknown')})"
# Color output
RED='\\033[0;31m'
GREEN='\\033[0;32m'
YELLOW='\\033[1;33m'
NC='\\033[0m' # No Color
# Check if we're in the right directory
if [ ! -f "Makefile" ] || [ ! -d "src" ]; then
echo "${{RED}}❌ Error: Not in project root directory${{NC}}"
echo "Please run this script from the Bewerbung project root"
exit 1
fi
# Check dependencies
echo "🔍 Checking dependencies..."
# Check Python
if ! command -v python3 &> /dev/null; then
echo "${{RED}}❌ Python3 not found${{NC}}"
exit 1
fi
# Check virtual environment
if [ ! -d ".venv" ]; then
echo "${{YELLOW}}⚠️ Virtual environment not found, creating one...${{NC}}"
python3 -m venv .venv
fi
# Activate virtual environment
source .venv/bin/activate
# Install dependencies
echo "📦 Installing dependencies..."
pip install -r requirements.txt
# Set environment variables for exact reproduction
{self._generate_env_vars_section(generation_info)}
# Check AI provider availability
echo "🤖 Checking AI provider availability..."
make test-providers || echo "${{YELLOW}}⚠️ Some AI providers may not be available${{NC}}"
# Run generation
echo "🚀 Starting generation..."
make generate
echo "${{GREEN}}✅ Regeneration completed successfully!${{NC}}"
echo "📁 Check output in: Ausgabe/"
echo "🔍 Compare with original using: python tests/test_regeneration.py"
"""
        return script_content

    def _generate_regeneration_script_windows(self, generation_info: Dict[str, Any]) -> str:
        """Generate Windows batch regeneration script."""
        gen_info = generation_info.get("generation_info", {})
        ai_stats = generation_info.get("ai_client_stats", {})

        script_content = f"""@echo off
REM Auto-generated regeneration script for job application
REM Created: {gen_info.get('timestamp', 'Unknown')}
REM AI Provider: {ai_stats.get('provider', 'Unknown')} ({ai_stats.get('model', 'Unknown')})
echo 🔄 Regenerating job application with same configuration...
echo 📊 Original generation: {gen_info.get('timestamp', 'Unknown')}
echo 🤖 AI Provider: {ai_stats.get('provider', 'Unknown')} ({ai_stats.get('model', 'Unknown')})
REM Check if we're in the right directory
if not exist "Makefile" (
echo ❌ Error: Not in project root directory
echo Please run this script from the Bewerbung project root
exit /b 1
)
REM Check Python
python --version >nul 2>&1
if errorlevel 1 (
echo ❌ Python not found
exit /b 1
)
REM Check virtual environment
if not exist ".venv" (
echo ⚠️ Virtual environment not found, creating one...
python -m venv .venv
)
REM Activate virtual environment
call .venv\\Scripts\\activate.bat
REM Install dependencies
echo 📦 Installing dependencies...
pip install -r requirements.txt
REM Set environment variables for exact reproduction
{self._generate_env_vars_section_windows(generation_info)}
REM Run generation
echo 🚀 Starting generation...
make generate
echo ✅ Regeneration completed successfully!
echo 📁 Check output in: Ausgabe/
echo 🔍 Compare with original using: python tests/test_regeneration.py
pause
"""
        return script_content

    def _generate_env_vars_section(self, generation_info: Dict[str, Any]) -> str:
        """Generate the `export VAR=...` block for the Unix regeneration script."""
        gen_info = generation_info.get("generation_info", {})
        provider = gen_info.get("ai_provider", "auto")
        model = gen_info.get("ai_model", "")

        env_vars = [
            f'export AI_PROVIDER="{provider}"',
            'export OUTPUT_STRUCTURE="by_model"',
            'export INCLUDE_GENERATION_METADATA="true"'
        ]
        # Add provider-specific variables
        if provider == "llama":
            env_vars.append(f'export LLAMA_MODEL="{model}"')
        return "\n".join(env_vars)

    def _generate_env_vars_section_windows(self, generation_info: Dict[str, Any]) -> str:
        """Generate the `set VAR=...` block for the Windows regeneration script."""
        gen_info = generation_info.get("generation_info", {})
        provider = gen_info.get("ai_provider", "auto")
        model = gen_info.get("ai_model", "")

        env_vars = [
            f'set AI_PROVIDER={provider}',
            'set OUTPUT_STRUCTURE=by_model',
            'set INCLUDE_GENERATION_METADATA=true'
        ]
        # Add provider-specific variables
        if provider == "llama":
            env_vars.append(f'set LLAMA_MODEL={model}')
        return "\n".join(env_vars)

    def _generate_content_table(self, ai_content: Dict[str, str], content_info: Dict[str, Any]) -> str:
        """Generate markdown table rows analyzing each generated content section.

        Falls back to len(content) / word count when content_info lacks the
        precomputed `*_length` / `*_words` keys.
        """
        content_descriptions = {
            'einstiegstext': 'Opening paragraph introducing interest',
            'fachliche_passung': 'Technical qualifications and experience',
            'motivationstext': 'Motivation and enthusiasm for role',
            'mehrwert': 'Value proposition and achievements',
            'abschlusstext': 'Professional closing and call to action'
        }
        table_rows = []
        for key, content in ai_content.items():
            chars = content_info.get(f"{key}_length", len(content))
            words = content_info.get(f"{key}_words", len(content.split()))
            description = content_descriptions.get(key, 'Generated content section')
            table_rows.append(f"| {key.replace('_', ' ').title()} | {chars:,} | {words:,} | {description} |")
        return "\n".join(table_rows)

    def _get_system_info(self) -> Dict[str, str]:
        """Get system information (OS, Python version, hostname, cwd) for documentation."""
        import socket
        import sys
        return {
            'os': f"{platform.system()} {platform.release()}",
            'python_version': f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
            'hostname': socket.gethostname(),
            'cwd': str(Path.cwd())
        }

    def _get_generation_method(self, ai_stats: Dict[str, Any]) -> str:
        """Describe how content was generated (sample fallback, cached AI, or fresh AI)."""
        if not ai_stats.get('available', False):
            return "Sample content (AI not available)"
        elif ai_stats.get('cache_enabled', False):
            return "AI generated (may use cache)"
        else:
            return "AI generated (fresh)"

    def _is_cached_content(self, ai_stats: Dict[str, Any]) -> bool:
        """Check if content was likely served from cache (heuristic: cache enabled)."""
        return ai_stats.get('cache_enabled', False)

    def _get_system_dependencies(self) -> str:
        """Get the PDF-generation system-dependency install command for this platform."""
        system = platform.system()
        if system == "Darwin":  # macOS
            return "brew install pango"
        elif system == "Linux":
            return "sudo apt-get install libpango-1.0-0 libharfbuzz0b libpangoft2-1.0-0"
        else:  # Windows
            return "# Windows: Dependencies included with WeasyPrint"

    def _get_ai_provider_setup(self, provider: str) -> str:
        """Get markdown setup instructions for the given AI provider name."""
        setups = {
            'claude': """**Claude API Setup:**
```bash
# Get API key from https://console.anthropic.com/
# Add to .env.local:
echo "ANTHROPIC_API_KEY=your_api_key_here" >> .env.local
```""",
            'llama': """**Ollama Setup:**
```bash
# Install Ollama
curl -fsSL https://ollama.ai/install.sh | sh
# Start Ollama service
ollama serve
# Pull recommended model
ollama pull llama3.2:latest
```""",
            'sample': """**Sample Content:**
No additional setup required. Uses built-in sample content."""
        }
        return setups.get(provider, "Unknown AI provider")

    def _get_env_vars_for_readme(self, gen_info: Dict[str, Any]) -> str:
        """Get extra environment-variable lines for the README reproduction section."""
        env_vars = []
        # Add model-specific variables if needed
        model = gen_info.get('ai_model', '')
        if 'llama' in gen_info.get('ai_provider', '').lower():
            env_vars.append(f'export LLAMA_MODEL="{model}"')
        if env_vars:
            return "\n".join(env_vars)
        else:
            return "# No additional environment variables needed"

    def _calculate_completeness_score(self, ai_content: Dict[str, str]) -> int:
        """Calculate content completeness as a 0-100 percentage of expected sections
        that are present and non-blank."""
        expected_sections = ['einstiegstext', 'fachliche_passung', 'motivationstext', 'mehrwert', 'abschlusstext']
        completed_sections = len([key for key in expected_sections if key in ai_content and ai_content[key].strip()])
        return int((completed_sections / len(expected_sections)) * 100)