Source code for bewerbung_generator

#!/usr/bin/env python3
"""
Bewerbung Generator - Generates German job applications from profiles and job descriptions
"""

import json
import logging
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional, Tuple


[docs]
class BewerbungGenerator:

[docs]
    def __init__(self, base_dir: str = "."):
        self.base_dir = Path(base_dir)
        self.profil_dir = self.base_dir / "profil"
        self.stellenbeschreibung_dir = self.base_dir / "Stellenbeschreibung"
        self.ausgabe_dir = self.base_dir / "Ausgabe"
        self.logger = None  # Will be set up when generation starts

    

[docs]
    def setup_logging(self, output_dir: Path) -> logging.Logger:
        """Setup structured logging for the generation process"""
        log_file = output_dir / "generation.log"
        
        # Create logger
        logger = logging.getLogger(f'bewerbung_generator_{id(self)}')
        logger.setLevel(logging.INFO)
        
        # Clear any existing handlers
        logger.handlers.clear()
        
        # Create formatters
        file_formatter = logging.Formatter(
            '%(asctime)s | %(filename)-8s.%(funcName)-8s:%(lineno)d | %(levelname)-8s | %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        
        console_formatter = logging.Formatter(
            '%(levelname)-8s | %(message)s'
        )
        
        # File handler
        file_handler = logging.FileHandler(log_file, encoding='utf-8')
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)
        
        # Console handler for errors/warnings
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.WARNING)
        console_handler.setFormatter(console_formatter)
        logger.addHandler(console_handler)
        
        # Log initial info
        logger.info("=== Bewerbung Generation Started ===")
        logger.info(f"Output directory: {output_dir}")
        logger.info(f"Base directory: {self.base_dir}")
        logger.info(f"Timestamp: {datetime.now().isoformat()}")
        
        self.logger = logger
        return logger

    
    

[docs]
    def get_newest_file_by_date_pattern(self, directory: Path, pattern: str = r"(\d{8})_.*") -> Optional[Path]:
        """
        Find the newest file in directory based on YYYYMMDD date pattern
        """
        if not directory.exists():
            print(f"Directory {directory} does not exist")
            return None
            
        files = []
        for file_path in directory.iterdir():
            if file_path.is_file():
                match = re.match(pattern, file_path.name)
                if match:
                    date_str = match.group(1)
                    files.append((date_str, file_path))
        
        if not files:
            print(f"No files matching pattern found in {directory}")
            return None
            
        # Sort by date string (YYYYMMDD format sorts naturally)
        files.sort(key=lambda x: x[0], reverse=True)
        newest_file = files[0][1]
        
        print(f"Found newest file: {newest_file}")
        return newest_file

    

[docs]
    def read_newest_profile(self) -> Optional[Path]:
        """
        Step 1: Read the newest profile file from profil/ directory
        """
        print("=== Step 1: Reading newest profile ===")
        return self.get_newest_file_by_date_pattern(self.profil_dir, r"(\d{8})_.*\.pdf")

    

[docs]
    def read_newest_job_description(self) -> Optional[Path]:
        """
        Step 2: Read the newest job description from Stellenbeschreibung/ directory
        """
        print("=== Step 2: Reading newest job description ===")
        return self.get_newest_file_by_date_pattern(self.stellenbeschreibung_dir, r"(\d{8})_.*\.txt")

    

[docs]
    def extract_file_identifiers(self, profile_file: Path, job_file: Path) -> Tuple[str, str, str, str]:
        """
        Extract date and identifier parts from profile and job filenames
        Returns: (profile_date, profile_id, job_date, job_id)
        """
        # Extract profile parts
        profile_match = re.match(r"(\d{8})_(.*)\.pdf", profile_file.name)
        if not profile_match:
            raise ValueError(f"Invalid profile filename format: {profile_file.name}")
        profile_date, profile_id = profile_match.groups()
        
        # Extract job parts  
        job_match = re.match(r"(\d{8})_(.*)\.txt", job_file.name)
        if not job_match:
            raise ValueError(f"Invalid job filename format: {job_file.name}")
        job_date, job_id = job_match.groups()
        
        return profile_date, profile_id, job_date, job_id

    

[docs]
    def create_output_directory(self, profile_file: Path, job_file: Path) -> Path:
        """
        Step 3: Create output directory with proper naming pattern
        Pattern: {job_date}_{job_id}-{profile_date}_{profile_id}
        """
        print("=== Step 3: Creating output directory ===")
        
        profile_date, profile_id, job_date, job_id = self.extract_file_identifiers(profile_file, job_file)
        
        # Create output directory name
        output_dir_name = f"{job_date}_{job_id}-{profile_date}_{profile_id}"
        output_path = self.ausgabe_dir / output_dir_name
        
        # Create Ausgabe directory if it doesn't exist
        self.ausgabe_dir.mkdir(exist_ok=True)
        
        # Create output directory
        output_path.mkdir(exist_ok=True)
        
        print(f"Created output directory: {output_path}")
        return output_path

    

[docs]
    def generate_application_documents(self, output_dir: Path, profile_file: Path, job_file: Path) -> Dict[str, Path]:
        """
        Step 4: Generate application documents (cover letter, CV, attachments) with AI content

        The work is done once per AI client. For every client the method
        creates a sub-folder of ``output_dir`` named after
        ``get_client_model_folder()``, points the instance logger at that
        folder, obtains company/position data and cover-letter content,
        exports addressee and job fields as process environment variables,
        renders the templates and saves the results. Metadata and
        documentation files are added depending on environment switches.

        If the ``template_manager`` / ``ai_client_factory`` modules cannot be
        imported, the method delegates to ``_generate_basic_documents()``.

        Environment variables read:
            OUTPUT_STRUCTURE: default "by_model"; only written to the log here.
            INCLUDE_GENERATION_METADATA: default "false"; "true" writes
                ``generation_info.json`` into the provider folder.
            GENERATE_ALL_PROVIDERS: default "true"; "true" uses every client
                from ``get_all_available_clients()``, anything else uses the
                single client from ``create_client()``.
            GENERATE_DOCUMENTATION: default "true"; "true" runs the
                DocumentationGenerator for each provider.

        Environment variables written (overwritten for each provider):
            ADRESSAT_FIRMA, ADRESSAT_STRASSE, ADRESSAT_PLZ_ORT, ADRESSAT_LAND,
            STELLE, STELLEN_ID

        Args:
            output_dir: Directory for generated documents
            profile_file: Path to profile file; a text file with the same
                stem and a ``.txt`` extension must sit next to it
            job_file: Path to job description file

        Returns:
            Mapping of result keys to paths. In the AI flow the keys are
            ``<provider folder>_output_dir``, ``<provider folder>_metadata``
            and ``<provider folder>_<doc name>``. In the import-failure
            fallback the mapping is whatever ``_generate_basic_documents()``
            returns.

        Raises:
            Exceptions raised outside the per-provider ``try`` block are not
            handled here, e.g. a missing companion profile text file, or
            errors from the company-extraction and content-generation calls.
        """
        # Import AI classes locally to avoid import issues
        try:
            import sys
            # Make sibling modules importable regardless of the working directory.
            sys.path.insert(0, str(Path(__file__).parent))
            from template_manager import TemplateManager
            from ai_client_factory import AIClientFactory
        except ImportError as e:
            print(f"Error importing AI modules: {e}")
            print("Falling back to basic document generation")
            return self._generate_basic_documents(output_dir, profile_file, job_file)
        
        # Initialize managers
        template_manager = TemplateManager(str(self.base_dir))
        ai_factory = AIClientFactory(str(self.base_dir))
        
        # Determine output structure and multi-provider generation
        # (output_structure is only logged below; it does not alter the layout here)
        output_structure = os.getenv("OUTPUT_STRUCTURE", "by_model").lower()
        include_metadata = os.getenv("INCLUDE_GENERATION_METADATA", "false").lower() == "true"
        generate_all_providers = os.getenv("GENERATE_ALL_PROVIDERS", "true").lower() == "true"
        
        # Get AI clients based on generation mode
        if generate_all_providers:
            ai_clients = ai_factory.get_all_available_clients()
            print(f"🔄 Multi-provider generation enabled: {len(ai_clients)} providers")
        else:
            ai_client = ai_factory.create_client()
            ai_clients = [ai_client]
            print(f"📁 Single-provider generation: {ai_client.get_client_model_folder()}")
        
        # Read input content once for all providers
        job_content = job_file.read_text(encoding='utf-8')
        # The profile text is not taken from the profile file itself but from a
        # companion file in the same folder with the same stem and a .txt suffix.

        with open(os.path.join(profile_file.parent, profile_file.stem + ".txt"), 'r', encoding='utf-8') as pf:
            profile_content = pf.read()
            # Show only the first 50 characters as a quick confirmation.
            print(f"📄 Used Profile: {profile_content[:50]}"   )


        # Initialize generated files collection
        generated_files = {}
        
        # Process each AI client
        for ai_client in ai_clients:
            client_model_folder = ai_client.get_client_model_folder()
            model_output_dir = output_dir / client_model_folder
            model_output_dir.mkdir(parents=True, exist_ok=True)
            print(f"📁 Processing provider: {client_model_folder}")
            
            # Setup logging for this provider
            # (each call redirects the instance logger to this provider's generation.log)
            logger = self.setup_logging(model_output_dir)
            logger.info(f"Output structure: {output_structure}")
            logger.info(f"Model output directory: {model_output_dir}")
            logger.info(f"AI client: {ai_client.__class__.__name__}")
            logger.info(f"AI model: {ai_client.get_model_name()}")
            logger.info(f"Include metadata: {include_metadata}")
            logger.info(f"Reading job description: {job_file.name}")
            logger.info(f"Reading profile: {profile_file.name}")
            
            # Extract company and position info for this provider
            # NOTE: this call and the content generation below sit outside the
            # try block further down, so an exception here ends the whole run
            # instead of skipping to the next provider.
            logger.info("Extracting company and position information")
            if ai_client.is_available():
                company_info = ai_client.extract_company_and_position(job_content)
                company_name = company_info['company_name']
                position_title = company_info['position_title']
                logger.info(f"AI extraction successful: {company_name} - {position_title}")
            else:
                # Placeholder data used when the provider reports itself unavailable.
                company_name = "Beispiel Unternehmen GmbH"
                position_title = "Software Engineer"
                company_info = {
                    'company_name': company_name,
                    'position_title': position_title,
                    'adressat_firma': company_name,
                    'adressat_strasse': 'Musterstraße 1',
                    'adressat_plz_ort': '12345 Musterstadt',
                    'adressat_land': 'Deutschland'
                }
                logger.info(f"Using fallback company info: {company_name} - {position_title}")
            
            print(f"Provider {client_model_folder}: {company_name}, {position_title}")
            
            # Generate AI content for cover letter for this provider
            logger.info("Starting AI content generation")
            if ai_client.is_available():
                print("Generating AI content...")
                logger.info("AI provider available - generating personalized content")
                ai_content = ai_client.generate_all_cover_letter_content(
                    job_description=job_content,
                    profile_content=profile_content,
                    company_name=company_name,
                    position_title=position_title
                )
                logger.info("AI content generation completed successfully")
            else:
                print("Using sample AI content...")
                logger.warning("AI provider not available - using sample content")
                # Imported only on this path; an ImportError here is not caught.
                from ai_content_generator import generate_sample_ai_content
                ai_content = generate_sample_ai_content()
                logger.info("Sample content loaded")
            
            # Addressee data for cover letter (lowercase for dynamic content)
            adressat_data = {
                'position': position_title,
            }
            
            # Set Adressat and job variables as environment variables (uppercase for template)
            # These are process-wide and are overwritten on every loop iteration.
            # Presumably the template manager reads them while rendering - confirm
            # against template_manager.
            os.environ['ADRESSAT_FIRMA'] = company_info.get('adressat_firma', company_name)
            os.environ['ADRESSAT_STRASSE'] = company_info.get('adressat_strasse', '')
            os.environ['ADRESSAT_PLZ_ORT'] = company_info.get('adressat_plz_ort', '')
            os.environ['ADRESSAT_LAND'] = company_info.get('adressat_land', 'Deutschland')
            os.environ['STELLE'] = company_info.get('stelle', position_title)
            os.environ['STELLEN_ID'] = company_info.get('stellen_id', '')
            
            logger.info(f"Adressat: {os.environ['ADRESSAT_FIRMA']}")
            logger.info(f"Stelle: {os.environ['STELLE']}")
            logger.info(f"Stellen-ID: {os.environ['STELLEN_ID']}")
            
            print(f"Adressat: {os.environ['ADRESSAT_FIRMA']}")
            print(f"Stelle: {os.environ['STELLE']}")
            print(f"Stellen-ID: {os.environ['STELLEN_ID']}")
            
            # Everything from rendering onwards is guarded: a failure is logged,
            # printed, and the loop moves on to the next provider.
            try:
                # Generate documents for this provider
                logger.info("Starting document generation")
                print("Rendering cover letter...")
                logger.info("Rendering cover letter template")
                anschreiben_md = template_manager.render_anschreiben(adressat_data, ai_content)
                
                print("Rendering CV...")
                logger.info("Rendering CV template")
                lebenslauf_md = template_manager.render_lebenslauf()
                
                print("Generating attachments list...")
                logger.info("Generating attachments list")
                attachments_content = self._generate_attachments_list(profile_file)
                
                # Save to model-specific directory (directory-only structure)
                logger.info(f"Saving documents to model directory: {model_output_dir}")
                self._save_documents_to_directory(
                    model_output_dir, 
                    anschreiben_md, 
                    lebenslauf_md, 
                    attachments_content,
                    template_manager
                )
                # Only the folder is recorded; the per-file mapping returned by
                # _save_documents_to_directory() is discarded.
                generated_files[f'{client_model_folder}_output_dir'] = model_output_dir
                logger.info("Documents saved to model directory successfully")
                
                # Generate metadata if requested
                if include_metadata:
                    logger.info("Generating metadata file")
                    metadata = self._generate_metadata(ai_client, job_file, profile_file, ai_content)
                    metadata_path = model_output_dir / "generation_info.json"
                    metadata_path.write_text(json.dumps(metadata, indent=2, ensure_ascii=False), encoding='utf-8')
                    print(f"📊 Generated metadata: {metadata_path}")
                    generated_files[f'{client_model_folder}_metadata'] = metadata_path
                    logger.info(f"Metadata saved: {metadata_path}")
                
                # Generate documentation if requested
                generate_docs = os.getenv("GENERATE_DOCUMENTATION", "true").lower() == "true"
                if generate_docs:
                    logger.info("Starting documentation generation")
                    # Documentation is best-effort: failures are reported but do
                    # not mark the provider as failed.
                    try:
                        from documentation_generator import DocumentationGenerator
                        doc_generator = DocumentationGenerator(str(self.base_dir))
                        
                        # Use metadata if available, otherwise create basic metadata
                        # ("metadata" is only bound above when include_metadata is true)
                        doc_metadata = metadata if include_metadata else self._generate_metadata(ai_client, job_file, profile_file, ai_content)
                        
                        docs = doc_generator.generate_documentation(
                            model_output_dir, 
                            doc_metadata, 
                            ai_content, 
                            profile_file, 
                            job_file
                        )
                        
                        for doc_name, doc_path in docs.items():
                            generated_files[f'{client_model_folder}_{doc_name}'] = doc_path
                        print(f"📚 Generated documentation: README.md, regeneration scripts")
                        logger.info("Documentation generation completed successfully")
                        
                    except ImportError as e:
                        logger.error(f"Documentation generation failed - ImportError: {e}")
                        print(f"⚠️  Documentation generation failed: {e}")
                    except Exception as e:
                        logger.error(f"Documentation generation error: {e}")
                        print(f"⚠️  Error generating documentation: {e}")
                else:
                    logger.info("Documentation generation skipped (GENERATE_DOCUMENTATION=false)")
                
                logger.info(f"=== Provider {client_model_folder} Generation Completed Successfully ===")
                # Substring match on the key, so a provider folder name that is
                # contained in another provider's name would also be listed.
                logger.info(f"Generated files: {[k for k in generated_files.keys() if client_model_folder in k]}")
                print(f"✓ {client_model_folder} documents generated successfully")
                
            except Exception as e:
                logger.error(f"Error during {client_model_folder} generation: {e}")
                print(f"❌ Error generating {client_model_folder} documents: {e}")
                # Continue with next provider
                continue
        
        # Return all generated files from all providers
        # The count printed is the number of clients attempted, not the number
        # that succeeded; the message is the same in single-provider mode.
        print(f"\n✅ Multi-provider generation completed! Generated {len(ai_clients)} provider outputs")
        return generated_files

    
    def _generate_basic_documents(self, output_dir: Path, profile_file: Path, job_file: Path) -> Dict[str, Path]:
        """Fallback method for basic document generation without AI"""
        print("Generating basic documents without AI...")
        
        generated_files = {}
        
        # Basic cover letter
        basic_anschreiben = f"""# Anschreiben

**Max Mustermann**
Musterstraße 123
12345 Berlin

---

Sehr geehrte Damen und Herren,

mit großem Interesse habe ich Ihre Stellenausschreibung gelesen.

Basierend auf der Stellenbeschreibung ({job_file.name}) und meinem Profil ({profile_file.name}) bewerbe ich mich hiermit um die ausgeschriebene Position.

Mit freundlichen Grüßen
Max Mustermann
"""
        
        anschreiben_path = output_dir / "anschreiben.md"
        anschreiben_path.write_text(basic_anschreiben, encoding='utf-8')
        generated_files['anschreiben.md'] = anschreiben_path
        
        # Basic CV
        basic_lebenslauf = f"""# Lebenslauf

**Max Mustermann**

Detaillierte Informationen siehe Profildokument: {profile_file.name}
"""
        
        lebenslauf_path = output_dir / "lebenslauf.md"
        lebenslauf_path.write_text(basic_lebenslauf, encoding='utf-8')
        generated_files['lebenslauf.md'] = lebenslauf_path
        
        # Attachments
        attachments_content = self._generate_attachments_list(profile_file)
        attachments_path = output_dir / "anlagen.md"
        attachments_path.write_text(attachments_content, encoding='utf-8')
        generated_files['anlagen.md'] = attachments_path
        
        return generated_files
    
    def _generate_attachments_list(self, profile_file: Path) -> str:
        """Generate attachments list"""
        return f"""# Anlagen

Die folgenden Dokumente sind dieser Bewerbung beigefügt:

1. Anschreiben
2. Lebenslauf
3. Profildokument: {profile_file.name}
4. Zeugnisse und Zertifikate
5. Referenzen

---

*Hinweis: Das Profildokument enthält detaillierte Informationen zu Qualifikationen und Berufserfahrung.*
"""
    

[docs]
    def create_pdf_directory(self, output_dir: Path) -> Path:
        """
        Step 5: Create pdf/ subdirectory in output directory(ies)
        """
        print("=== Step 5: Creating PDF directory ===")
        
        # Handle directory-only structure - create PDF dirs in all model-specific folders
        pdf_dirs = []
        for item in output_dir.iterdir():
            if item.is_dir() and not item.name.startswith('.'):
                # Check if this looks like a model folder (contains underscores)
                if '_' in item.name:
                    pdf_dir = item / "pdf"
                    pdf_dir.mkdir(parents=True, exist_ok=True)
                    pdf_dirs.append(pdf_dir)
                    print(f"Created PDF directory: {pdf_dir}")
        
        return pdf_dirs[0] if pdf_dirs else output_dir / "pdf"  # Return first one for compatibility

    

[docs]
    def convert_documents_to_pdf(self, markdown_files: Dict[str, Path], pdf_dir: Path) -> Dict[str, Path]:
        """
        Step 6: Convert documents to PDF format in all relevant directories
        """
        print("=== Step 6: Converting documents to PDF ===")
        
        # Import PDF generator locally
        try:
            import sys
            sys.path.insert(0, str(Path(__file__).parent))
            from pdf_generator import PDFGenerator
        except ImportError as e:
            print(f"Error importing PDF generator: {e}")
            return {}
        
        pdf_generator = PDFGenerator(str(self.base_dir))
        
        # Check if PDF generation is available
        validation = pdf_generator.validate_dependencies()
        if not validation['weasyprint']:
            print("⚠️  WeasyPrint not available - PDF generation skipped")
            print("   Install system dependencies: brew install pango")
            return {}
        
        # Find all model-specific directories that contain markdown files (directory-only structure)
        main_output_dir = pdf_dir.parent.parent  # Get back to main output directory 
        conversion_dirs = []
        
        for item in main_output_dir.iterdir():
            if item.is_dir() and '_' in item.name:  # Model folder
                md_files = list(item.glob("*.md"))
                if md_files:
                    pdf_subdir = item / "pdf"
                    pdf_subdir.mkdir(exist_ok=True)
                    conversion_dirs.append((item, pdf_subdir, md_files))
        
        generated_pdfs = {}
        total_converted = 0
        
        for source_dir, target_pdf_dir, md_files in conversion_dirs:
            print(f"Converting files in {source_dir.name}...")
            
            for md_path in md_files:
                try:
                    # Read markdown content
                    markdown_content = md_path.read_text(encoding='utf-8')
                    
                    # Generate PDF filename
                    pdf_name = md_path.name.replace('.md', '.pdf')
                    pdf_path = target_pdf_dir / pdf_name
                    
                    print(f"Converting markdown to PDF: {pdf_path}")
                    
                    # Convert to PDF
                    title = md_path.stem.replace('_', ' ').title()
                    pdf_generator.markdown_to_pdf(markdown_content, pdf_path, title)
                    
                    generated_pdfs[f"{source_dir.name}/{pdf_name}"] = pdf_path
                    total_converted += 1
                    
                    # Also save HTML preview
                    html_name = md_path.name.replace('.md', '.html')
                    html_path = target_pdf_dir / html_name
                    html_content = pdf_generator.markdown_to_html(markdown_content, title)
                    pdf_generator.save_html_preview(html_content, html_path)
                    
                except Exception as e:
                    print(f"Error converting {md_path.name} to PDF: {e}")
                    continue
        
        print(f"✓ Converted {total_converted} documents to PDF")
        return generated_pdfs

    
    def _save_documents_to_directory(self, target_dir: Path, anschreiben_md: str, 
                                   lebenslauf_md: str, attachments_content: str, 
                                   template_manager) -> Dict[str, Path]:
        """Save all documents to a specific directory"""
        generated_files = {}
        
        # Save cover letter
        anschreiben_path = target_dir / "anschreiben.md"
        template_manager.save_rendered_template(anschreiben_md, anschreiben_path)
        generated_files['anschreiben.md'] = anschreiben_path
        
        # Save CV
        lebenslauf_path = target_dir / "lebenslauf.md"
        template_manager.save_rendered_template(lebenslauf_md, lebenslauf_path)
        generated_files['lebenslauf.md'] = lebenslauf_path
        
        # Save attachments
        attachments_path = target_dir / "anlagen.md"
        attachments_path.write_text(attachments_content, encoding='utf-8')
        generated_files['anlagen.md'] = attachments_path
        
        return generated_files
    
    def _generate_metadata(self, ai_client, job_file: Path, profile_file: Path, 
                          ai_content: Dict[str, str]) -> Dict[str, any]:
        """Generate metadata about the content generation process"""
        import time
        from datetime import datetime
        
        metadata = {
            "generation_info": {
                "timestamp": datetime.now().isoformat(),
                "ai_provider": ai_client.get_provider_name(),
                "ai_model": ai_client.get_model_name(),
                "client_folder": ai_client.get_client_model_folder()
            },
            "input_files": {
                "job_description": job_file.name,
                "profile": profile_file.name
            },
            "generated_content": {
                "ai_variables": list(ai_content.keys()),
                "total_ai_variables": len(ai_content)
            },
            "ai_client_stats": ai_client.get_usage_stats()
        }
        
        # Add content lengths for analysis
        for key, content in ai_content.items():
            if isinstance(content, str):
                metadata["generated_content"][f"{key}_length"] = len(content)
                metadata["generated_content"][f"{key}_words"] = len(content.split())
        
        return metadata



[docs]
def main():
    """
    Command-line entry point that runs the generation pipeline end to end.

    Steps performed, in order: find the newest profile, find the newest job
    description, create the output directory, generate the application
    documents, create the PDF directories and convert the documents to PDF.
    A summary is printed at the end.

    Returns:
        0 on success, 1 when an input file is missing, no documents were
        generated, or any step raised an exception.
    """
    import argparse

    # No options are defined; parsing still provides --help and rejects
    # unexpected arguments.
    cli = argparse.ArgumentParser(description='Generate German job applications with AI support')
    cli.parse_args()

    separator = "=" * 50
    print("🚀 Starting Bewerbung Generator")
    print(separator)

    generator = BewerbungGenerator()

    try:
        # Step 1: newest profile
        profile_file = generator.read_newest_profile()
        if not profile_file:
            print("❌ Error: No profile file found")
            return 1

        # Step 2: newest job description
        job_file = generator.read_newest_job_description()
        if not job_file:
            print("❌ Error: No job description file found")
            return 1

        # Step 3: output directory for this profile/job combination
        output_dir = generator.create_output_directory(profile_file, job_file)

        # Step 4: application documents with AI content
        print("=== Step 4: Generating application documents ===")
        markdown_files = generator.generate_application_documents(output_dir, profile_file, job_file)
        if not markdown_files:
            print("❌ Error: Failed to generate application documents")
            return 1

        # Steps 5 and 6: PDF directories, then the conversion itself
        pdf_dir = generator.create_pdf_directory(output_dir)
        pdf_files = generator.convert_documents_to_pdf(markdown_files, pdf_dir)

        # Summary
        print("\n" + separator)
        print("✅ Bewerbung generation completed successfully!")
        print(f"\n📁 Output Directory: {output_dir}")
        print(f"📄 Profile: {profile_file.name}")
        print(f"📄 Job Description: {job_file.name}")

        print("\n📝 Generated Documents:")
        for filename in markdown_files:
            print(f"   - {filename}")

        if not pdf_files:
            print("\n⚠️  No PDFs generated (WeasyPrint not available)")
        else:
            print("\n📄 Generated PDFs:")
            for pdf_path in pdf_files.values():
                size_kb = round(pdf_path.stat().st_size / 1024, 1)
                print(f"   - {pdf_path.name} ({size_kb} KB)")

        print("\n🎯 Ready for application submission!")
        return 0

    except Exception as e:
        # Top-level boundary: report the failure and signal it via the exit code.
        print(f"\n❌ Error during generation: {e}")
        return 1


if __name__ == "__main__":
    # Use SystemExit directly: the bare exit() helper is injected by the
    # site module for interactive sessions and is not guaranteed to exist
    # (e.g. when Python runs with -S).
    raise SystemExit(main())