import os
import re

def clean_content(text):
    """Strip markup from *text* and keep only the body from the first section header on.

    Processing steps:

    1. Everything from the first ``"Autor:"`` marker onwards is discarded.
    2. HTML tags (``<...>``) and well-formed character references
       (``&name;``, ``&#123;``, ``&#x1F;``) are removed. Text enclosed by a
       pair of tags is kept; only the tags themselves are dropped.
    3. Each line is stripped of surrounding whitespace and empty lines are
       dropped.
    4. Lines appearing before the first recognised section header are
       dropped; the header and every non-empty line after it are kept.

    Args:
        text: Raw file content.

    Returns:
        The kept lines joined with a blank line between them, or an empty
        string when no section header is present.
    """
    section_headers = ('Myśl przewodnia AA', 'Pytanie', 'Medytacja', 'Modlitwa')

    # Keep only the part preceding the first "Autor:" marker.
    body = text.split("Autor:")[0].strip()

    # Only well-formed character references are removed. An unbounded
    # "&[^;]+;" would treat a literal ampersand in prose as the start of an
    # entity and swallow everything up to the next semicolon, even across
    # line breaks.
    markup = r'<[^>]+>|&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);'
    cleaned = re.sub(markup, '', body)

    kept = []
    for raw_line in cleaned.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        # Nothing is collected until the first section header shows up;
        # after that every non-empty line is kept.
        if kept or line in section_headers:
            kept.append(line)

    return '\n\n'.join(kept)

def process_file(filepath):
    """Clean the UTF-8 text file at *filepath* and overwrite it in place.

    The file is read fully, passed through ``clean_content``, and the
    result is written back to the same path. Any exception raised along
    the way is caught and reported on stdout instead of propagating.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as source:
            raw_text = source.read()

        # Clean before reopening for writing, so a failure during cleaning
        # happens while the file on disk is still untouched.
        result = clean_content(raw_text)

        with open(filepath, 'w', encoding='utf-8') as target:
            target.write(result)

        print(f"Successfully processed: {filepath}")
    except Exception as error:
        print(f"Error processing {filepath}: {str(error)}")

def main():
    """Run ``process_file`` on every ``.txt`` entry in this script's directory."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    # Only the immediate directory is scanned; subdirectories are not walked.
    txt_names = [entry for entry in os.listdir(script_dir) if entry.endswith('.txt')]
    for entry in txt_names:
        process_file(os.path.join(script_dir, entry))

# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()