Changes in f325df7: cleanup

				24h/cleanup-1.py
			
          @@ -0,0 +1,43 @@

          +import os

          +import re

          +

          +def clean_content(text):

          +    # Split on "Autor:" and take only the content before it

          +    text = text.split("Autor:")[0].strip()

          +    

          +    # Remove all HTML tags and their content

          +    cleaned = re.sub(r'<[^>]+>|\&[^;]+;', '', text)

          +    

          +    # Remove empty lines while preserving important section headers

          +    lines = [line.strip() for line in cleaned.split('\n')]

          +    important_sections = ['Myśl przewodnia AA', 'Pytanie', 'Medytacja', 'Modlitwa']

          +    filtered_lines = []

          +    

          +    for line in lines:

          +        if line and (line in important_sections or filtered_lines):

          +            filtered_lines.append(line)

          +            

          +    return '\n\n'.join(filtered_lines)

          +

          +def process_file(filepath):

          +    try:

          +        with open(filepath, 'r', encoding='utf-8') as file:

          +            content = file.read()

          +        

          +        cleaned_content = clean_content(content)

          +        

          +        with open(filepath, 'w', encoding='utf-8') as file:

          +            file.write(cleaned_content)

          +        print(f"Successfully processed: {filepath}")

          +    except Exception as e:

          +        print(f"Error processing {filepath}: {str(e)}")

          +

          +def main():

          +    folder_path = os.path.dirname(os.path.abspath(__file__))

          +    for filename in os.listdir(folder_path):

          +        if filename.endswith('.txt'):

          +            file_path = os.path.join(folder_path, filename)

          +            process_file(file_path)

          +

          +if __name__ == "__main__":

          +    main()

          \ No newline at end of file

...	...	@@ -0,0 +1,43 @@
	1	+import os
	2	+import re
	3	+
	4	+def clean_content(text):
	5	+    # Split on "Autor:" and take only the content before it
	6	+    text = text.split("Autor:")[0].strip()
	7	+
	8	+    # Remove all HTML tags and their content
	9	+    cleaned = re.sub(r'<[^>]+>|\&[^;]+;', '', text)
	10	+
	11	+    # Remove empty lines while preserving important section headers
	12	+    lines = [line.strip() for line in cleaned.split('\n')]
	13	+    important_sections = ['Myśl przewodnia AA', 'Pytanie', 'Medytacja', 'Modlitwa']
	14	+    filtered_lines = []
	15	+
	16	+    for line in lines:
	17	+        if line and (line in important_sections or filtered_lines):
	18	+            filtered_lines.append(line)
	19	+
	20	+    return '\n\n'.join(filtered_lines)
	21	+
	22	+def process_file(filepath):
	23	+    try:
	24	+        with open(filepath, 'r', encoding='utf-8') as file:
	25	+            content = file.read()
	26	+
	27	+        cleaned_content = clean_content(content)
	28	+
	29	+        with open(filepath, 'w', encoding='utf-8') as file:
	30	+            file.write(cleaned_content)
	31	+        print(f"Successfully processed: {filepath}")
	32	+    except Exception as e:
	33	+        print(f"Error processing {filepath}: {str(e)}")
	34	+
	35	+def main():
	36	+    folder_path = os.path.dirname(os.path.abspath(__file__))
	37	+    for filename in os.listdir(folder_path):
	38	+        if filename.endswith('.txt'):
	39	+            file_path = os.path.join(folder_path, filename)
	40	+            process_file(file_path)
	41	+
	42	+if __name__ == "__main__":
	43	+    main()
...	...	\ No newline at end of file