import re
import os



def clean_urls(pattern, txt):
    """Return ``txt`` with spaces removed inside every match of ``pattern``.

    Text outside the matches is returned unchanged; within each match only
    the space character (' ') is dropped.

    Args:
        pattern: Regular expression accepted by ``re.sub``.
        txt: The string to search.

    Returns:
        A new string in which each matched span has had its spaces deleted.
    """
    def _drop_spaces(match):
        # Group 0 is the whole matched span, regardless of capture groups.
        matched_text = match.group(0)
        return matched_text.replace(' ', '')

    return re.sub(pattern, _drop_spaces, txt)

def remove_extra_spaces_html_txt(file_path):
    """Remove stray spaces from http(s) URLs in a UTF-8 text file, in place.

    The file is processed line by line into ``file_path + '.tmp'``, which
    then replaces the original via ``os.replace``. Each line keeps its own
    terminator ('\\n', '\\r\\n', '\\r', or none for an unterminated last
    line), so only the spaces inside matched URLs change.

    Args:
        file_path: Path of the text file to clean. It is read and rewritten
            as UTF-8.

    Raises:
        OSError: If the file cannot be read, the temporary file cannot be
            written, or the final replace fails.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Temporary sibling file that receives the cleaned content.
    temp_file_path = file_path + '.tmp'

    # pattern1 matches an http/https scheme and host whose "http s : / /"
    # prefix may be broken up by irregular spaces. pattern2 then matches a
    # well-formed "http(s)://" prefix followed by URL characters and spaces.
    # Both are compiled once here instead of being looked up per line.
    pattern1 = re.compile(r'(http ?s? ?:  ?/  ?/ ?[\w.-]+)')
    pattern2 = re.compile(r'(https?://[\w. %/:-]+)')

    try:
        # newline='' disables newline translation on both sides so the
        # original line terminators reach the output untouched.
        with open(file_path, 'r', encoding='utf-8', newline='') as file, \
                open(temp_file_path, 'w', encoding='utf-8', newline='') as temp_file:
            for line in file:
                # Split the line into its content and its own terminator.
                line_content = line.rstrip('\r\n')
                line_ending = line[len(line_content):]

                line_content = clean_urls(pattern1, line_content)
                line_content = clean_urls(pattern2, line_content)

                # Write the cleaned content followed by the original
                # terminator (empty for an unterminated final line).
                temp_file.write(line_content + line_ending)
    except BaseException:
        # Best-effort cleanup: do not leave a partial temp file behind.
        try:
            os.remove(temp_file_path)
        except OSError:
            pass
        raise

    # Replace the original file with the cleaned content.
    os.replace(temp_file_path, file_path)

if __name__ == '__main__':
    # Script entry point: clean the URLs in 'DCU.txt', resolved against the
    # current working directory, rewriting that file in place.
    remove_extra_spaces_html_txt('DCU.txt')

#
# import re
#
#
# def remove_extra_spaces_html_txt(text):
#
#
#     # Remove extra spaces in URLs
#     cleaned_text = re.sub(r'(http ?s? ?:  ?/  ?/ ?[\w.-]+)', lambda x: x.group(0).replace(' ', ''), text)
#     print(cleaned_text)
#     # Function to remove spaces within URLs
#     def clean_urls(text):
#         return re.sub(r'(https?://[\w. %/:-]+)', lambda x: x.group(0).replace(' ', ''), text)
#
#     cleaned_text = clean_urls(cleaned_text)
#     print(cleaned_text)
# #
# text = """
# 预测：使用海光CPU/DCU进行预测与使用Intel CPU/Nvidia GPU预测相同，支持飞桨原生推理库(PaddleInference)，适用于高性能服务器端、云端推理。当前飞桨框架 ROCm版本完全兼容飞桨框架 CUDA版本的C++/PythonAPI，直接使用原有的GPU预测命令和参数即可。
# 完整训练及预测示例可参考官网海光DCU芯片运行飞桨文档。
# https : /  / www.pa ddlepadd le.org.cn /documenta tion/docs/zh/gu ides/hardware_sup port/rocmdocs/trainexamplecn.html
# http s : / /w w  w.pa   d dlep ddle.o rg.cn/d ocumenta tion/docs zh/guides/h ardware_support/rocmdocs/infer_example_cn.html
# SqueezeNet, EfficientNet
# Detection : FasterRCNN, MTCNN, SSD, YOLO
# 支持深度学习领域基本的K8S调度以及高性能计算场景下常用的SLURM调度。
#
# """
# remove_extra_spaces_html_txt(text)
