import os
import win32com.client
from tkinter import filedialog
from tkinter import Tk
def doc_to_docx(doc_path):
    """Convert a single .doc file to .docx using a private Word instance.

    A new Word.Application is started for the call and shut down before
    returning. The original .doc file is deleted only when the .docx was
    saved successfully.

    Args:
        doc_path: Path of the .doc file to convert.

    Returns:
        The path of the new .docx file, or None if the conversion failed
        (the failure is printed and the original file is kept).
    """
    word = None
    doc = None
    docx_path = None
    try:
        word = win32com.client.Dispatch("Word.Application")
        word.Visible = 0
        doc = word.Documents.Open(doc_path)
        # Build the new file name by swapping the .doc extension for .docx.
        target_path = os.path.splitext(doc_path)[0] + '.docx'
        # FileFormat=16 is wdFormatDocumentDefault in Word's WdSaveFormat
        # enumeration, i.e. the .docx format.
        doc.SaveAs(target_path, FileFormat=16)
        # Only record the result once the save has really succeeded.
        docx_path = target_path
        print(f"Converted: {doc_path} to {docx_path}")
    except Exception as e:
        print(f"Failed to convert {doc_path} due to {str(e)}")
    finally:
        # Either object may be missing if Dispatch() or Open() failed, so
        # guard each cleanup step; Quit() must run even if Close() raises.
        try:
            if doc is not None:
                doc.Close()
        finally:
            if word is not None:
                word.Quit()
    if docx_path is not None:
        # Delete the original .doc file only after a successful conversion.
        os.remove(doc_path)
    return docx_path
def convert_all_docs_in_folder(folder_path):
    """Convert every .doc file directly inside ``folder_path`` to .docx.

    The extension check is case-insensitive; entries with any other
    extension (including .docx) are skipped. Sub-folders are not descended
    into.

    Args:
        folder_path: Directory whose entries are scanned.
    """
    for entry in os.listdir(folder_path):
        _, extension = os.path.splitext(entry)
        # Skip everything that is not a .doc file.
        if extension.lower() != '.doc':
            continue
        # Hand the full path of the entry to the converter.
        doc_to_docx(os.path.join(folder_path, entry))
if __name__ == '__main__':
    # A Tk root window is required by the dialog; create it and hide it
    # right away so only the folder picker is shown.
    hidden_root = Tk()
    hidden_root.withdraw()
    # Let the user pick the folder to convert; a falsy result means that
    # nothing was selected, in which case there is nothing to do.
    chosen_folder = filedialog.askdirectory()
    if chosen_folder:
        convert_all_docs_in_folder(chosen_folder)
做了两处优化,转换速度大大提升:一是使用glob模块直接获取所有的.doc文件,而不是遍历文件夹中的所有文件再逐个判断扩展名,减少不必要的文件系统交互;
二是将Word.Application对象的创建和退出移到循环外部,整个批量转换过程只需启动和退出一次Word应用程序,而不必为每个文件各启动和退出一次。
下面是修改后的代码:
import os
import win32com.client
from tkinter import filedialog
from tkinter import Tk
import glob
from datetime import datetime
def doc_to_docx(word, doc_path):
    """Convert one .doc file to .docx through an already running Word.

    The document is always closed again, even when saving fails, so a bad
    file does not stay open in the shared Word instance. The original .doc
    file is deleted only after the .docx was saved and the document closed.

    Args:
        word: A Word.Application automation object; only its
            ``Documents.Open`` method is used here.
        doc_path: Path of the .doc file to convert.

    Returns:
        The path of the new .docx file, or None if the conversion failed
        (the failure is printed and the original file is kept).
    """
    # Word runs as a separate process, so a relative path may resolve
    # differently there than in os.remove(); use one absolute path for both.
    doc_path = os.path.abspath(doc_path)
    docx_path = os.path.splitext(doc_path)[0] + '.docx'
    try:
        doc = word.Documents.Open(doc_path)
        try:
            # FileFormat=16 is wdFormatDocumentDefault in Word's
            # WdSaveFormat enumeration, i.e. the .docx format.
            doc.SaveAs(docx_path, FileFormat=16)
        finally:
            # Release the document whether or not the save succeeded.
            doc.Close()
        current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(f"{current_time} Converted: {doc_path} to {docx_path}")
        os.remove(doc_path)
        return docx_path
    except Exception as e:
        print(f"Failed to convert {doc_path} due to {str(e)}")
        return None
def convert_all_docs_in_folder(folder_path):
    """Convert every .doc file directly inside ``folder_path`` to .docx.

    A single Word.Application instance is shared by all conversions and is
    shut down afterwards, even if the loop is interrupted. Word is not
    started at all when the folder contains no .doc files.

    Args:
        folder_path: Directory that is searched (non-recursively) for
            files matching ``*.doc``.
    """
    doc_paths = glob.glob(os.path.join(folder_path, '*.doc'))
    if not doc_paths:
        # Nothing to convert: do not launch Word for no reason.
        return
    word = win32com.client.Dispatch("Word.Application")
    try:
        word.Visible = 0
        for doc_path in doc_paths:
            doc_to_docx(word, doc_path)
    finally:
        # Always quit, otherwise the invisible Word process keeps running.
        word.Quit()
if __name__ == '__main__':
    # A Tk root window is required by the dialog; create it and hide it
    # right away so only the folder picker is shown.
    hidden_root = Tk()
    hidden_root.withdraw()
    # A falsy result means that no folder was selected, so do nothing.
    chosen_folder = filedialog.askdirectory()
    if chosen_folder:
        convert_all_docs_in_folder(chosen_folder)