在Python中将doc / docx文件转换为pdf

pb3s4cty  于 2023-06-07  发布在  Python
关注(0)|答案(3)|浏览(331)

有没有一个好的库可以将doc文件转换为pdf?有一些付费选项,如cloudconvert,convertApi等,但我正在寻找一个免费的选项。我的Python应用程序托管在EC2机器上。我还看了python-docx库,它可以让我读取doc文件的内容,但我想将内容写入pdf文件会破坏样式。

bwleehnv

bwleehnv1#

这里有一个VBA解决方案(我不知道如何使用Python来实现)。
如果您需要将多个Word文件转换为其他格式,如TXT,RTF,HTML或PDF,请运行下面的脚本。

Option Explicit

Sub ChangeDocsToTxtOrRTFOrHTML()
    ' Batch-convert every .doc/.docx file in a user-chosen folder to TXT, RTF,
    ' HTML or (Word 2007 and later) PDF. Results are written to a "Converted"
    ' sub-folder of the chosen folder; the source documents are not modified.
    '
    ' The macro asks for the folder and the target format via InputBox.
    ' Cancelling either prompt aborts without doing anything.
    Dim fs As Object            ' Scripting.FileSystemObject
    Dim oFile As Object         ' current file in the source folder
    Dim d As Document           ' document currently open for conversion
    Dim strDocName As String    ' full output path without extension
    Dim strExt As String        ' lower-cased extension of the current file
    Dim locFolder As String     ' source folder, always ending in "\"
    Dim outFolder As String     ' locFolder & "Converted"
    Dim fileType As String      ' "TXT", "RTF", "HTML" or "PDF"
    Dim strPrompt As String
    Dim allowPdf As Boolean
    Dim errNum As Long
    Dim errMsg As String

    locFolder = InputBox("Enter the folder path to DOCs", "File Conversion", "C:\Users\your_path_here\")
    If locFolder = "" Then Exit Sub             ' user cancelled
    ' The output path is built by concatenation, so guarantee the separator.
    If Right(locFolder, 1) <> "\" Then locFolder = locFolder & "\"

    ' Application.Version is a string such as "12.0"; Val() parses it
    ' independently of the locale's decimal separator.
    allowPdf = (Val(Application.Version) >= 12)
    If allowPdf Then
        strPrompt = "Change DOC to TXT, RTF, HTML or PDF(2007+ only)"
    Else
        strPrompt = "Change DOC to TXT, RTF, HTML"
    End If

    Do
        fileType = UCase(InputBox(strPrompt, "File Conversion", "TXT"))
        ' InputBox returns "" on Cancel; without this check the loop never ends.
        If fileType = "" Then Exit Sub
    Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML" _
                Or (fileType = "PDF" And allowPdf))

    Set fs = CreateObject("Scripting.FileSystemObject")
    If Not fs.FolderExists(locFolder) Then
        MsgBox "Folder not found: " & locFolder, vbExclamation, "File Conversion"
        Exit Sub
    End If

    ' CreateFolder raises an error when the folder already exists (e.g. on a
    ' second run), so only create it when needed.
    outFolder = locFolder & "Converted"
    If Not fs.FolderExists(outFolder) Then fs.CreateFolder outFolder

    Application.ScreenUpdating = False
    On Error GoTo CleanUp

    For Each oFile In fs.GetFolder(locFolder).Files
        strExt = LCase(fs.GetExtensionName(oFile.Path))
        ' Only convert Word documents, and skip Word's "~$" owner/lock files.
        If (strExt = "doc" Or strExt = "docx") And Left(oFile.Name, 2) <> "~$" Then
            Set d = Application.Documents.Open(FileName:=oFile.Path, ReadOnly:=True)
            ' Work on d (not ActiveDocument) and pass a full output path so the
            ' result does not depend on which window is active or on the
            ' current file-open directory.
            strDocName = outFolder & "\" & fs.GetBaseName(oFile.Path)
            Select Case fileType
                Case "TXT"
                    d.SaveAs FileName:=strDocName & ".txt", FileFormat:=wdFormatText
                Case "RTF"
                    d.SaveAs FileName:=strDocName & ".rtf", FileFormat:=wdFormatRTF
                Case "HTML"
                    d.SaveAs FileName:=strDocName & ".html", FileFormat:=wdFormatFilteredHTML
                Case "PDF"
                    d.ExportAsFixedFormat OutputFileName:=strDocName & ".pdf", ExportFormat:=wdExportFormatPDF
            End Select
            d.Close SaveChanges:=wdDoNotSaveChanges
            Set d = Nothing
        End If
    Next oFile

CleanUp:
    ' Reached both on normal completion (Err.Number = 0) and on any error.
    errNum = Err.Number
    errMsg = Err.Description
    ' d is only non-Nothing here if a conversion failed while it was open.
    If Not d Is Nothing Then d.Close SaveChanges:=wdDoNotSaveChanges
    Application.ScreenUpdating = True
    If errNum <> 0 Then
        MsgBox "Conversion stopped: " & errMsg, vbExclamation, "File Conversion"
    End If
End Sub
5jvtdoz2

5jvtdoz22#

您可以使用 Aspose.Words Cloud SDK for Python。它支持 DOC/DOCX 到 PDF 的转换,并保持格式/样式不变。它的免费试用计划提供每月 150 次 API 调用。
P.S.:我是 Aspose 的开发者布道者。

# For complete examples and data files, please go to https://github.com/aspose-words-cloud/aspose-words-cloud-python
# Import module
import asposewordscloud
import asposewordscloud.models.requests
from shutil import copyfile

# Convert a local DOCX file to PDF with the Aspose.Words Cloud API.
# Get your Client ID and Secret from https://dashboard.aspose.cloud.
client_id = 'xxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxx'
client_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxx'

words_api = asposewordscloud.WordsApi(client_id, client_secret)
words_api.api_client.configuration.host = 'https://api.aspose.cloud'

filename = 'C:/Temp/02_pages.docx'
dest_name = 'C:/Temp/02_pages.pdf'

# Convert DOCX to PDF. The source file is opened in a `with` block so the
# handle is closed even if the request fails; the call is made inside the
# block because the request object holds the open stream.
with open(filename, 'rb') as document:
    request = asposewordscloud.models.requests.ConvertDocumentRequest(
        document=document, format='pdf')
    result = words_api.convert_document(request)

# `result` is passed to copyfile as a source path, as in the original snippet.
# NOTE(review): confirm that convert_document returns a file path in the SDK
# version you use.
copyfile(result, dest_name)
print("Result {}".format(result))
yzuktlbb

yzuktlbb3#

import os
from pathlib import Path

from docx2pdf import convert

# Batch-convert every .docx file in `input_path` to a PDF in `output_path`.

# The location where the files are located
input_path = r'c:\Folder7\input'
# The location where we will write the PDF files
output_path = r'c:\Folder7\output'

# Validate the input directory first, so nothing is created on disk when the
# configuration is wrong. is_dir() is already False for a missing path.
directory_path = Path(input_path)
if not directory_path.is_dir():
    print(directory_path, "is invalid")
    # Equivalent to sys.exit(1), but needs no `import sys` (the original
    # called sys.exit without importing sys, which raised NameError).
    raise SystemExit(1)

# Create the output directory if it doesn't exist
os.makedirs(output_path, exist_ok=True)

# Convert each .docx file to .pdf
for file_path in directory_path.glob("*.docx"):
    # Skip Word's "~$" owner/lock files, which match *.docx but are not
    # real documents.
    if file_path.name.startswith("~$"):
        continue
    print("Converting file:", file_path)
    output_file_path = os.path.join(output_path, file_path.stem + ".pdf")
    convert(file_path, output_file_path)
    print("Converted file:", file_path, "to", output_file_path)

相关问题