using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
namespace pdfreadertest
{
/// <summary>
/// Web Forms page that extracts the text of one PDF page with iTextSharp and
/// shows it in the <c>lblPdfText</c> control.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Page load handler: extracts page 1 of the hard-coded sample PDF.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        GetTextFromPDFFile(@"c:\example.pdf", 1);
    }

    /// <summary>
    /// Extracts the text of a single page of a PDF file and displays it in
    /// <c>lblPdfText</c>.
    /// </summary>
    /// <param name="pdfFile">Path of the PDF file, passed to the <c>PdfReader</c> constructor.</param>
    /// <param name="pageNumber">
    /// Page number passed to <c>PdfTextExtractor.GetTextFromPage</c>; the caller in this
    /// class passes 1 for the first page.
    /// </param>
    public void GetTextFromPDFFile(string pdfFile, int pageNumber)
    {
        string pdfText;
        PdfReader pdfReader = new PdfReader(pdfFile);
        try
        {
            // Extract the text of the requested page into a string.
            pdfText = PdfTextExtractor.GetTextFromPage(pdfReader, pageNumber);
        }
        finally
        {
            // Release the reader even when extraction throws, so the file handle
            // is not leaked. Closing stays best-effort (a close failure must not
            // hide the extraction result or the original exception), but it is
            // traced instead of being silently swallowed.
            try
            {
                pdfReader.Close();
            }
            catch (Exception ex)
            {
                System.Diagnostics.Trace.TraceWarning(
                    "Failed to close PDF reader for '{0}': {1}", pdfFile, ex);
            }
        }

        // The extracted text comes from the PDF and is plain text, not markup:
        // HTML-encode it so characters such as '<' and '&' are displayed literally
        // rather than interpreted by the browser.
        // NOTE(review): lblPdfText is declared outside this view (presumably a
        // Label in the designer partial) - confirm it does not already encode.
        this.lblPdfText.Text = HttpUtility.HtmlEncode(pdfText);
    }
}
}
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Canvas.Parser;
using iText.Kernel.Pdf.Canvas.Parser.Listener;
namespace ScanTextInPDFs
{
/// <summary>
/// Console program that reports whether a fixed phrase occurs in the PDF
/// located at <c>PDFs/Brochure.pdf</c> under the application base directory.
/// </summary>
internal class Program
{
    /// <summary>
    /// Loads the PDF into memory, searches it for the phrase and prints the outcome.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static async Task Main(string[] args)
    {
        // Path.Combine picks the platform's directory separator, so the lookup
        // also works on non-Windows hosts (the hard-coded '\\' did not).
        string pdfPath = Path.Combine(AppContext.BaseDirectory, "PDFs", "Brochure.pdf");
        byte[] bytes = await File.ReadAllBytesAsync(pdfPath);
        string textToFind = "Lorem ipsum";

        if (PdfContainsText(bytes, textToFind))
        {
            Console.WriteLine($"Found '{textToFind}' in the pdf.");
        }
        else
        {
            Console.WriteLine($"Did not find '{textToFind}' in the pdf.");
        }
    }

    /// <summary>
    /// Returns whether any single page of the PDF contains the given text
    /// (ordinal, case-sensitive comparison).
    /// </summary>
    /// <param name="pdfBytes">Raw bytes of the PDF document.</param>
    /// <param name="textToFind">Text to look for in each page's extracted text.</param>
    /// <returns><c>true</c> as soon as one page contains the text; otherwise <c>false</c>.</returns>
    private static bool PdfContainsText(byte[] pdfBytes, string textToFind)
    {
        using MemoryStream memoryStream = new MemoryStream(pdfBytes);
        using PdfReader pdfReader = new PdfReader(memoryStream);
        using PdfDocument pdfDocument = new PdfDocument(pdfReader);

        // Page numbers are 1-based here; the count is read once instead of on
        // every loop iteration.
        int pageCount = pdfDocument.GetNumberOfPages();
        for (int page = 1; page <= pageCount; page++)
        {
            PdfPage pdfPage = pdfDocument.GetPage(page);

            // A new strategy instance is created for each page, as in the original code.
            string pageText = PdfTextExtractor.GetTextFromPage(pdfPage, new SimpleTextExtractionStrategy());

            // Each page is searched on its own, so a phrase that straddles a
            // page break is not detected.
            if (pageText.Contains(textToFind, StringComparison.Ordinal))
            {
                // Stop at the first hit: the remaining pages cannot change the answer.
                return true;
            }
        }

        return false;
    }
}
}
3条答案
按热度按时间lymnna711#
iTextSharp 相当不错,也很容易使用。下面是一个小例子:读取 PDF,把文本提取到一个字符串中,然后显示在 WebForms 页面的标签(Label)上:
字符串
希望能帮上忙。
2ul0zpep2#
如果你在 2023 年遇到这个问题,可以用 C#(.NET Core)读取 PDF 中的文本。这可以通过
itext7
NuGet 包实现(原回答中附有包含此代码的完整可运行解决方案的链接)。
k4emjkb13#
我认为 iTextSharp 是最受欢迎的一个,不过也有其他几个库,例如 iText.Net、PDFsharp、SharpPDF 等。搜索一下,你会找到很多。我用过 iTextSharp,很喜欢它。