Visual Studio 用于处理PDF数据提取的API

aij0ehis  于 2023-08-07  发布在  其他
关注(0)|答案(2)|浏览(106)

所以我在.NET Core Web API中创建了一个Web API。我添加了逻辑到API创建的所有代码,在swaggerUI上我测试了API,它显示错误:无法访问已释放的对象。对象名称:'ReferenceReadStream'。这是代码(C#)

using Aspose.Pdf;
using Aspose.Pdf.Text;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using System;
using System.Data;
using System.IO;
using System.Linq;

namespace TryingOutAPI.Controllers
{
    [Route("api/[controller]")]
    [ApiController]
    public class ValuesController : ControllerBase
    {
        [HttpPost]
        public IActionResult ProcessPdfTables(IFormFile pdfFile)
        {
            try
            {
                if (pdfFile == null || pdfFile.Length == 0)
                {
                    return BadRequest("No PDF file uploaded.");
                }

                // Load the PDF document from the uploaded file
                Aspose.Pdf.Document pdfDocument;
                using (var stream = pdfFile.OpenReadStream())
                {
                    pdfDocument = new Aspose.Pdf.Document(stream);
                }

                // Extract the pages with the tables
                DataTable[] tables = ExtractTablesFromPdf(pdfDocument, new int[] { 2, 3 });

                // Access the first table from the list of extracted tables
                DataTable table1 = tables[0];
                // Access the second table from the list of extracted tables
                DataTable table2 = tables[1];

                // Specify the correct column names
                string[] columnsToExtract = { "Peak Name", "RT", "Area", "% Area", "RT Ratio", "Height" };

                // Select the desired columns from table 1
                DataTable table1Subset = SelectColumnsFromTable(table1, columnsToExtract);
                table1Subset = RemoveRowsWithNullValues(table1Subset);

                // Select the desired columns from table 2
                DataTable table2Subset = SelectColumnsFromTable(table2, columnsToExtract);
                table2Subset = RemoveRowsWithNullValues(table2Subset);

                // Return the subsets of tables without null rows as JSON
                return Ok(new { Table1 = table1Subset, Table2 = table2Subset });
            }
            catch (Exception ex)
            {
                // Handle any exceptions and return an error response
                return StatusCode(StatusCodes.Status500InternalServerError, ex.Message);
            }
        }

        private DataTable[] ExtractTablesFromPdf(Aspose.Pdf.Document pdfDocument, int[] pages)
        {
            DataTable[] tables = new DataTable[pages.Length];

            for (int i = 0; i < pages.Length; i++)
            {
                int pageNumber = pages[i];
                Page pdfPage = pdfDocument.Pages[pageNumber];

                // Extract text from the page
                TextAbsorber textAbsorber = new TextAbsorber();
                pdfPage.Accept(textAbsorber);
                string pageContent = textAbsorber.Text;

                tables[i] = ConvertTextToDataTable(pageContent);
            }

            return tables;
        }
        private DataTable SelectColumnsFromTable(DataTable table, string[] columnsToExtract)
        {
            DataTable subset = new DataTable();

            foreach (string column in columnsToExtract)
            {
                DataColumn existingColumn = table.Columns.Cast<DataColumn>()
                    .FirstOrDefault(c => c.ColumnName == column);
                if (existingColumn != null)
                {
                    subset.Columns.Add(existingColumn.ColumnName);
                }
            }

            foreach (DataRow row in table.Rows)
            {
                DataRow newRow = subset.NewRow();
                foreach (DataColumn column in subset.Columns)
                {
                    newRow[column.ColumnName] = row[column.ColumnName];
                }
                subset.Rows.Add(newRow);
            }

            return subset;
        }
        private DataTable RemoveRowsWithNullValues(DataTable table)
        {
            DataTable filteredTable = table.Clone();

            foreach (DataRow row in table.Rows)
            {
                bool hasNullValues = row.ItemArray.Any(x => x is DBNull || string.IsNullOrWhiteSpace(x.ToString()));
                if (!hasNullValues)
                {
                    filteredTable.ImportRow(row);
                }
            }

            return filteredTable;
        }
        private DataTable ConvertTextToDataTable(string text)
        {
            DataTable dataTable = new DataTable();

            // Split the text into lines
            string[] lines = text.Split('\n');

            // Extract column names from the first line
            string[] columnNames = lines[0].Split('\t');

            // Add columns to the DataTable
            foreach (string columnName in columnNames)
            {
                dataTable.Columns.Add(columnName.Trim());
            }

            // Extract data rows from subsequent lines
            for (int i = 1; i < lines.Length; i++)
            {
                string[] rowValues = lines[i].Split('\t');

                // Create a new DataRow
                DataRow dataRow = dataTable.NewRow();

                // Set values for each column in the row
                for (int j = 0; j < columnNames.Length; j++)
                {
                    dataRow[j] = rowValues[j].Trim();
                }

                // Add the row to the DataTable
                dataTable.Rows.Add(dataRow);
            }

            return dataTable;
        }

    }
}

字符串
我尝试上传工作的文件,但它没有显示提取的数据

2wnc66cl

2wnc66cl1#

ProcessPdfTables中的using语句将在使用stream之前处理它。如果你使用的是C#版本8或更高版本,你可以像这样声明stream

using var stream = pdfFile.OpenReadStream();

字符串
使用此声明,stream将不会被释放,直到它超出范围。

jk9hmnmh

jk9hmnmh2#

它可能是你的using语句,你有一个对象引用到流,你是在你的using语句和结束处置。
尝试删除using语句,并在处理完pdf文档后释放流。

相关问题