import pandas
from pandas.io.excel._openpyxl import OpenpyxlReader
import numpy as np
from pandas._typing import FilePathOrBuffer, Scalar
def _convert_cell(self, cell, convert_float: bool) -> Scalar:
from openpyxl.cell.cell import TYPE_BOOL, TYPE_ERROR, TYPE_NUMERIC
# here we adding this hyperlink support:
if cell.hyperlink and cell.hyperlink.target:
return cell.hyperlink.target
# just for example, you able to return both value and hyperlink,
# comment return above and uncomment return below
# btw this may hurt you on parsing values, if symbols "|||" in value or hyperlink.
# return f'{cell.value}|||{cell.hyperlink.target}'
# here starts original code, except for "if" became "elif"
elif cell.is_date:
return cell.value
elif cell.data_type == TYPE_ERROR:
return np.nan
elif cell.data_type == TYPE_BOOL:
return bool(cell.value)
elif cell.value is None:
return "" # compat with xlrd
elif cell.data_type == TYPE_NUMERIC:
# GH5394
if convert_float:
val = int(cell.value)
if val == cell.value:
return val
else:
return float(cell.value)
return cell.value
def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
from openpyxl import load_workbook
# had to change read_only to False:
return load_workbook(
filepath_or_buffer, read_only=False, data_only=True, keep_links=False
)
OpenpyxlReader._convert_cell = _convert_cell
OpenpyxlReader.load_workbook = load_workbook
from openpyxl import load_workbook
workbook = load_workbook('test.xlsx')
worksheet = workbook.active
column_indices = [9]
for row in range(2, worksheet.max_row + 1):
for col in column_indices:
filelocation = worksheet.cell(column=col, row=row) # this is hyperlink
text = worksheet.cell(column=col + 1, row=row) # thi is your text
worksheet.cell(column=col + 1, row=row).value = '=HYPERLINK("' + filelocation.value + '","' + text.value + '")'
workbook.save('test.xlsx')
4条答案
按热度按时间a7qyws3x1#
这可以用openpyxl完成,我不确定它是否可以用Pandas。我是这么做的:
你也可以使用iPython,并设置一个变量等于超链接对象:
t = ws.cell(row=2, column=1).hyperlink
然后做
t.
和按tab键,以查看所有的选项,你可以做什么或访问从该对象.ufj5ltwl2#
快速猴子修补,没有转换器或类似的东西,如果你想把所有的单元格与超链接作为超链接,更复杂的方式,我想,至少能够选择,哪些列视为超链接或收集数据,或以某种方式保存数据和超链接在同一个单元格在dataframe。使用转换器,不知道。(顺便说一句,我也玩过
data_only
,keep_links
,没有帮助,只有改变read_only
结果确定,我想它可以减慢你的代码速度)。P.S.:仅适用于xlsx,即引擎是openpyxl
P.P.S.:如果你在未来阅读这篇评论并发布https://github.com/pandas-dev/pandas/issues/13439仍然打开,不要忘记在
pandas.io.excel._openpyxl
上查看_convert_cell
和load_workbook
的更改并相应地更新它们。在python文件中添加了上面的代码后,您将能够调用
df = pandas.read_excel(input_file)
在写了这些东西之后,我突然想到,也许使用openpyxl本身会更简单,更干净^_^
czq61nw13#
正如slaw评论的那样,它不抓取超链接,而只抓取文本
这里text.xlsx在第9列中包含链接
8qgya5xd4#
你不能在Pandas身上这样做。你可以试试other libraries designed to deal with excel files。