pandas 从雅虎检索高频交易数据

eqoofvh9  于 2023-03-16  发布在  其他
关注(0)|答案(1)|浏览(112)

我正在尝试从雅虎财经获取高频(分钟级)行情数据,并保存为 CSV 文件。
但是运行下面的代码时出现了很多错误。我已经安装了 Python 和 pandas,有人能帮忙吗?

import requests
import pandas as pd
import arrow
import datetime

def get_quote_data(symbol='SBIN.NS', data_range='1d', data_interval='1m'):
res = requests.get('https://query1.finance.yahoo.com/v8/finance/chart/{symbol}?range={data_range}&interval={data_interval}'.format(**locals()))
data = res.json()
body = data['chart']['result'][0]
dt = datetime.datetime
dt = pd.Series(map(lambda x: arrow.get(x).to('Asia/Calcutta').datetime.replace(tzinfo=None), body['timestamp']), name='Datetime')
df = pd.DataFrame(body['indicators']['quote'][0], index=dt)
dg = pd.DataFrame(body['timestamp'])

return df.loc[:, ('open', 'high', 'low', 'close', 'volume')]
data = get_quote_data('SBIN.NS', '5d', '1m')
data.dropna(inplace=True) #removing NaN rows
print(data)
data.to_csv('output.csv')

这些是我得到的错误:

>>> import requests
>>> import pandas as pd
>>> import arrow
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ModuleNotFoundError: No module named 'arrow'
>>> import datetime
>>> 
>>> def get_quote_data(symbol='SBIN.NS', data_range='1d', data_interval='1m'):
... res = requests.get('https://query1.finance.yahoo.com/v8/finance/chart/{symbol}?range={data_range}&interval={data_interval}'.format(**locals()))
  File "<stdin>", line 2
    res = requests.get('https://query1.finance.yahoo.com/v8/finance/chart/{symbol}?range={data_range}&interval={data_interval}'.format(**locals()))
    ^
IndentationError: expected an indented block after function definition on line 1
>>> data = res.json()
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'res' is not defined
>>> body = data['chart']['result'][0]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'data' is not defined
>>> dt = datetime.datetime
>>> dt = pd.Series(map(lambda x: arrow.get(x).to('Asia/Calcutta').datetime.replace(tzinfo=None), body['timestamp']), name='Datetime')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'body' is not defined
>>> df = pd.DataFrame(body['indicators']['quote'][0], index=dt)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'body' is not defined
>>> dg = pd.DataFrame(body['timestamp'])
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'body' is not defined
>>> 
>>> return df.loc[:, ('open', 'high', 'low', 'close', 'volume')]
  File "<stdin>", line 1
SyntaxError: 'return' outside function
>>> data = get_quote_data('SBIN.NS', '5d', '1m')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'get_quote_data' is not defined
>>> data.dropna(inplace=True) #removing NaN rows
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'data' is not defined
>>> print(data)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'data' is not defined
>>> data.to_csv('output.csv')

我不太熟悉编程,但我需要获取这些数据来做回归分析。有什么办法可以让这段代码正常运行吗?谢谢。

m1m5dgzv

m1m5dgzv1#

您必须把函数定义(def 行)下面属于函数体的代码缩进;另外,调用 requests.get 时需要通过 headers 参数传入 User-Agent,以避免 403 错误。从报错信息还可以看出 arrow 模块尚未安装,需要先执行 pip install arrow。

import requests
import pandas as pd
import arrow
import datetime

def get_quote_data(symbol='SBIN.NS', data_range='1d', data_interval='1m'):
    """Download OHLCV bars for one symbol from Yahoo's v8 chart endpoint.

    Args:
        symbol: Ticker placed in the request URL path, e.g. ``'SBIN.NS'``.
        data_range: Value sent as the ``range`` query parameter, e.g. ``'5d'``.
        data_interval: Value sent as the ``interval`` query parameter,
            e.g. ``'1m'``.

    Returns:
        A DataFrame with the columns ``open``, ``high``, ``low``, ``close``
        and ``volume``. The index is named ``'Datetime'`` and holds naive
        datetimes obtained by converting each returned timestamp to the
        ``'Asia/Calcutta'`` zone and then dropping the tzinfo.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the JSON payload carries no chart result.
    """
    url = 'https://query1.finance.yahoo.com/v8/finance/chart/{}'.format(symbol)
    # A browser-like User-Agent is sent because the endpoint answers 403
    # to the default requests one.
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0'}
    res = requests.get(
        url,
        params={'range': data_range, 'interval': data_interval},
        headers=headers,
        timeout=30,  # requests never times out unless told to
    )
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    res.raise_for_status()

    chart = res.json()['chart']
    if not chart.get('result'):
        raise ValueError(
            'No chart data returned for {!r}: {}'.format(symbol, chart.get('error'))
        )
    body = chart['result'][0]

    # Convert each timestamp to a naive Asia/Calcutta wall-clock datetime.
    index = pd.Series(
        [
            arrow.get(ts).to('Asia/Calcutta').datetime.replace(tzinfo=None)
            for ts in body['timestamp']
        ],
        name='Datetime',
    )
    df = pd.DataFrame(body['indicators']['quote'][0], index=index)

    # Fix the column order explicitly; the payload's key order is not relied on.
    return df.loc[:, ['open', 'high', 'low', 'close', 'volume']]
    
# Fetch the bars with range '5d' and interval '1m' for SBIN.NS, discard rows
# containing NaN, then show the result and save it as CSV.
raw_quotes = get_quote_data('SBIN.NS', '5d', '1m')
data = raw_quotes.dropna()  # removing NaN rows
print(data)
data.to_csv('output.csv')

更新

您可以使用yfinance包从Yahoo下载数据:

# pip install yfinance
import yfinance as yf

# Request SBIN.NS data from Yahoo through yfinance with period '5d' and
# interval '1m', then print the resulting table.
data = yf.download('SBIN.NS', period='5d', interval='1m')
print(data)

# Output
                               Open      High       Low     Close  Adj Close  Volume
Datetime                                                                            
2023-03-03 09:15:00+05:30 541.75000 563.54999 541.75000 554.59998  554.59998       0
2023-03-03 09:16:00+05:30 555.00000 555.00000 549.79999 551.45001  551.45001  584386
2023-03-03 09:17:00+05:30 551.29999 551.29999 549.25000 549.75000  549.75000  367080
2023-03-03 09:18:00+05:30 549.54999 550.15002 547.59998 549.40002  549.40002  260746
2023-03-03 09:19:00+05:30 549.50000 551.34998 549.29999 551.04999  551.04999  278368
...                             ...       ...       ...       ...        ...     ...
2023-03-10 15:26:00+05:30 548.00000 548.04999 547.45001 547.59998  547.59998   54936
2023-03-10 15:27:00+05:30 547.54999 547.84998 547.54999 547.75000  547.75000   39313
2023-03-10 15:28:00+05:30 547.79999 547.79999 547.59998 547.75000  547.75000   35021
2023-03-10 15:29:00+05:30 547.75000 547.84998 547.59998 547.84998  547.84998   38026
2023-03-10 15:30:00+05:30 547.65002 547.65002 547.65002 547.65002  547.65002       0

[1871 rows x 6 columns]

相关问题