import numpy as np
from numpy.polynomial.polynomial import polyfit
import matplotlib.pyplot as plt
# Sample data: ten integer x values lying exactly on the line y = 5x + 10
x = np.arange(10)
y = 10 + x * 5

# Degree-1 fit. The polyfit imported from numpy.polynomial.polynomial is
# unpacked here as (intercept, slope), i.e. lowest-order coefficient first.
b, m = polyfit(x, y, deg=1)

# Raw points as dots, fitted line drawn on top of them
plt.plot(x, y, '.')
plt.plot(x, m * x + b, '-')
plt.show()
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt
# Synthetic sample: Y follows X plus a small uniform perturbation.
X = np.random.rand(100)
Y = X + 0.1 * np.random.rand(100)

# Ordinary least squares on the design matrix produced by add_constant,
# which supplies the column of ones that carries the intercept term.
results = sm.OLS(Y, sm.add_constant(X)).fit()
print(results.summary())

# params[0] is used as the intercept and params[1] as the slope.
intercept, slope = results.params[0], results.params[1]
X_plot = np.linspace(0, 1, 100)

plt.scatter(X, Y)
plt.plot(X_plot, intercept + slope * X_plot)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
def scatter_plot_with_correlation_line(x, y, graph_filepath):
    """Scatter-plot ``y`` against ``x``, overlay a fitted line, and save a PNG.

    Adapted from http://stackoverflow.com/a/34571821/395857

    The line is evaluated on an evenly spaced grid spanning the current
    x-limits of the axes rather than on ``x`` itself, so ``x`` does not
    have to be ordered.

    The function draws on the current pyplot figure and does not clear or
    close it afterwards.

    Args:
        x: Horizontal coordinates of the points.
        y: Vertical coordinates of the points, paired with ``x``.
        graph_filepath: Destination handed to ``plt.savefig``; the image is
            written in PNG format at 300 dpi.
    """
    # Create scatter plot
    plt.scatter(x, y)

    # Add correlation line. A degree-1 np.polyfit is unpacked as
    # (slope, intercept).
    m, b = np.polyfit(x, y, 1)
    # Read the limits once, after the scatter call has set them.
    x_min, x_max = plt.gca().get_xlim()
    X_plot = np.linspace(x_min, x_max, 100)
    plt.plot(X_plot, m*X_plot + b, '-')

    # Save figure
    plt.savefig(graph_filepath, dpi=300, format='png', bbox_inches='tight')
def main():
    """Generate a noisy linear sample and save its plot to 'scatter_plot.png'."""
    # Data: 100 uniform random x values; y is x plus uniform noise scaled by 0.1
    x = np.random.rand(100)
    y = x + np.random.rand(100)*0.1

    # Plot
    scatter_plot_with_correlation_line(x, y, 'scatter_plot.png')
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
    # For profiling, import cProfile and run this instead:
    #cProfile.run('main()') # if you want to do some profiling
import numpy as np
import matplotlib.pyplot as plt
# Reproducible synthetic sample (seed 0): a slope-5 trend plus Rayleigh noise
rng = np.random.default_rng(0)
x = rng.random(100)
noise = rng.rayleigh(1, x.shape)
y = 5 * x + noise

plt.scatter(x, y, alpha=0.5)

# Least-squares line: slope m and intercept b
m, b = np.polyfit(x, y, 1)

# Draw y = m*x + b as a line through (0, b) with slope m; the fitted
# equation is shown as the legend entry.
equation = f'$y = {m:.2f}x {b:+.2f}$'
plt.axline((0, b), slope=m, color='r', label=equation)
plt.legend()
plt.show()
import numpy as np
import matplotlib.pyplot as plt # To visualize
import pandas as pd # To read data
from sklearn.linear_model import LinearRegression
# Load the data set from disk
data = pd.read_csv('data.csv')

# Take the first two columns as NumPy arrays. reshape(-1, 1) yields a single
# column and lets NumPy work out the number of rows.
X, Y = (data.iloc[:, col].values.reshape(-1, 1) for col in (0, 1))

# Create the estimator and fit it (fit returns the estimator itself),
# then predict on the same inputs it was trained on.
linear_regressor = LinearRegression().fit(X, Y)
Y_pred = linear_regressor.predict(X)

# Observed points with the fitted line in red
plt.scatter(X, Y)
plt.plot(X, Y_pred, color='red')
plt.show()
8条答案
按热度按时间hgncfbus1#
qaxu7uf22#
我喜欢Seaborn的regplot或lmplot:
为此,请执行以下操作:
anhgbhbe3#
我偏爱scikits.statsmodels，下面是一个例子：
唯一棘手的部分是`sm.add_constant(X)`，它将一列1添加到`X`中，以获得截距项。
dojqjjoe4#
this excellent answer中绘制最佳拟合线方法的单行版本为：
使用`np.unique(x)`代替`x`，可以处理`x`未排序或具有重复值的情况。调用`poly1d`是写出`m*x + b`（如this other excellent answer）的替代方法。
iqih9akk5#
另一种方法是使用`axes.get_xlim()`：
ldioqlga6#
matplotlib 3.3中的新增功能
给定斜率`m`和截距`b`，使用新的**`plt.axline`**绘制`y = m*x + b`：
`plt.axline`与`np.polyfit`的示例：
这里的方程是一个图例条目，但是如果要沿着直线本身绘制方程，请参见how to rotate annotations to match lines。
ijxebb2r7#
对比
j13ufse28#
您可以参考Adarsh Menon提供的这篇教程：https://towardsdatascience.com/linear-regression-in-6-lines-of-python-5e1d0cd05b8d
这是我发现的最简单的方法,它基本上看起来像: