当我尝试将线性曲线拟合到我的数据点时,曲线与我的数据不匹配。这条线低于我的数据点,斜率也太深了。这可能是由于从x=10开始以及相应的y值,但我不确定。最小二乘法应该是正确的,因为我尝试了多种计算斜率和截距的方法,但所有的方法都产生相同的输出。
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#define PATH_GNUPLOT "your/path/gnuplot"
void ERROR(char* error_string);
int main() {
// first 10 data points are irrelevant
double data[100] = {0,0,0,0,0,0,0,0,0,0, 0.242910, 0.235523, 0.228976, 0.223114, 0.217820, 0.213005, 0.208597, 0.204539, 0.200786, 0.197299, 0.194047, 0.191003, 0.188146, 0.185456, 0.182916, 0.180513, 0.178234, 0.176068, 0.174006, 0.172039, 0.170160, 0.168362, 0.166639, 0.164987, 0.163399, 0.161872, 0.160402, 0.158984, 0.157617, 0.156297, 0.155020, 0.153785, 0.152590, 0.151431, 0.150308, 0.149218, 0.148159, 0.147131, 0.146131, 0.145158, 0.144211, 0.143289, 0.142391, 0.141515, 0.140661, 0.139827, 0.139014, 0.138219, 0.137442, 0.136683, 0.135941, 0.135215, 0.134504, 0.133809, 0.133128, 0.132461, 0.131807, 0.131166, 0.130538, 0.129922, 0.129318, 0.128725, 0.128142, 0.127571, 0.127010, 0.126458, 0.125916, 0.125384, 0.124861, 0.124346, 0.123840, 0.123342, 0.122853, 0.122371, 0.121897, 0.121430, 0.120970, 0.120518, 0.120072, 0.119633, 0.119200, 0.118774, 0.118354, 0.117939, 0.117531, 0.117128, 0.116731, 0.116340, 0.115953, 0.115572};
int n = 100;
/* calc fit */
double sumx, sumy, sumx2, sumxy, a, b = 0.0;
// => log(y) = a * log(x) + b
// data starts at 10 for x value
for (int i = 10; i < n; i++) {
double x = log(i);
double y = log(data[i]);
sumx += x;
sumx2 += x * x;
sumy += y;
sumxy += y * x;
}
a = (n * sumxy - sumx * sumy) / (n * sumx2 - sumx*sumx);
b = (sumy * sumx2 - sumx * sumxy) / (n * sumx2 - sumx*sumx);
/* end calc fit */
/* save data and plot */
FILE *fp = fopen("data.dat", "w");
if (fp == NULL) {
ERROR("failed to open data.dat");
}
for (int i = 10; i < n; i++)
fprintf(fp, "%d %.16lf\n", i, data[i]);
fclose(fp);
FILE *GNUpipe = popen(PATH_GNUPLOT, "w");
if (GNUpipe == NULL) {
ERROR("failed to open gnuplot");
}
fprintf(GNUpipe, "set term png\n");
fprintf(GNUpipe, "set logscale xy\n");
// log(y) = a * log(x) + b
// => y = x**a + 10**b
fprintf(GNUpipe, "Fit(x) = x**%lf * 10**%lf\n", a, b);
fprintf(GNUpipe, "plot [0.1:%d*1.1] 'data.dat' title 'error', ", n);
fprintf(GNUpipe, "Fit(x) title 'fit'\n");
pclose(GNUpipe);
}
void ERROR(char* error_string) {
fprintf(stderr, error_string);
exit(EXIT_FAILURE);
}
编辑:
到目前为止,已实现所有更改的代码:
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#define PATH_GNUPLOT "your/path/gnuplot"
void ERROR(char* error_string);
int main() {
// first 10 data points are irrelevant
double data[100] = {0,0,0,0,0,0,0,0,0,0, 0.242910, 0.235523, 0.228976, 0.223114, 0.217820, 0.213005, 0.208597, 0.204539, 0.200786, 0.197299, 0.194047, 0.191003, 0.188146, 0.185456, 0.182916, 0.180513, 0.178234, 0.176068, 0.174006, 0.172039, 0.170160, 0.168362, 0.166639, 0.164987, 0.163399, 0.161872, 0.160402, 0.158984, 0.157617, 0.156297, 0.155020, 0.153785, 0.152590, 0.151431, 0.150308, 0.149218, 0.148159, 0.147131, 0.146131, 0.145158, 0.144211, 0.143289, 0.142391, 0.141515, 0.140661, 0.139827, 0.139014, 0.138219, 0.137442, 0.136683, 0.135941, 0.135215, 0.134504, 0.133809, 0.133128, 0.132461, 0.131807, 0.131166, 0.130538, 0.129922, 0.129318, 0.128725, 0.128142, 0.127571, 0.127010, 0.126458, 0.125916, 0.125384, 0.124861, 0.124346, 0.123840, 0.123342, 0.122853, 0.122371, 0.121897, 0.121430, 0.120970, 0.120518, 0.120072, 0.119633, 0.119200, 0.118774, 0.118354, 0.117939, 0.117531, 0.117128, 0.116731, 0.116340, 0.115953, 0.115572};
int n = 100;
/* calc fit */
double sumx = 0.0;
double sumy = 0.0;
double sumx2 = 0.0;
double sumxy = 0.0;
double a = 0.0;
double b = 0.0;
// => log(y) = a * log(x) + b
// data starts at 10 for x value
for (int i = 10; i < n; i++) {
double x = log(i);
double y = log(data[i]);
sumx += x;
sumx2 += x * x;
sumy += y;
sumxy += y * x;
}
int N = n - 10;
double denom = N*sumx2 - sumx * sumx;
b = (sumy * sumx2 - sumx * sumxy)/ denom;
a = (N*sumxy - sumx * sumy)/ denom;
printf("%g %g\n",a , b);
/* end calc fit */
/* save data and plot */
FILE *fp = fopen("data.dat", "w");
if (fp == NULL) {
ERROR("failed to open data.dat");
}
for (int i = 10; i < n; i++)
fprintf(fp, "%d %.16le\n", i, data[i]);
fclose(fp);
FILE *GNUpipe = popen(PATH_GNUPLOT, "w");
if (GNUpipe == NULL) {
ERROR("failed to open gnuplot");
}
fprintf(GNUpipe, "set term png\n");
fprintf(GNUpipe, "set logscale xy\n");
// log(y) = a * log(x) + b
// => y = x**a * 10**b
fprintf(GNUpipe, "Fit(x) = x**%.16lf * 10**%.16lf\n", a, b);
fprintf(GNUpipe, "plot [0.1:%d*1.1] 'data.dat' title 'error', ", n);
fprintf(GNUpipe, "Fit(x) title 'fit'\n");
pclose(GNUpipe);
}
void ERROR(char* error_string) {
fprintf(stderr, error_string);
exit(EXIT_FAILURE);
}
“data.dat”:link stdout:-0.323977 -0.66909
用于printf("%g %g\n",a , b);
1条答案
按热度按时间wooyq4lh1#
至少这些问题:
需要初始化为零
保存时间
启用更多警告:“警告:数组初始值设定项中的元素过多”
初始化器太多
避免精度损失
错误
n
代码使用
n
。因为for (int i = 10; i < n; i++)
在a, b
公式中应该是n-10
。也许是配方错误**
看起来不对,需要更深入的审查。
我期望:ref。
啊,我看到我们把
a, b
颠倒了。[追加]
错误基地
代码使用
log()
(log base-e)。如果"Fit(x) = x**%lf * 10**%lf\n"
很重要,可能需要log10()
。