1️、boost::math::quantile 详解
`boost::math::quantile` 是 Boost.Math 中的函数,用于计算分布的分位点(分位数),也就是给定累积分布函数值 $p$,求出对应的 $x$ 使得:
$$P(X \le x) = p$$
函数签名
// Simplified sketch of the non-member quantile accessor: it takes a
// distribution object and a cumulative probability p and returns a value of
// the distribution's value_type.
// NOTE(review): Boost.Math appears to type `p` with the distribution's own
// value type rather than a fixed `double` — confirm against the Boost docs.
template<class Distribution>
typename Distribution::value_type quantile(const Distribution& dist, double p);
- `Distribution`:Boost.Math 提供的概率分布类型,如 `normal`、`chi_squared`、`students_t`、`fisher_f` 等
- `p`:累积概率,$0 < p < 1$
- 返回值:对应的分位点 $x$

注意:`p` 应取在 (0,1) 之间;若 `p` 超出 [0,1],在默认错误处理策略下会抛出 `std::domain_error` 异常。
2️、常见分布使用示例
#include <iostream>
#include <boost/math/distributions/normal.hpp>
#include <boost/math/distributions/chi_squared.hpp>
#include <boost/math/distributions/students_t.hpp>
#include <boost/math/distributions/fisher_f.hpp>
using namespace boost::math;
int main() {
// =========================
// 1. 正态分布 N(0,1)
// =========================
normal dist_norm(0.0, 1.0);
double q_norm = quantile(dist_norm, 0.975); // 95% 双尾临界值
std::cout << "Normal(0,1) 97.5% quantile = " << q_norm << std::endl;
// =========================
// 2. 卡方分布 χ²(df=5)
// =========================
chi_squared dist_chi(5.0);
double q_chi = quantile(dist_chi, 0.95); // 95% 分位点
std::cout << "Chi-squared(df=5) 95% quantile = " << q_chi << std::endl;
// =========================
// 3. Student t 分布 t(df=10)
// =========================
students_t dist_t(10);
double q_t = quantile(dist_t, 0.975); // 95% 双尾临界值
std::cout << "t(df=10) 97.5% quantile = " << q_t << std::endl;
// =========================
// 4. F 分布 F(df1=5, df2=10)
// =========================
fisher_f dist_f(5, 10);
double q_f = quantile(dist_f, 0.95); // 95% 分位点
std::cout << "F(df1=5, df2=10) 95% quantile = " << q_f << std::endl;
return 0;
}
3️、输出示例(参考值)
Normal(0,1) 97.5% quantile = 1.95996
Chi-squared(df=5) 95% quantile = 11.0705
t(df=10) 97.5% quantile = 2.22814
F(df1=5, df2=10) 95% quantile = 3.3258
4️、四个应用场景
- 统计假设检验
  - 例如 t 检验、F 检验、卡方检验中,用分位点作为临界值。
  - 代码中可直接用 `quantile` 计算给定显著性水平 $\alpha$ 对应的临界值(例如双尾检验取 `quantile(dist, 1 - alpha/2)`)。
- 置信区间计算
  - 对正态分布或 t 分布,求置信区间上限/下限时可用:
    $$CI = \bar{x} \pm \mathrm{quantile}(t_{df},\, 1-\alpha/2) \cdot SE$$
- 生成分布样本边界(阈值)
  - 在 Monte Carlo 或仿真中,需要限制样本落在特定概率范围内,可用 `quantile` 获取边界值。
- 异常值检测
  - 根据分布分位点判断数据是否属于异常值。
  - 例如卡方分布用于多元正态异常值检测:若 Mahalanobis 距离的平方大于 `quantile(chi_squared(df), 0.975)`,则判定为异常。
5、四个应用场景代码示例
1️、统计假设检验(t 检验示例)
#include <cmath>     // std::sqrt, std::abs (floating-point overloads)
#include <iostream>
#include <boost/math/distributions/students_t.hpp>

/// One-sample, two-sided t-test at the 5% significance level.
///
/// Compares a sample mean against a hypothesised population mean: the test
/// statistic is checked against the 0.975 quantile of Student's t with
/// n - 1 degrees of freedom, and the decision is printed to stdout.
int main() {
    const double sample_mean = 5.2;
    const double population_mean = 5.0;
    const double sample_std = 1.2;
    const int n = 15;

    // Degrees of freedom of the one-sample t statistic.
    const int df = n - 1;
    const boost::math::students_t dist_t(df);

    // Two-sided test at the 95% level: 2.5% in each tail, hence 0.975.
    const double t_crit = boost::math::quantile(dist_t, 0.975);

    // t = (sample mean - hypothesised mean) / standard error of the mean.
    const double standard_error = sample_std / std::sqrt(static_cast<double>(n));
    const double t_stat = (sample_mean - population_mean) / standard_error;

    std::cout << "t_stat = " << t_stat << ", t_crit = " << t_crit << std::endl;
    if (std::abs(t_stat) > t_crit) {
        std::cout << "拒绝原假设" << std::endl;
    } else {
        std::cout << "无法拒绝原假设" << std::endl;
    }
    return 0;
}
2️、置信区间计算(正态分布示例)
#include <cmath>     // std::sqrt
#include <iostream>
#include <boost/math/distributions/normal.hpp>

/// Prints a two-sided 95% confidence interval for a mean, using the standard
/// normal quantile as the critical value.
int main() {
    const double mean = 10.0;
    const double stddev = 2.0;
    const int n = 30;

    // Standard normal N(0, 1) supplies the z critical value.
    const boost::math::normal dist_norm(0.0, 1.0);

    // Two-sided interval: alpha is split evenly between the two tails.
    const double alpha = 0.05;
    const double z = boost::math::quantile(dist_norm, 1 - alpha / 2);

    // Half-width of the interval: z times the standard error of the mean.
    const double margin = z * (stddev / std::sqrt(static_cast<double>(n)));

    std::cout << "95% 置信区间: [" << mean - margin << ", " << mean + margin << "]" << std::endl;
    return 0;
}
3️、生成分布样本边界(Monte Carlo 示例)
#include <iostream>
#include <boost/math/distributions/chi_squared.hpp>
#include <vector>
#include <random>
int main() {
    int df = 4;
    boost::math::chi_squared dist_chi(df);

    // Central 90% band of the chi-squared distribution: its 5% and 95% quantiles.
    double upper = boost::math::quantile(dist_chi, 0.95);
    double lower = boost::math::quantile(dist_chi, 0.05);
    std::cout << "样本边界: [" << lower << ", " << upper << "]" << std::endl;

    // Draw ten chi-squared variates from a default-constructed engine and
    // flag every draw that lands outside the band.
    std::default_random_engine engine;
    std::chi_squared_distribution<double> sampler(df);
    std::vector<double> draws(10);
    for (std::size_t i = 0; i < draws.size(); ++i) {
        draws[i] = sampler(engine);
        const double value = draws[i];
        std::cout << "样本: " << value;
        if (value < lower || value > upper) {
            std::cout << " -> 超出边界!";
        }
        std::cout << std::endl;
    }
}
4️、异常值检测(Mahalanobis 距离 + 卡方分布)
#include <iostream>
#include <vector>
#include <cmath>
#include <boost/math/distributions/chi_squared.hpp>
int main() {
int dim = 3; // 数据维度
boost::math::chi_squared dist_chi(dim);
double threshold = boost::math::quantile(dist_chi, 0.975); // 95% 上分位点
std::vector<std::vector<double>> data = {{1.0,2.0,3.0}, {10.0, 10.0, 10.0}, {2.0,1.5,2.5}};
std::vector<double> mean = {2.0, 2.0, 2.0};
for (auto &x : data) {
double mahalanobis_sq = 0.0;
for (int i = 0; i < dim; i++)
mahalanobis_sq += (x[i] - mean[i])*(x[i] - mean[i]); // 假设协方差单位矩阵
std::cout << "Mahalanobis^2 = " << mahalanobis_sq;
if (mahalanobis_sq > threshold)
std::cout << " -> 异常!";
std::cout << std::endl;
}
}