a = 2 b = 3 size = 100 list = runif(size,min = 0,max = 6) # Génération d'un ensemble de données hétéroscédastique data = data.frame(X=list, Y=a*list+b+ (1-cos(list))*rnorm(size)) plot(Y~X,data = data) #Nous avons peu de données : nous allons donc tenter le jackknife. #Le jackknife sort un ensemble de prédiction pour un X donné. #Pour cela, on a besoin du score de conformité associé à chaque (X_i,Y_i) score <- function(x,i,data){ # On crée un modèle linéaire à partir du data frame \ i-ème donnée datai = data[-i,] model = lm(Y~X,data = datai) a_hat = as.numeric(model$coefficients["X"]) b_hat = as.numeric(model$coefficients["(Intercept)"]) score_plus = a_hat*x+b_hat+abs(a_hat*data$X[i]+b_hat-data$Y[i]) score_moins = a_hat*x+b_hat-abs(a_hat*data$X[i]+b_hat-data$Y[i]) return(c(score_moins,score_plus)) } pred_ens <- function(x,alpha,data){ S_plus <- rep(NULL, length(data$X)) S_moins <- rep(NULL, length(data$X)) for (i in 1:length(data$X)){ score_pm = score(x,i,data) S_moins[i]= score_pm[1] S_plus[i] = score_pm[2] } quantile_moins = quantile(S_moins,alpha/2) quantile_plus = quantile(S_plus,1-alpha/2) return(c(quantile_moins,quantile_plus)) } temps = seq(from = 0, to = 6, length.out = size ) u_bound <- rep(NULL, length(temps)) l_bound <- rep(NULL, length(temps)) {i=1 for(t in temps){ bounds = pred_ens(t,0.05,data) l_bound[i] = bounds[1] u_bound[i] = bounds[2] i = i+1 }} model_princ = lm(Y~X,data = data) a_hat1 = as.numeric(model_princ$coefficients["X"]) b_hat1 = as.numeric(model_princ$coefficients["(Intercept)"]) library(ggplot2) ggplot(data = data,aes(x= X, y=Y)) + geom_point()+ geom_abline(aes(intercept = b, slope = a))+ geom_abline(aes(intercept = b_hat1, slope = a_hat1),color = 'red')+ geom_ribbon(aes(x = temps, ymin = l_bound, ymax = u_bound),alpha=0.2,fill = "steelblue2")