*************************************************************** * * * Common Mean Problem * * * * Graybill-Deal estimator * * and approximate confidence intervals * * * * Data example: Meier (1953), * * percentage of albumin in plasma protein in human subjects * * * * n = sample size * * mean = observed mean * * var = observed variance * ***************************************************************; OPTIONS NODATE NONUMBER; DATA albumin; INPUT experiment $ n mean var; weight = n / var; weight_mean = weight * mean; DATALINES; A 12 62.3 12.986 B 15 60.3 7.840 C 7 59.5 33.433 D 16 61.5 18.513 ; /* Calculate: "sum n /s^2" and "sum mean * n / s^2"; */ PROC SUMMARY DATA=albumin SUM; VAR weight weight_mean; OUTPUT OUT=gd SUM=sum_weight sum_weight_mean; RUN; /* _NULL_ data set for creating macro variables; */ DATA _NULL_; SET gd; gd_est = sum_weight_mean / sum_weight; * Graybill-Deal estimator as macro variable; CALL SYMPUT ("gd_estimator", gd_est); * Inverse of variance as macro variable; CALL SYMPUT ("gd_var_inv", sum_weight); RUN; /* Calculate some required terms; */ DATA a; SET albumin; sinha_1 = 4 * (weight / &gd_var_inv - weight**2 / (&gd_var_inv)**2) / (n + 1); meier_1 = 4 * (weight / &gd_var_inv - weight**2 / (&gd_var_inv)**2) / (n - 1); hart_1 = weight * (mean - &gd_estimator)**2; df_1 = (weight / &gd_var_inv)**2 /(n-1); RUN; PROC SUMMARY DATA=a SUM N; VAR sinha_1 meier_1 hart_1 df_1; OUTPUT OUT=b SUM=sinha_sum meier_sum hart_sum df_sum N=k; RUN; /* Large sample 95% - confidence interval; */ DATA ci_1; gd = &gd_estimator; lower_ci = gd - sqrt(1/&gd_var_inv) * QUANTILE('normal',0.975,0,1); upper_ci = gd + sqrt(1/&gd_var_inv) * QUANTILE('normal',0.975,0,1); half_length = sqrt(1/&gd_var_inv) * QUANTILE('normal',0.975,0,1); method = "Large Sample"; RUN; /* 95% - confidence interval using Sinha's variance formula; */ DATA ci_2; SET b (KEEP=sinha_sum df_sum); gd = &gd_estimator; sinha_var = (1 + sinha_sum) / &gd_var_inv; lower_ci = gd - sqrt(sinha_var) * QUANTILE('T',0.975,1/df_sum); upper_ci = gd + sqrt(sinha_var) * QUANTILE('T',0.975,1/df_sum); half_length = sqrt(sinha_var) * QUANTILE('T',0.975,1/df_sum); method = "Sinha's variance"; DROP sinha_var sinha_sum df_sum; RUN; /* 95% - confidence interval using Meier's variance formula; */ DATA ci_3; SET b (KEEP=meier_sum df_sum); gd = &gd_estimator; meier_var = (1 + meier_sum) / &gd_var_inv; lower_ci = gd - sqrt(meier_var) * QUANTILE('T',0.975,1/df_sum); upper_ci = gd + sqrt(meier_var) * QUANTILE('T',0.975,1/df_sum); half_length = sqrt(meier_var) * QUANTILE('T',0.975,1/df_sum); method = "Meier's variance"; DROP meier_var meier_sum df_sum; RUN; /* 95% - confidence interval using Hartung's variance formula; */ DATA ci_4; SET b (KEEP=hart_sum k); gd = &gd_estimator; hart_var = hart_sum /(&gd_var_inv * (k-1)); lower_ci = gd - sqrt(hart_var) * QUANTILE('T',0.975,k-1); upper_ci = gd + sqrt(hart_var) * QUANTILE('T',0.975,k-1); half_length = sqrt(hart_var) * QUANTILE('T',0.975,k-1); method = "Hartung's variance"; DROP hart_var hart_sum k; RUN; /* Collect all the results */ DATA final; LENGTH method $20.; SET ci_1 ci_2 ci_3 ci_4; RUN; PROC REPORT DATA=final HEADLINE HEADSKIP NOWINDOWS SPLIT="*"; COLUMN gd lower_ci upper_ci half_length method; DEFINE gd / ORDER "Graybill-*Deal*estimator" FORMAT = 10.2; DEFINE lower_ci / DISPLAY "95% lower*bound" FORMAT = 10.2; DEFINE upper_ci / DISPLAY "95% upper*bound" FORMAT = 10.2; DEFINE half_length / DISPLAY "Half of * CI length" FORMAT = 10.2; DEFINE method / DISPLAY "Method"; TITLE "Graybill-Deal estimator and four approximate confidence intervals"; TITLE3 "Albumin example"; RUN;