Вы находитесь на странице: 1из 7

/*EDD MACRO*/

/*------------------------------------------------------------------------------
  EDD - builds a one-row-per-variable summary of a dataset and prints it
        through ODS HTML.

  Parameters
    inp_ds            Input dataset (one- or two-level name).
    forced_drop_vars  Space-separated variables to exclude; may be empty.
    xls_file_location Value handed verbatim to ODS HTML FILE= (callers pass a
                      quoted path).

  Result
    WORK.EDD with, per variable: NAME, Label, Type, LENGTH, nunique,
    non_miss_obs, missing_obs, fill_rate and eleven statistic columns.
    For numeric variables the statistic columns hold mean, std, min, p1, p5,
    p25, median, p75, p95, p99, max (stored as text). For character variables
    they hold the six most frequent and (when there are more than six levels)
    the five least frequent "value:count" pairs.

  Work datasets created or replaced
    dummy, varlist, num_var, char_var, num_var_ds_dummy, char_var_ds_dummy,
    means_dataset, tmp_num_freq, tmp_char_freq, contents1, contents_char,
    freq_data, one_var_EDD, EDD.
------------------------------------------------------------------------------*/
%macro EDD(inp_ds            =,
           forced_drop_vars  =,
           xls_file_location =
          );

    %local i char_variable tot_num_vars tot_char_vars;

    /* Defaults so that a dataset without numeric or without character
       variables does not leave these references unresolved. */
    %let char_variable = ;
    %let tot_num_vars  = 0;
    %let tot_char_vars = 0;

    /**************************************************************************
     * 1. Drop forced variables
     **************************************************************************/
    data work.dummy;
        %if %length(&forced_drop_vars) %then %do;
            set &inp_ds (drop = &forced_drop_vars);
        %end;
        %else %do;
            /* An empty list would otherwise produce a bare DROP= option. */
            set &inp_ds;
        %end;
    run;

    /**************************************************************************
     * 2a. Run PROC CONTENTS to list all variables and their types
     * 2b. Split the list into numeric and character variable names
     **************************************************************************/
    /* Describe WORK.DUMMY, not the raw input, so that force-dropped variables
       never reach the lists used by the KEEP=/DROP= options below. */
    proc contents data=work.dummy out=work.varlist noprint; run;

    data work.num_var work.char_var;
        set work.varlist(keep = name type);
        if type = 1 then output work.num_var;   /* TYPE 1 = numeric */
        else output work.char_var;
    run;

    /**************************************************************************
     * 3. EDD of numeric variables
     **************************************************************************/
    /* Space-separated list of character variables; stays empty when the
       query returns no rows. */
    proc sql noprint;
        SELECT NAME
            INTO :char_variable separated by ' '
            FROM work.char_var;
    quit;

    /* num_varname1..num_varnameN and the count N. */
    data _NULL_;
        set work.num_var end=final;
        call symputx(cats("num_varname", _N_), name, 'L');
        if final then call symputx("tot_num_vars", _N_, 'L');
    run;

    %if &tot_num_vars > 0 %then %do;

        data work.num_var_ds_dummy;
            %if %length(&char_variable) %then %do;
                set work.dummy (drop = &char_variable.);
            %end;
            %else %do;
                set work.dummy;
            %end;
        run;

        %do i = 1 %to &tot_num_vars;

            proc means data = work.num_var_ds_dummy(keep = &&num_varname&i) NOPRINT;
                var &&num_varname&i;
                output out = means_dataset
                    N(&&num_varname&i)     = numobs
                    Nmiss(&&num_varname&i) = missing_obs
                    mean(&&num_varname&i)  = mean_or_top1
                    std(&&num_varname&i)   = stddev_or_top2
                    min(&&num_varname&i)   = min_or_top3
                    p1(&&num_varname&i)    = p1_or_top4
                    p5(&&num_varname&i)    = p5_or_top5
                    p25(&&num_varname&i)   = p25_or_top6
                    p50(&&num_varname&i)   = median_or_bot5
                    p75(&&num_varname&i)   = p75_or_bot4
                    p95(&&num_varname&i)   = p95_or_bot3
                    p99(&&num_varname&i)   = p99_or_bot2
                    max(&&num_varname&i)   = max_or_bot1;
            run;

            /* Distinct non-missing values (no MISSING option on TABLES). */
            proc freq data=work.num_var_ds_dummy (keep = &&num_varname&i) NOPRINT;
                tables &&num_varname&i / list out= tmp_num_freq;
            run;

            /* NOBS= is set at compile time, so the count is available even
               when the frequency table has no rows (variable all missing). */
            data _null_;
                if 0 then set tmp_num_freq nobs = tot_obs;
                call symputx("nunique_num_&i", tot_obs, 'L');
                stop;
            run;

            proc contents data = work.num_var_ds_dummy (keep = &&num_varname&i)
                          out = contents1 NOPRINT;
            run;

            data contents1;
                retain merge_var NAME Label Type LENGTH nunique;
                set contents1 (keep = NAME Label Type LENGTH);
                merge_var = 1;
                nunique = &&nunique_num_&i;
            run;

            data means_dataset;
                set means_dataset;
                merge_var = 1;
            run;

            /* Both inputs hold a single row with merge_var = 1. */
            data one_var_EDD (drop = _type_ _freq_ merge_var);
                merge contents1
                      means_dataset;
                by merge_var;
            run;

            %if &i eq 1 %then %do;
                Data EDD;
                    set one_var_EDD;
                run;
            %end;
            %else %do;
                Data EDD;
                    set EDD one_var_EDD;
                run;
            %end;

        %end;

        /* Convert the numeric statistic columns to $32 text so that the
           character rows appended later can share the same columns. */
        data EDD;
            set EDD;
            format new_mean_or_top1   $32.;
            format new_stddev_or_top2 $32.;
            format new_min_or_top3    $32.;
            format new_p1_or_top4     $32.;
            format new_p5_or_top5     $32.;
            format new_p25_or_top6    $32.;
            format new_median_or_bot5 $32.;
            format new_p75_or_bot4    $32.;
            format new_p95_or_bot3    $32.;
            format new_p99_or_bot2    $32.;
            format new_max_or_bot1    $32.;

            new_mean_or_top1   = mean_or_top1;
            new_stddev_or_top2 = stddev_or_top2;
            new_min_or_top3    = min_or_top3;
            new_p1_or_top4     = p1_or_top4;
            new_p5_or_top5     = p5_or_top5;
            new_p25_or_top6    = p25_or_top6;
            new_median_or_bot5 = median_or_bot5;
            new_p75_or_bot4    = p75_or_bot4;
            new_p95_or_bot3    = p95_or_bot3;
            new_p99_or_bot2    = p99_or_bot2;
            new_max_or_bot1    = max_or_bot1;

            drop mean_or_top1
                 stddev_or_top2
                 min_or_top3
                 p1_or_top4
                 p5_or_top5
                 p25_or_top6
                 median_or_bot5
                 p75_or_bot4
                 p95_or_bot3
                 p99_or_bot2
                 max_or_bot1;

            rename new_mean_or_top1   = mean_or_top1;
            rename new_stddev_or_top2 = stddev_or_top2;
            rename new_min_or_top3    = min_or_top3;
            rename new_p1_or_top4     = p1_or_top4;
            rename new_p5_or_top5     = p5_or_top5;
            rename new_p25_or_top6    = p25_or_top6;
            rename new_median_or_bot5 = median_or_bot5;
            rename new_p75_or_bot4    = p75_or_bot4;
            rename new_p95_or_bot3    = p95_or_bot3;
            rename new_p99_or_bot2    = p99_or_bot2;
            rename new_max_or_bot1    = max_or_bot1;
        run;

    %end;

    /**************************************************************************
     * 3a. Create EDD of character variables
     * 3b. Take the frequency of each character variable
     **************************************************************************/
    /* char_varname1..char_varnameN and the count N. */
    data _NULL_;
        set work.char_var end=final;
        call symputx(cats("char_varname", _N_), name, 'L');
        if final then call symputx("tot_char_vars", _N_, 'L');
    run;

    %if &tot_char_vars > 0 %then %do;
        data work.char_var_ds_dummy;
            set work.dummy (keep=&char_variable.);
        run;
    %end;

    %do i = 1 %to &tot_char_vars;

        proc freq data = work.char_var_ds_dummy(keep = &&char_varname&i) noprint;
            tables &&char_varname&i / list missing out=work.tmp_char_freq;
        run;

        /* Most frequent level first. */
        proc sort data = tmp_char_freq;
            by descending COUNT;
        run;

        /* Collapse the sorted frequency table into one row: top six and
           bottom five "value:count" pairs plus observation counts. */
        data freq_data (Keep = merge_var
                               nunique
                               numobs
                               missing_obs
                               mean_or_top1
                               stddev_or_top2
                               min_or_top3
                               p1_or_top4
                               p5_or_top5
                               p25_or_top6
                               median_or_bot5
                               p75_or_bot4
                               p95_or_bot3
                               p99_or_bot2
                               max_or_bot1
                       );
            set tmp_char_freq nobs=tot_obs;

            format mean_or_top1   $32.;
            format stddev_or_top2 $32.;
            format min_or_top3    $32.;
            format p1_or_top4     $32.;
            format p5_or_top5     $32.;
            format p25_or_top6    $32.;
            format median_or_bot5 $32.;
            format p75_or_bot4    $32.;
            format p95_or_bot3    $32.;
            format p99_or_bot2    $32.;
            format max_or_bot1    $32.;
            format numobs         32.;

            retain merge_var      0;
            retain nunique        0;
            retain numobs         0;
            retain missing_obs    0;
            retain mean_or_top1   "";
            retain stddev_or_top2 "";
            retain min_or_top3    "";
            retain p1_or_top4     "";
            retain p5_or_top5     "";
            retain p25_or_top6    "";
            retain median_or_bot5 "";
            retain p75_or_bot4    "";
            retain p95_or_bot3    "";
            retain p99_or_bot2    "";
            retain max_or_bot1    "";

            if (_N_ = 1) then do;
                merge_var = 1;
                nunique = tot_obs;
                mean_or_top1 = compress(&&char_varname&i !! ":" !! COUNT);
            end;
            if (_N_ = 2) then stddev_or_top2 = compress(&&char_varname&i !! ":" !! COUNT);
            if (_N_ = 3) then min_or_top3    = compress(&&char_varname&i !! ":" !! COUNT);
            if (_N_ = 4) then p1_or_top4     = compress(&&char_varname&i !! ":" !! COUNT);
            if (_N_ = 5) then p5_or_top5     = compress(&&char_varname&i !! ":" !! COUNT);
            if (_N_ = 6) then p25_or_top6    = compress(&&char_varname&i !! ":" !! COUNT);

            /* Bottom-five slots are filled only by rows beyond the top six. */
            if ((tot_obs - _N_) eq 4 and _N_ gt 6) then median_or_bot5 = compress(&&char_varname&i !! ":" !! COUNT);
            if ((tot_obs - _N_) eq 3 and _N_ gt 6) then p75_or_bot4    = compress(&&char_varname&i !! ":" !! COUNT);
            if ((tot_obs - _N_) eq 2 and _N_ gt 6) then p95_or_bot3    = compress(&&char_varname&i !! ":" !! COUNT);
            if ((tot_obs - _N_) eq 1 and _N_ gt 6) then p99_or_bot2    = compress(&&char_varname&i !! ":" !! COUNT);
            if ((tot_obs - _N_) eq 0 and _N_ gt 6) then max_or_bot1    = compress(&&char_varname&i !! ":" !! COUNT);

            if (&&char_varname&i = "") then missing_obs = COUNT;
            numobs = numobs + COUNT;

            /* Emit the single summary row after the last level. */
            if (_N_ = tot_obs) then do;
                numobs = numobs - missing_obs;
                output;
            end;
        run;

        proc contents data = work.char_var_ds_dummy (keep = &&char_varname&i)
                      out = contents_char NOPRINT;
        run;

        data contents_char;
            retain merge_var NAME Label Type LENGTH;
            set contents_char (keep = NAME Label Type LENGTH);
            merge_var = 1;
        run;

        data one_var_EDD (drop = merge_var);
            merge contents_char
                  freq_data;
            by merge_var;
        run;

        /* Start a fresh EDD only for the very first row overall; testing
           tot_num_vars alone would overwrite EDD on every iteration. */
        %if &tot_num_vars eq 0 and &i eq 1 %then %do;
            Data EDD;
                set one_var_EDD;
            run;
        %end;
        %else %do;
            Data EDD;
                set EDD one_var_EDD;
            run;
        %end;

    %end;

    %if %eval(&tot_num_vars + &tot_char_vars) > 0 %then %do;

        data EDD(rename = (numobs = non_miss_obs));
            set EDD;
            /* Leave fill_rate missing when there are no observations. */
            if (numobs + missing_obs) > 0 then
                fill_rate = numobs/(numobs + missing_obs);
        run;

        ods html file = &xls_file_location;
        proc print data = EDD;
        run;
        ods html close;

    %end;
    %else %do;
        %put NOTE: EDD found no variables to summarise in &inp_ds after applying forced_drop_vars.;
    %end;

%mend EDD;
/*libname catalog "Z:\MacroToolkit";*/
/*option mstored sasmstore = catalog;*/
/*%edd(*/
/*inlib = lib8,*/
/*inputdata = MA_Errors,*/
/*outlib = lib8,*/
/*outdata = output_edd*/
/*);*/

/*
options obs = max compress = yes ls = max ps = max mergenoby = error nocenter noxwait noxsync macrogen mprint;


%EDD(inp_ds
= outlib.test_data,
forced_drop_vars = ,
xls_file_location = "EDD_MACRO.xls"
);
*/
/* Summarise SASHELP.PRDSAL3; the value below is passed to ODS HTML FILE=. */
%EDD(inp_ds            = sashelp.prdsal3,
     forced_drop_vars  = ,
     xls_file_location = "D:\Prep\n1.xls");

/* Summarise MODY3 with no variables excluded. */
%EDD(inp_ds            = mody3,
     forced_drop_vars  = ,
     xls_file_location = "n2.xls");

/* Print the requested statistics for price1-price3 and store N, SUM and
   NMISS of each variable under explicitly chosen names. */
proc summary data=mody3 print n mean std min max nmiss sum nway;
    var price1-price3;
    output out   = mody3_o1
           n     = pt1  pt2  pt3
           sum   = t1   t2   t3
           nmiss = pnm1 pnm2 pnm3;
run;
/* Same summary as a one-line OUTPUT statement: AUTONAME lets SAS derive the
   output variable names instead of listing them by hand. */
proc summary data=mody3 print n mean std min max nmiss sum nway;
    var price1-price3;
    output out=mody3_o1 n= sum= nmiss= / autoname;
run;
/* List the variables of SASHELP.PRDSAL3 in creation order (VARNUM) and save
   the descriptor information to CONTENTS_PRDSAL3. VARNUM was given twice in
   the original statement; once is enough. */
proc contents data=sashelp.prdsal3 varnum out=contents_prdsal3;
run;
/* Univariate analysis with plots for every numeric variable of
   SASHELP.PRDSAL3. The name std_deviation had been split across two lines,
   which SAS reads as two separate output names (std_devia and tion).
   NOTE(review): one name is given per statistic while VAR lists all numeric
   variables, and no OUT= dataset is named - confirm this is intended. */
proc univariate data=sashelp.prdsal3 plot;
    var _numeric_;
    output n      = total
           nmiss  = total_missing
           mean   = mean2
           median = median_value
           std    = std_deviation;
run;
/* The statistic keywords on the PROC MEANS statement choose what is
   calculated for the printed report. */
proc means data=class n nmiss mean median;
    var age height weight;
    /* The OUTPUT statement chooses what is written to the OUT= dataset. */
    output out    = zomby
           n      = nmv1_age    nmv2_height    nmv3_weight
           nmiss  = mv1_age     mv2_height     mv3_weight
           mean   = mean1_age   mean2_height   mean3_weight
           median = median1_age median2_height median3_weight;
run;
/* Summarise SASHELP.CLASS grouped by SEX without printing; the bare OUTPUT
   statement names neither an OUT= dataset nor any statistics. */
proc means data=sashelp.class noprint;
    class sex;
    output;
run;

Вам также может понравиться