|
Hi. The below macro actually works, but is much slower than I'd hoped.
What it does is compare every variable in the first set to the same
variable in the second set, if that same variable exists in the second
set.
It compares cases by performing a merge, done on the MERGEBY macro
argument, between slices of the two sets, and then subtracting one from
the other. Typically, the mergeby variable is the case_id, so that it
effectively compares every cell in the first set to that in the second.
Any advice for making it faster would be appreciated. Maybe use prod
sql instead?
.......................................................................................................
EXAMPLE MACRO CALL:
%COMPAREBYSET(SET1=&Q106JF,IF1=,SET2=&Q106FULL,IF2=IF
(q1860_1=1|q1860_2=1),MERGEBY=CASE_ID); RUN;
.......................................................................................................
MACRO CODE:
%MACRO COMPAREBYSET(SET1=,IF1=,SET2=,IF2=,MERGEBY=);
/*SUPPRESS WINDOW OUTPUT*/
ods NOresults;
/*SUPPRESS TABLES*/
ods listinG CLOSE;
PROC SORT DATA=&SET1;
BY &MERGEBY;
RUN;
PROC SORT DATA=&SET2;
BY &MERGEBY;
RUN;
%DO SETCOUNT=1 %TO 2;
PROC CONTENTS NOPRINT DATA=&&SET&SETCOUNT OUT=PCOUT&SETCOUNT; RUN;
DATA PCOUT&SETCOUNT ;
SET PCOUT&SETCOUNT (KEEP = NAME);
ROWNUM=_N_;
RUN;
DATA PCOUT&SETCOUNT; SET PCOUT&SETCOUNT;
IF NAME="&MERGEBY." THEN DELETE;
RUN;
%LET TEMPMAX=MAX&SETCOUNT; RUN;
PROC MEANS NOPRINT DATA=PCOUT&SETCOUNT;
VAR ROWNUM;
OUTPUT OUT=MAXOUT MAX(ROWNUM)=&TEMPMAX;
RUN;
DATA _NULL_; SET MAXOUT;
CALL SYMPUT("MAX&SETCOUNT",&TEMPMAX);
RUN;
%PUT &&MAX&SETCOUNT;
%END; /*%DO SETCOUNT=1 %TO 2;*/
/*GO THROUGH EACH VAR IN FIRST SET*/
/*IF VAR IN FIRST SET IS FOUND IN SECOND SET,
THEN RUN A COMPARISON PER MERGE VARIABLE*/
%DO VARCOUNT=1 %TO &MAX1;
DATA TEMP1; SET PCOUT1;
IF _N_ = &VARCOUNT;
RUN;
DATA _NULL_; SET TEMP1;
CALL SYMPUT('TEMPVAR',NAME);
RUN;
%DO VARCOUNT2=1 %TO &MAX2;
DATA TEMP1; SET PCOUT2;
%PUT &VARCOUNT2;
IF _N_ = &VARCOUNT2;
RUN;
DATA _NULL_; SET TEMP1;
CALL SYMPUT('TESTVAR',NAME);
RUN;
%IF &TEMPVAR=&TESTVAR %THEN %DO;
%PUT TEMPVAR=&TEMPVAR;
%PUT TESTVAR=&TESTVAR;
%PUT IF1=&IF1;
%PUT IF2=&IF2;
DATA TEMPSET1 (KEEP=&MERGEBY &TESTVAR); SET &SET1;
&IF1;
RUN;
DATA TEMPSET2 (KEEP=&MERGEBY &TESTVAR); SET &SET2;
&IF2;
RUN;
DATA FORRENAME;
TESTSUM=TRIM("&TESTVAR.")||"SUM";
TESTDIFF=TRIM("&TESTVAR.")||"DIFF";
FORRENAME=TRIM("&TESTVAR.")||"="||TRIM("&TESTVAR.")||"TEST";
FORTEST=TRIM("&TESTVAR.")||"DIFF="||TRIM("&TESTVAR.")||"-"||TRIM("&TESTVAR.")||"TEST";
RUN;
DATA _NULL_; SET FORRENAME;
CALL SYMPUT("TESTSUM",TESTSUM);
CALL SYMPUT("TESTDIFF",TESTDIFF);
CALL SYMPUT("FORTEST",FORTEST);
CALL SYMPUT("FORRENAME",FORRENAME);
RUN;
%PUT &FORRENAME;
DATA TEMPSET2 (RENAME=(&FORRENAME)); SET TEMPSET2; RUN;
DATA TEMPFINAL;
MERGE TEMPSET1 TEMPSET2;
BY &MERGEBY;
RUN;
DATA TEMPFINAL; SET TEMPFINAL;
&FORTEST;
RUN;
PROC MEANS NOPRINT DATA=TEMPFINAL;
VAR &TESTDIFF;
OUTPUT OUT=OUTSUM SUM(&TESTDIFF)=&TESTSUM;
RUN;
DATA OUTSUM; SET OUTSUM (KEEP=&TESTSUM); RUN;
DATA OUTSUM; SET OUTSUM;
FORMAT VARNAME $14.;
VARNAME="&TESTSUM";
RUN;
DATA OUTSUM (RENAME=(&TESTSUM=SUMMATIONS)); SET OUTSUM; RUN;
%IF &VARCOUNT=1 %THEN %DO;;
DATA FINALSUMS; SET OUTSUM; RUN;
%END;
%IF &VARCOUNT>1 %THEN %DO;
DATA FINALSUMS; SET FINALSUMS OUTSUM; RUN;
%END;
DATA PCOUT2; SET PCOUT2;
IF NAME=&TESTVAR THEN DELETE;
RUN;
%LET VARCOUNT2=&MAX2;
%END; /*%IF &TEMPVAR=&TESTVAR %THEN %DO;*/
%END; /*%DO VARCOUNT2=1 %TO &MAX2;*/
%END; /*%DO VARCOUNT=1 %TO &&MAX&SETCOUNT;*/
/*ALLOW WINDOW OUTPUT*/
ods RESULTS;
/*ALLOW TABLES*/
ods LISTING;
%MEND;
|