|
I received two lovely coding options in response to my question
about time and space usage by SAS 6.09e in MVS environment.
The quickest is reproduced first, below. It needed a small
change to include patients with exactly one record. The
other approaches follow. I added a SORT by STA3N (at a cost
of .04 - .05 CPU sec) to get the PROC FREQ by STA3N. Do these
results mean that the big time-utilizer is the arrays?
Many thanks for the help! -Laurel
NOTE: The data set WORK.OPC97A has 630009 observations
*** MMZ ***;
*BATCH A*;
PROC SQL;
CREATE TABLE WORK.OPC97a AS
SELECT OPC.STA3N, OPC.RACE, OPC.CL1, OPC.CL2,
OPC.CL3, OPC.CL4, OPC.CL5, OPC.CL6, OPC.CL7, OPC.CL8, OPC.CL9,
OPC.CL10, OPC.CL11, OPC.CL12, OPC.CL13, OPC.CL14, OPC.CL15,
OPC.SCRSSN
FROM OPC97.SF97 AS OPC
WHERE STA3N in(358,402,405,436,437,438,442,452);
* Format to tally AODstop and MEDstop below *;
PROC FORMAT;
value AODclfm 137,507,508,513,514,517,518,555,556,560="1" other="0";
value MEDclfm 101,143,165,501,502,503,504,505,506,509,510,511,512,
515,516,540,541,550,551,552,553,554,557,558,559,562,571,572,
137,507,508,513,514,517,518,555,556,560,207,213="0" other="1";
run;
DATA work.OPC97a(keep=scrssn race sta3n aodstop medstop);
set work.OPC97a;
*Count up N stops per record*;
AODStop=0;
MEDStop=0;
array ClinStop(15) cl1 cl2 cl3 cl4 cl5 cl6 cl7 cl8 cl9 cl10
cl11 cl12 cl13 cl14 cl15;
DO I=1 to 15;
clinstp=clinstop(I);
if clinstp^=. Then DO;
AODcl=input(put(clinstp,AODclfm.),3.);
IF AODcl EQ 1 then AODstop=AODstop+1;
MEDcl=input(put(clinstp,medclfm.),3.);
IF MEDcl EQ 1 then MEDstop=MEDstop+1;
END;
END;
label AODstop='Outpt Visit: AOD'
MEDstop='Outpt Visit: MED';
run;
* Reduce to Totals per Patient;
PROC SORT; by SCRSSN;
data work.OPC97A;
RETAIN AODstops MEDstops;
set work.OPC97A;
by scrssn;
if first.SCRSSN then DO;
AODstops=AODstop; MEDStops=MEDstop;
END;
ELSE DO;
AODstops=AODstop+AODstops; MedStops=MEDstop+MedStops;
END;
IF last.scrssn then DO;
IF AODstops GE 1 then do;
IF race=7 then race=.;
OUTPUT OPC97a;
END;
END;
label AODstops='FY Outpt Vis: AOD';
label MEDstops='FY Outpt Vis: Medical';
run;
proc append base=OPCZ data=OPC97A;
run;
proc datasets nolist;
delete OPC97A;
quit;
proc sort data=OPCZ; by STA3n;
proc means;var medstops aodstops; class sta3n;
proc freq; table race; by sta3n;
run;
*;
/* NOTE: The SAS session used 66.25 CPU seconds and 5248K. */
*** MSZ ***;
*BATCH A*;
data opc97a;
array clinstop(15) cl1-cl15;
set opc97.sf97 (where=(sta3n in (358,402,405,436,437,438,442,452)));
aodstop = 0;
medstop = 0;
do i= 1 to 15;
if clinstop(i) in(137,507,508,513,514,517,518,555,556,560)
then aodstop+1;
if clinstop(i) ^in (101, 143, 165, 501, 502, 503, 504, 505,
506, 509, 510, 511, 512, 515, 516, 540,
541, 550, 551, 552, 553, 554, 557, 558,
559, 562, 571, 572, 137, 507, 508, 513,
514, 517, 518, 555, 556, 560, 207, 213)
and clinstop(i) ^=.
then medstop+1;
end;
label aodstop='outpt visit: aod'
medstop='outpt visit: med';
run;
proc sort data=opc97a;
by scrssn;
run;
data opc97a;
set opc97a;
by scrssn;
* set aodstops and medstops equal to 0 for each new scrssn;
if first.scrssn then do;
aodstops=0;
medstops=0;
end;
aodstops+aodstop;
medstops+medstop;
* one record per SCRSSN for only those people with at least one AODSTOP;
if (last.scrssn) and (aodstops gt 0) then output;
label aodstops='fy outpt vis: aod';
label medstops='fy outpt vis: medical/other';
run;
proc sort data=OPC97a; by STA3n;
proc means; var medstops aodstops; class sta3n;
proc freq; table race; by sta3n;
run;
*;
/* NOTE: The SAS session used 207.06 CPU seconds and 5375K.
Primarily in the 191 sec DATA step */
*** OLD ***;
*BATCH A*;
PROC SQL;
CREATE TABLE WORK.OPC97a AS
SELECT OPC.STA3N, OPC.RACE, OPC.CL1, OPC.CL2,
OPC.CL3, OPC.CL4, OPC.CL5, OPC.CL6, OPC.CL7, OPC.CL8, OPC.CL9,
OPC.CL10, OPC.CL11, OPC.CL12, OPC.CL13, OPC.CL14, OPC.CL15,
OPC.SCRSSN
FROM OPC97.SF97 AS OPC
WHERE STA3N in(358,402,405,436,437,438,442,452);
DATA WORK.OPC97A; SET WORK.OPC97A;
array ClinStop(15) CL1 CL2 CL3 CL4 CL5 CL6 CL7 CL8
CL9 CL10 CL11 CL12 CL13 CL14 CL15;
array AODCL(15) AODCL1 AODCL2 AODCL3 AODCL4 AODCL5 AODCL6
AODCL7 AODCL8 AODCL9 AODCL10 AODCL11 AODCL12 AODCL13
AODCL14 AODCL15;
array MEDCL(15) MEDCL1 MEDCL2 MEDCL3 MEDCL4 MEDCL5 MEDCL6
MEDCL7 MEDCL8 MEDCL9 MEDCL10 MEDCL11 MEDCL12 MEDCL13
MEDCL14 MEDCL15;
* Fill output arrays *;
DO i=1 to 15;
IF clinstop(i) in(137, 507, 508, 513, 514, 517, 518, 555,
556,560)
THEN aodcl(i)=1;
IF clinstop(i) ^in(101, 143, 165, 501, 502, 503, 504, 505,
506, 509, 510, 511, 512, 515, 516, 540,
541, 550, 551, 552, 553, 554, 557, 558,
559, 562, 571, 572, 137, 507, 508, 513,
514, 517, 518, 555, 556, 560, 207, 213)
AND clinstop(i) ^=.
THEN medcl(i)=1;
END;
*Count up N stops per record*;
AODStop=n(AODCL1, AODCL2, AODCL3, AODCL4, AODCL5, AODCL6, AODCL7,
AODCL8, AODCL9, AODCL10, AODCL11, AODCL12, AODCL13, AODCL14,
AODCL15);
MEDStop=n(MEDCL1, MEDCL2, MEDCL3, MEDCL4, MEDCL5, MEDCL6, MEDCL7,
MEDCL8, MEDCL9, MEDCL10, MEDCL11, MEDCL12, MEDCL13, MEDCL14,
MEDCL15);
label AODstop='Outpt Visit: AOD'
MEDstop='Outpt Visit: MED';
drop AODCL1 AODCL2 AODCL3 AODCL4 AODCL5
AODCL6 AODCL7 AODCL8 AODCL9 AODCL10 AODCL11 AODCL12 AODCL13
AODCL14 AODCL15 MEDCL1 MEDCL2 MEDCL3 MEDCL4 MEDCL5 MEDCL6
MEDCL7 MEDCL8 MEDCL9 MEDCL10 MEDCL11 MEDCL12 MEDCL13 MEDCL14
MEDCL15;
run;
* Reduce to Totals per Patient;
PROC SORT; by SCRSSN;
data work.OPC97A; set work.OPC97A; by scrssn;
if first.SCRSSN then DO;
AODstops=AODstop; MedStops=MEDstop;
END;
if ^first.SCRSSN then DO;
AODstops=AODstop+AODstops; MedStops=MEDstop+MedStops;
END;
retain AODstops MEDSTOPS;
label AODstops='FY Outpt Vis: AOD';
label MEDstops='FY Outpt Vis: Medical/other';
run;
* DATA: one record per patient, at least 1 AOD stop *;
data work.opc97a; set work.opc97A; by scrssn;
if last.scrssn;
if AODstops GE 1; run;
proc sort; by STA3n;
proc means; var medstops aodstops;class sta3n;
proc freq; table race; by sta3n;
run;
/* NOTE: The SAS session used 218.33 CPU seconds and 5167K.
Primarily in the 171 sec DATA step. */
~*~
Laurel Copeland Tel (313) 930-5132 Fax (313) 930-5159
Data Analyst Ann Arbor VAMC
|