Date: Thu, 26 Jul 2001 15:51:02 GMT
Reply-To: JP <lecru_R_E_guel@epid.jgh._M_O_V_E_mcgill.ca>
Sender: "SAS(r) Discussion" <SAS-L@LISTSERV.UGA.EDU>
From: JP <lecru_R_E_guel@EPID.JGH._M_O_V_E_MCGILL.CA>
Subject: Re: SAS/IML and logistic regression
I have some code if you want Philippe. There is a SAS sample code for logit
in IML. look for "ingot"(name of the data).
I have modified the code so that you can read some data from outside IML,
and not in the form of the IML code, which is not very handy.
I did some simulations and had to use IML, as it was something like 30 times
faster in may case.
But Proc Logistic is better/simpler.
JP
I pasted some code bellow, it does more that what you need, but you can take
the main routines from it.
LIBNAME mcat1 "/home/cruguel/iml/macro";
OPTIONS MSTORED SASMSTORE=mcat1;
%MACRO peiml(rep, n, v_x, v_u, forceiv, v_iv, beta0, beta1, pas, nbboot,
tousbeta, dalpha, dbt_alp) /STORE ;
PROC IML;
START binest;
b = REPEAT(0,NCOL(x),1); oldb=b+1; /* STARTing values */
DO iter=1 TO 20 WHILE(MAX(ABS(b-oldb))>1e-8); oldb=b;
z = x*b;
RUN f;
loglik =SUM( ((y=1)#LOG(p) + (y=0)#LOG(1-p))#wgt);
btransp = b`;
w = wgt /(p#(1-p));
xx = f # x;
xpxi = INV(xx`*(w#xx));
b = b + xpxi*(xx`*(w#(y-p)));
END;
p0 = SUM((y=1)#wgt)/SUM(wgt); /* average response */
loglik0 =SUM( ((y=1)#LOG(p0) + (y=0)#LOG(1-p0))#wgt);
chisq = ( 2 # (loglik-loglik0));
df = NCOL(x)-1;
prob = 1-PROBCHI(chisq,df);
stderr = SQRT(VECDIAG(xpxi));
beta = {0,1}; *0=beta0 (intercept), 1 = beta1;
estim_i = beta[,]|| b[,] || stderr[,]||cond;
FINISH;
/*---routine to yield distribution function and density---*/
START f;
p=1/(1+EXP(-z)); f=p#p#EXP(-z);
FINISH;
lebeta={0 0 0 0 0};
bt_lebeta={0 0 0 0 0};
CREATE &dalpha (RENAME = (col1=beta col2=limite col3=alpha col4=repet
col5=nbptreg )) FROM lebeta;
CREATE &dbt_alp (RENAME = (col1=beta col2=limite col3=alpha col4=repet
col5=nbptreg )) FROM bt_lebeta;
DO repet = 1 TO &rep;
SETOUT &dalpha;
FREE lebeta;
n = &n;
v_x = &v_x;
v_u = &v_u;
forceiv = &forceiv;
v_iv = &v_iv;
beta0 = &beta0;
beta1 = &beta1;
px =J(n,1,0);
u =J(n,1,0);
x =J(n,1,0);
y =J(n,1,0);
proby=J(n,1,0);
ybin =J(n,1,0);
iv =J(n,1,0);
DO i=1 TO n;
px[i,1]= NORMAL(0)*v_x**0.5;
u[i,1]= NORMAL(0)*v_u**0.5;
x[i,1]= px[i,1]+u[i,1];
y[i,1]= beta0 + beta1*px[i,1];
proby[i,1]= 1/(1+EXP(-y[i,1]));
ybin[i,1]= RANBIN(0,1,proby[i,1]);
iv[i,1]= px[i,1]*forceiv+ NORMAL(0)*v_iv**0.5 ;
END;
rang = RANK(iv);
data = x||ybin||iv||rang;
pas = &pas;
debut = 0;
fin = MIN(150,INT(n/(4*pas))); *on travaille sur 50%, avec 2 cotés, d
ou fateur 4;
DO k = debut TO fin;
min = k*pas + 1;
max = n -k*pas;
data2 = data[LOC(min <= data[,4] & data[,4]<= max),];
n2=NROW(data2);
x = REPEAT(1,n2,1) || ( data2[,{1}]);
y = data2[,2];
wgt = J(n2,1,1);
parm = {intercept, Xobs};
*VARIANCE DE X;
Un=J(n2,1,1);
MoyenneX=(Un`*x[,2])/n2;
VarX=(x[,2]`*x[,2]-n2*MoyenneX**2)/(N2-1);
invvarx = 1/varx;
varxi = repet||k||n2||varx||invvarx;
cond = repet ||k|| n2;
cond = REPEAT(cond,2,1);
RUN binest;
lesvar = lesvar//varxi;
estim = estim//estim_i;
END;
limite = FLOOR(n*0.5);
beta1 = estim[LOC(estim[,1]=1 & estim[,4]=repet & estim[,6]>=
limite),];
npt = NROW(beta1);
lesvari = lesvar[LOC(lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
lebeta = lebeta//(g1||echant);
limite = FLOOR(n*0.55);
beta1 = estim[LOC(estim[,1]=1 & estim[,4]=repet & estim[,6]>=
limite),];
npt = NROW(beta1);
lesvari = lesvar[LOC(lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
lebeta = lebeta//(g1||echant);
limite = FLOOR(n*0.6);
beta1 = estim[LOC(estim[,1]=1 & estim[,4]=repet & estim[,6]>=
limite),];
npt = NROW(beta1);
lesvari = lesvar[LOC(lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
lebeta = lebeta//(g1||echant);
limite = FLOOR(n*0.65);
beta1 = estim[LOC(estim[,1]=1 & estim[,4]=repet & estim[,6]>=
limite),];
npt = NROW(beta1);
lesvari = lesvar[LOC(lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
lebeta = lebeta//(g1||echant);
limite = FLOOR(n*0.7);
beta1 = estim[LOC(estim[,1]=1 & estim[,4]=repet & estim[,6]>=
limite),];
npt = NROW(beta1);
lesvari = lesvar[LOC(lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
lebeta = lebeta//(g1||echant);
limite = FLOOR(n*0.75);
beta1 = estim[LOC(estim[,1]=1 & estim[,4]=repet & estim[,6]>=
limite),];
npt = NROW(beta1);
lesvari = lesvar[LOC(lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
lebeta = lebeta//(g1||echant);
limite = FLOOR(n*0.8);
beta1 = estim[LOC(estim[,1]=1 & estim[,4]=repet & estim[,6]>=
limite),];
npt = NROW(beta1);
lesvari = lesvar[LOC(lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
lebeta = lebeta//(g1||echant);
limite = FLOOR(n*0.85);
beta1 = estim[LOC(estim[,1]=1 & estim[,4]=repet & estim[,6]>=
limite),];
npt = NROW(beta1);
lesvari = lesvar[LOC(lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
lebeta = lebeta//(g1||echant);
limite = FLOOR(n*0.90);
beta1 = estim[LOC(estim[,1]=1 & estim[,4]=repet & estim[,6]>=
limite),];
npt = NROW(beta1);
lesvari = lesvar[LOC(lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
lebeta = lebeta//(g1||echant);
limite = FLOOR(n*0.95);
beta1 = estim[LOC(estim[,1]=1 & estim[,4]=repet & estim[,6]>=
limite),];
npt = NROW(beta1);
lesvari = lesvar[LOC(lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
lebeta = lebeta//(g1||echant);
APPEND FROM lebeta;
* BOOTSTRAP;
nbboot = &nbboot;
DO boot = 1 TO nbboot;
SETOUT &dbt_alp;
FREE bt_lesvar bt_estim rowalea ;
FREE bt_lebeta;
DO i=1 TO n;
alea=INT(UNIFORM(0)*n+1);
rowalea=rowalea//alea;
END;
newdata=data[rowalea,];
DO k = debut TO fin;
min = k*pas + 1;
max = n -k*pas;
data2 = newdata[LOC(min <= newdata[,4] &
newdata[,4]<= max),];
n2=NROW(data2);
x = REPEAT(1,n2,1) || ( data2[,{1}]);
y = data2[,2];
wgt = J(n2,1,1);
parm = {intercept, Xobs};
*VARIANCE DE X;
Un=J(n2,1,1);
MoyenneX=(Un`*x[,2])/n2;
VarX=(x[,2]`*x[,2]-n2*MoyenneX**2)/(n2-1);
invvarx = 1/varx;
varxi = repet||k||n2||varx||invvarx;
cond = repet ||k|| n2;
cond = REPEAT(cond,2,1);
RUN binest;
bt_lesvar = bt_lesvar//varxi;
bt_estim = bt_estim//estim_i;
END;
limite = FLOOR(n*0.5);
beta1 = bt_estim[LOC(bt_estim[,1]=1 & bt_estim[,4]=repet &
bt_estim[,6]>= limite),];
npt = NROW(beta1);
lesvari = bt_lesvar[LOC(bt_lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
bt_lebeta = bt_lebeta//(g1||echant);
limite = FLOOR(n*0.55);
beta1 = bt_estim[LOC(bt_estim[,1]=1 & bt_estim[,4]=repet &
bt_estim[,6]>= limite),];
npt = NROW(beta1);
lesvari = bt_lesvar[LOC(bt_lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
bt_lebeta = bt_lebeta//(g1||echant);
limite = FLOOR(n*0.6);
beta1 = bt_estim[LOC(bt_estim[,1]=1 & bt_estim[,4]=repet &
bt_estim[,6]>= limite),];
npt = NROW(beta1);
lesvari = bt_lesvar[LOC(bt_lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
bt_lebeta = bt_lebeta//(g1||echant);
limite = FLOOR(n*0.65);
beta1 = bt_estim[LOC(bt_estim[,1]=1 & bt_estim[,4]=repet &
bt_estim[,6]>= limite),];
npt = NROW(beta1);
lesvari = bt_lesvar[LOC(bt_lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
bt_lebeta = bt_lebeta//(g1||echant);
limite = FLOOR(n*0.7);
beta1 = bt_estim[LOC(bt_estim[,1]=1 & bt_estim[,4]=repet &
bt_estim[,6]>= limite),];
npt = NROW(beta1);
lesvari = bt_lesvar[LOC(bt_lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
bt_lebeta = bt_lebeta//(g1||echant);
limite = FLOOR(n*0.75);
beta1 = bt_estim[LOC(bt_estim[,1]=1 & bt_estim[,4]=repet &
bt_estim[,6]>= limite),];
npt = NROW(beta1);
lesvari = bt_lesvar[LOC(bt_lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
bt_lebeta = bt_lebeta//(g1||echant);
limite = FLOOR(n*0.8);
beta1 = bt_estim[LOC(bt_estim[,1]=1 & bt_estim[,4]=repet &
bt_estim[,6]>= limite),];
npt = NROW(beta1);
lesvari = bt_lesvar[LOC(bt_lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
bt_lebeta = bt_lebeta//(g1||echant);
limite = FLOOR(n*0.85);
beta1 = bt_estim[LOC(bt_estim[,1]=1 & bt_estim[,4]=repet &
bt_estim[,6]>= limite),];
npt = NROW(beta1);
lesvari = bt_lesvar[LOC(bt_lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
bt_lebeta = bt_lebeta//(g1||echant);
limite = FLOOR(n*0.90);
beta1 = bt_estim[LOC(bt_estim[,1]=1 & bt_estim[,4]=repet &
bt_estim[,6]>= limite),];
npt = NROW(beta1);
lesvari = bt_lesvar[LOC(bt_lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
bt_lebeta = bt_lebeta//(g1||echant);
limite = FLOOR(n*0.95);
beta1 = bt_estim[LOC(bt_estim[,1]=1 & bt_estim[,4]=repet &
bt_estim[,6]>= limite),];
npt = NROW(beta1);
lesvari = bt_lesvar[LOC(bt_lesvar[,1]=repet),];
reg1=J(NROW(beta1),1,1);
reg =reg1||lesvari[LOC(lesvari[,3] >= limite),5];
g1=INV(reg`*reg)*(reg`*beta1[,2]);
echant =
REPEAT(limite,2,1)||{0,1}||REPEAT(repet,2,1)||REPEAT(npt,2,1);
bt_lebeta = bt_lebeta//(g1||echant);
APPEND FROM bt_lebeta;
END;
END;
/* CREATE &tousbeta (RENAME = (col1=typebeta col2=Beta col3=Stderr
col4=repet col5=iter col6=ni)) FROM estim;
APPEND FROM estim;*/
QUIT;
PROC MEANS DATA = &dbt_alp NOPRINT;
VAR beta;
CLASS repet limite;
WHERE alpha = 0;
OUTPUT OUT = &tousbeta (WHERE = (repet NE . AND limite NE .) DROP =_TYPE_
RENAME = (_FREQ_=nbboot)) MEAN=m_alpha STD=s_alpha VAR=var_alpha;
RUN;
%MEND peiml;
"Steve Rowe" <steverowe@EMAIL.MSN.COM> wrote in message
news:200107261217.f6QCHFS163978@listserv.cc.uga.edu...
> Any reason not to simply use
> proc logistic;
> ?
>
>
> Stephen Rowe, Ph.D.
> statistician
> marketing research / data mining
> looking for work
> Chicago area
|