Date: Thu, 27 Jan 2005 05:13:13 -0800
Reply-To: RolandRB <rolandberry@HOTMAIL.COM>
Sender: "SAS(r) Discussion" <SAS-L@LISTSERV.UGA.EDU>
From: RolandRB <rolandberry@HOTMAIL.COM>
Organization: http://groups.google.com
Subject: Re: Utility to scan string variables for non-printable characters?
Content-Type: text/plain; charset="iso-8859-1"
The two macros below needed fixing because some of the characters they were
flagging were actually OK. Some enhancements have also been added.
/*
/ Program : hexcnt.sas
/ Version : 1.0
/ Author : Roland Rashleigh-Berry
/ Date : 26-Jan-2005
/ Purpose : To count the strange hex characters in character variables
/ SubMacros : %nvarsc
/ Notes     : It is not possible to implement this as a function-style macro
/             due to the data step boundary, so the results will be written
/             out to a global macro variable. What you do with the list
/             created is entirely up to you. Each variable name will be
/             directly followed by an equals sign, followed directly by the
/             hex value count. Variables with zero hex count values will not
/             be shown.
/ Usage     : %hexcnt(dsname,droplist,globcnt=_hexcnt_,globvars=_hexvars_);
/================================================================================
/ PARAMETERS:
/-------name-------- ------------------------description-------------------------
/ ds                 Dataset (pos) (must be pure dataset name and have no keep,
/                    drop, where or rename associated with it).
/ drop               List of variables (pos - unquoted and separated by spaces)
/                    to drop from the analysis.
/ globcnt=_hexcnt_   Name of the global macro variable to set up to contain the
/                    list of variables and their hex count.
/ globvars=_hexvars_ Name of the global macro variable to set up to contain the
/                    list of variables with a detected hex count.
/================================================================================
/ AMENDMENT HISTORY:
/ init --date-- mod-id
----------------------description-------------------------
/
/===============================================================================*/
%macro hexcnt(ds,drop,globcnt=_hexcnt_,globvars=_hexvars_);
  /*----------------------------------------------------------------------
  / Scans every character variable of a dataset for "strange" characters
  / and reports the findings in two global macro variables.
  /
  / A character is flagged when its RANK() value is below 20x, or lies
  / strictly between 7Ex and C0x and is not one of B0x, B4x, B5x, AEx.
  / Scanning of a value stops at the first flagged character, so the
  / count kept per variable is the number of observations that contain
  / at least one flagged character.
  /
  / Parameters:
  /   ds        (pos) Input dataset name; it is used with a drop= option
  /             appended, so it must not carry dataset options itself.
  /   drop      (pos) Space-separated variables to exclude. When given, a
  /             temporary copy named _hexcnt is created and deleted again
  /             at the end.
  /   globcnt=  Name of the global macro variable that receives the
  /             "variable=count" pairs (only variables with count > 0).
  /   globvars= Name of the global macro variable that receives just the
  /             names of those variables.
  /
  / Relies on the external macro nvarsc; its result is used as the
  / dimension of the character-variable array, so it is presumably the
  / number of character variables in the dataset (confirm against that
  / macro).
  /---------------------------------------------------------------------*/
  %local dsname nvarsc;

  /* Work on a reduced copy when variables have to be excluded */
  %let dsname=&ds;
  %if %length(&drop) GT 0 %then %do;
    %let dsname=_hexcnt;
    data _hexcnt;
      set &ds(drop=&drop);
    run;
  %end;

  /* Create and clear the result variables so stale values never survive */
  %global &globcnt &globvars;
  %let &globcnt=;
  %let &globvars=;

  %let nvarsc=%nvarsc(&dsname);

  %if &nvarsc %then %do;
    data _null_;
      /* One retained counter per character variable */
      array _chex {&nvarsc} 8 _temporary_ (&nvarsc*0);
      set &dsname end=__last;
      array _char {*} _character_;

      /* Working variables carry a double-underscore prefix so that they */
      /* cannot clash with variables of the dataset being scanned.       */
      do __i=1 to &nvarsc;
        __len=length(_char(__i));
        do __j=1 to __len;
          __rank=rank(substr(_char(__i),__j,1));
          if __rank<0020x
             or ((007Ex < __rank < 00C0x)
                 and __rank not in (00B0x, 00B4x, 00B5x, 00AEx)) then do;
            _chex(__i)=_chex(__i)+1;
            leave;  /* one hit per value is enough */
          end;
        end;
      end;

      /* After the last observation, append the findings to the globals. */
      /* The macro references sit in single quotes so that they are      */
      /* resolved when CALL EXECUTE runs the generated statement.        */
      if __last then do;
        do __i=1 to &nvarsc;
          if _chex(__i) GT 0 then do;
            call execute('%let &globcnt=&&&globcnt '||
                         trim(vname(_char(__i)))||'='||
                         compress(put(_chex(__i),11.))||';');
            call execute('%let &globvars=&&&globvars '||
                         trim(vname(_char(__i)))||';');
          end;
        end;
      end;
    run;
  %end;

  /* Remove the temporary copy; QUIT ends the interactive procedure */
  %if %length(&drop) GT 0 %then %do;
    proc datasets nolist;
      delete _hexcnt;
    run;
    quit;
  %end;
%mend hexcnt;
/*
/ Program : showhex.sas
/ Version : 1.0
/ Author : Roland Rashleigh-Berry
/ Date : 26-Jan-2005
/ Purpose   : To create a new dataset where hex characters in character
/             variables are highlighted.
/ SubMacros : %varlistc %words
/ Notes     : Variables in the output dataset will have the same name as those
/             in the input dataset but they will be changed to show up hex
/             characters as hex numbers in < > brackets and the variable
/             length will be as defined by the length= parameter. If no
/             variable list is specified then all character variables are
/             assumed. If badonly=yes then an extra variable named __obs is
/             retained in the output dataset, set to the matching observation
/             number in the input dataset.
/ Usage : %showhex(test1,test2,cvar1 cvar2 cvar3)
/
/================================================================================
/ PARAMETERS:
/-------name------- -------------------------description-------------------------
/ dsin              (pos) name of input dataset (no modifiers)
/ dsout             (pos) name of output dataset (no modifiers)
/ vars              (pos) (optional) character variables (separated by spaces)
/ length=200        Length of the new character variables in the output dataset
/ badonly=yes       By default keep only those observations where hex characters
/                   were found in one or more of the listed character variables.
/================================================================================
/ AMENDMENT HISTORY:
/ init --date-- mod-id
----------------------description-------------------------
/
/===============================================================================*/
%macro showhex(dsin,dsout,vars,length=200,badonly=yes);
  /*----------------------------------------------------------------------
  / Builds dataset dsout from dsin in which each listed character
  / variable is replaced by a copy of length "length" where every flagged
  / character is shown as its two-digit hex code in < > brackets.
  /
  / A character is flagged when its RANK() value is below 20x, or lies
  / strictly between 7Ex and C0x and is not one of B0x, B4x, B5x, AEx.
  /
  / Parameters:
  /   dsin     (pos) Input dataset name; keep= and rename= options are
  /            appended to it, so it must not carry options itself.
  /   dsout    (pos) Output dataset name.
  /   vars     (pos, optional) Space-separated character variables. When
  /            empty, the list is taken from the external macro varlistc.
  /   length=  Length of the rewritten variables. Expanded text that does
  /            not fit is truncated.
  /   badonly= Only the first letter is looked at. Y keeps only the
  /            observations with a flagged character and retains __obs
  /            (the input observation number); any other value keeps all
  /            observations and drops __obs.
  /
  / Each input variable is renamed to an underscore-prefixed name while
  / being read, so the listed names need room for that extra character.
  / The external macro words is used as the upper bound of the loops over
  / the variable list (presumably a word count - confirm).
  /---------------------------------------------------------------------*/
  %local i var words error;
  %let error=0;

  %if not %length(&badonly) %then %let badonly=yes;
  %let badonly=%upcase(%substr(&badonly,1,1));

  /* Validate the dataset names before anything tries to use them */
  %if not %length(&dsin) %then %do;
    %put ERROR: (showhex) No input dataset specified;
    %let error=1;
  %end;
  %if not %length(&dsout) %then %do;
    %put ERROR: (showhex) No output dataset specified;
    %let error=1;
  %end;
  %if &error %then %goto error;

  /* Default to the full list only once dsin is known to be non-empty */
  %if not %length(&vars) %then %let vars=%varlistc(&dsin);
  %if not %length(&vars) %then %do;
    /* An empty list would generate an invalid keep= and rename= clause */
    %put ERROR: (showhex) No character variables to process;
    %goto error;
  %end;
  %let words=%words(&vars);

  data &dsout;
    length __char $ 1 __temp1 __temp2 &vars $ &length;
    /* Read the originals under underscore-prefixed names so that the */
    /* original names are free for the rewritten values.              */
    set &dsin(keep=&vars rename=(
      %do i=1 %to &words;
        %let var=%scan(&vars,&i,%str( ));
        &var=_&var
      %end;
      ));
    __bad=0;
    __obs=_n_;
    %do i=1 %to &words;
      %let var=%scan(&vars,&i,%str( ));
      __temp1=_&var;
      link conv;
      &var=__temp2;
    %end;
    %if "&badonly" EQ "Y" %then %do;
      if __bad then output;
    %end;
    %else %do;
      drop __obs;
    %end;
    drop __temp1 __temp2 __pos __rank __char __i __bad
      %do i=1 %to &words;
        %let var=%scan(&vars,&i,%str( ));
        _&var
      %end;
      ;
  return;

  conv:
    /* Converts what is in __temp1 to __temp2 with hex expanded.        */
    /* Every write is bounds-checked against the variable length, since */
    /* each flagged character grows from one to four characters.        */
    __temp2=' ';
    __pos=1;
    do __i=1 to length(__temp1);
      __char=substr(__temp1,__i,1);
      __rank=rank(__char);
      if __rank<0020x
         or ((007Ex < __rank < 00C0x)
             and __rank not in (00B0x, 00B4x, 00B5x, 00AEx)) then do;
        /* Flag the observation even when the hex code no longer fits */
        __bad=1;
        if __pos+3 <= &length then do;
          substr(__temp2,__pos,4)='<'||put(__rank,hex2.)||'>';
          __pos=__pos+4;
        end;
        /* No room left: block further writes rather than skip a character */
        else __pos=&length+1;
      end;
      else if __pos <= &length then do;
        substr(__temp2,__pos,1)=__char;
        __pos=__pos+1;
      end;
    end;
  return;
  run;

  %goto skip;
  %error:
  %put ERROR: Leaving showhex macro due to error(s) listed;
  %skip:
%mend showhex;