0% found this document useful (0 votes)
115 views5 pages

SAS Virtual Learning Environment Guide

SAS training notes

Uploaded by

sydney.pulsipher
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
115 views5 pages

SAS Virtual Learning Environment Guide

SAS training notes

Uploaded by

sydney.pulsipher
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd

SAS Virtual Learning Environment Sessions: [Link].

com
Lesson 1 Essentials libname '/people/m277325/consult/EPG1V2/data’

/ /
********************************************* *********************************************
**/ */
/* In the line of code below, replace /* You must run [Link] before you run
FILEPATH */ */
/* with the path to your EPG1V2/data folder /* this code to create your data for EPG1V2
*/ */
/ /*
********************************************* */
**/ /* DO NOT EDIT THE CODE BELOW
*/
%let /
path=/people/m277325/consult/EPG1V2/data;
data myclass; /* Reads [Link] table and creates a copy called my class*/ Data Step vs Proc Step
/ set [Link]; - Data: creates/edits info for
run; a table
- Proc: processes a table in a
proc print data=myclass; specific, pre-defined way
run; libname statement
libname libref engine “path”;
data [Link]; libname: creates library
set [Link]; - Must be <= 8 characters
NetSales=Sales-Returns; /*This creates a new variable*/ - Start w/ letter or _
run; libref: library name
engine: set of instructions
proc means data=[Link] mean sum maxdec=2; /*This rounds the statistics to 2 decimal path: location
places*/ ex: libname mylib
var NetSales; *Gives us info. about NetSales by region; “s:/workshop/data”;
class region;
Lesson
run; 2 Accessing Data libname '/people/m277325/consult/EPG1V2/data

***********************************************************; /*MACRO Practice*/


* Lesson 2 - Accessing Data *; libname pg1 "/people/m277325/consult/EPG1V2/data";
***********************************************************; proc freq data=pg1.np_species;
data class_copy1; tables Abundance Conservation_Status;
set [Link]; where Species_ID like "YOSE%" and
Category="Mammal"; run;
run;
proc print data=pg1.np_species;
var Species_ID Category Scientific_Name
/*Example 1 csv file*/ Common_Names;
proc import where Species_ID like "YOSE%" and double quotes.
Remember
datafile="/people/m277325/consult/EPG1V2/data/class_birthdat Category="Mammal"; run;
[Link]" If you need to remove
dbms=csv /*Now using Macros instead*/ warning, use single quotes
out=work.class_birthdate_import %let ParkCode=YOSE;
replace; run; %let SpeciesCat=Mammal;

proc contents data=class_birthdate_import; run; proc freq data=pg1.np_species;


tables Abundance Conservation_Status;
where Species_ID like "&ParkCode%" and
/*Example 2* Excel file*/
Category="&SpeciesCat"; run;
proc import
datafile="/people/m277325/consult/EPG1V2/data/eu_sport_trade proc print data=pg1.np_species;
.xlsx" var Species_ID Category Scientific_Name
dbms=xlsx Common_Names;
out=eu_sport_trade where Species_ID like "&ParkCode%" and
replace; run; Category="&SpeciesCat"; run;

proc contents data=eu_sport_trade; run; /*And changing the Macros to explore others*/

/*Example 3* delimiter file*/ %let ParkCode=ZION;


proc import %let SpeciesCat=Bird;
datafile="/people/m277325/consult/EPG1V2/data/np_traffic.dat
proc freq data=pg1.np_species;
"
tables Abundance Conservation_Status;
dbms=dlm
where Species_ID like "&ParkCode%" and
out=traffic2 Category="&SpeciesCat"; run;
replace;
guessingrows=3000; proc print data=pg1.np_species;
delimiter="|"; run; var Species_ID Category Scientific_Name

Lesson 3 Exploring and Validating Data libname '/people/m277325/consult/EPG1V2/data

/*SAS Procedures*/
/*SAS Procedures PROC PRINT*/
proc PRINT data=pg1.storm_summary (obs=10); /*Prints only the first 10 rows*/
var Season Name Basin MaxWindMPH; /*limits the columns included in the report */
/*Ctrl 'select' allows you to choose them from the library, then drag them
over */ run;

/*SAS Procedures PROC MEANS*/ /*Just basic summary statistics*/


proc MEANS data=pg1.storm_summary;
var MaxWindMPH MinWindPressure; run;

/*SAS Procedures PROC MEANS*/


proc UNIVARIATE data=pg1.storm_summary; /*More in-depth summary statistics, including extreme
values*/
var MaxWindMPH MinWindPressure; run;
/*Filtering using 'where'*/
libname pg1 "/people/m277325/consult/EPG1V2/data" (OBS=10); /*Prints only the first 10
observations*/
proc print data=pg1.storm_summary;
where Type="SUV" and MSRP >= 3000;
where Type="SUV" or Type="Truck" or Type="Wagon";
where Type in ("SUV", "Truck", "Wagon"); /*Easier way of using the OR */
where MaxWindMPH >= 156; /*Numeric*/
where Basin='WP'; /*Character*/
where Basin in ('SI', "NI"); /*contain SI -or- NI*/
Punctuation matters here
where StartDate >= "01jan2010"d; /*Dates end in d*/
where MaxWindMPH = . ; /*Missing Numeric values*/
where MaxWindMPH = “ “; /*Missing Charac values*/
where MaxWindMPH is missing; /*You could also use is missing*/
where MaxWindMPH is not missing; /*You could also use is not missing*/
where City like "New%" ; /*% is a wild card for any number of characters*/
/*Formatting Practice*/ /* This includes New York, New Delhi, Newport, Newcastle, New, etc*/
where City like "Sant_ %"; /*_
libname pg1 "/people/m277325/consult/EPG1V2/data"; is a wild card for a single character*/REMEMBER - Procedures
/* This includes Santa Clara, Santa Cruz, Santo Domigo, Santo Tomas,
proc print data=pg1.class_birthdate; - Print: creates listing of all rows
format Height Weight 3. Birthdate date9.; and columns
format Weight 5.; /* 45972 - 5 whole digits*/ - Means: calculates simple
format Weight 5.2; /* 45972.78 - 5 whole digits & 2 decimal summary stats (n, mean, std
places*/
dev, min, max)
format Weight 2.4; /* 45.9728 - 2 whole digits & 4 decimal
- Univariate: generates sum stats
places*/
format Weight COMMA8.1; /* 45,927.8 - 8 total values & 1 decimal and includes 5 lowest and 5
place*/ highest extreme values
format Weight DOLLAR10.2; /* $12,345.67 - 10 total values & 2 decimal - Freq: creates frequency table
places*/ for each column, can help find
format Weight DOLLAR10.; /* $12,346 - just rounds to the nearest problems in the data
dollar*/ - Contents: shows types,
format Weight Z8. ; /* 00001350 - 8 whole digits with zeros in formats, etc.
front*/
format Date date7.; /* 15JAN18 - 7 values with date*/
formatPractice*/
/*Sorting Date date9.; /* 15JAN2018 - 9 values with date*/
format
libname pg1Date date11.; /* 26-OCT-2018 - 11 values with date*/
"/people/m277325/consult/EPG1V2/data";
procformat Date MMDDYY10.;
sort data=pg1.class_test2 /* 01/15/2018 - month, date, year*/
out=test_sort;
by Subject; /*sorts autuomatically by ascending Subject*/
by descending Subject; /*Sorts by descending Subject*/
by Subject descending TestScore; /*Sorts by subject then descending TestScore*/ run;
All this code can go on one line. Formatted currently to include notes
proc sort data=pg1.class_test2
out=test_sort_noduplicates
nodupkey /* nodupkey removes duplicate rows*/
dupout=test_dups; /* dupout contains the duplicate rows that were removed by nodupkey*/
by _all_; /* _all_ This removes entirely duplicated rows*/
by student_id; /*this keeps only the first occurrence for each unique value (here
student_id)*/ run;

proc sort data=pg1.np_summary out=np_sort;


by Reg descending DayVisits;
where Type="NP"; run;
Lesson 4 Preparing Data libname '/people/m277325/consult/EPG1V2/data
proc sort data=pg1.eu_occ(keep=geo country) out=countryList /* Keep allows you to keep only these pieces */
nodupkey;
libname pg1 "/people/m277325/consult/EPG1V2/data";
by Geo Country; run;
data myclass;
set [Link];
/**Using WHERE**/
where Age >=15;
where Course = 'Algebra';
where substr(Basin,2,1)="p"; /*where the 2nd letter of Basin is P; i.e. SP, EP, WP are included*/
/*Using Basin, start with the second position, and go for 1 position*/
where upcase(Common_Names) like'%FOX%' /*includes Red Fox, Common Squirrel Fox, Fox Bear*/
and upcase(Common_Names) not like '%SQUIRREL'; /*includes all Fox but not Squirrel - so NO Common
Squirrel Fox*/
where YearMon like "2016%";

keep Name Age Height; /*you can keep portions*/


drop Sex Weight; /*you can delete portions */
format Height 4.1 Weight 3.; /*rounding to nearest tenth & nearest whole #*/
/** Using IF - THEN **/
if MinPressure = . then PressureGroup=.;
else if MinPressure <=920 then PressureGroup=0;
else PressureGroup=1;

length CarType $12; /*length statement needs to be coded BEFORE if-then statements*/
if MSRP<6000 then CarType="Basic";
else CarType="Luxury"; Pay attention to the +1 here when calculating # of days
/** Making new Variables **/
MaxWindKM = MaxWindMPH * 1.60935; /*New Column now called MaxWindKM*/
/*Creating Multiple Output-Tables in one DATA /*Creating Permanant Tables our Tables in the SAS
step*/ Libraries*/
data indian atlantic pacific; /*you can create a permanent table in the
set pg1.storm_summary; EPG1V2/output folder*/
length Ocean $8; libname out "path-to-EPG1V2/output";
keep Basin Season Name MaxWindMPH Ocean; data out.storm_cat5;
Basin=upcase(Basin); . . .
OceanCode=substr(Basin,2,1); run;
if OceanCode="I" then do;
Ocean="Indian"; /*This creates a new storm_new table in the OUT
output indian; library!*/
end; data out.storm_new;
if OceanCode="A" then do; set pg1.storm_summary;
Ocean="Atlantic"; run;
output atlantic;
end;
if OceanCode="P" then do;
Ocean="Pacific";
output pacific;
end;
run;
Lesson 5: Analyzing and Reporting on Data libname '/people/m277325/consult/EPG1V2/data
/*One-Way Frequency Report*/
pro freq. data=pg1.storm_final order=freq nlevels; /*order=freq puts observations in descending order by
frequency*/
tables BasinName Season; /*nlevels adds table at the top to show number of obs*/
run;

ods graphics on;


ods noproctitle; /*gets ride of PROC FREQ title on table*/
proc rfeq data=pg1.storm_final order=freq nlevels;
tables BaisnName StartDate /
nocum plots=freqplot(orient=horitzonal scale=percent); /*plot=freqplot will produce a barchart*/
format StartDate monname.;
run;
title; /*turns off title*/
ods proctitle; /*turns PROC FREQ automatic title back on*/

/*Two-Way Frequency Report*/


proc freq data=og1.storm_final;
tables BasinName*StartDate /*BasinName will label rows and StartDate will label
columns*/
/ norow nocol nopercent; other no
/*no row and column percentage, options in this statement:
overall crosslist, list
percentages*/
format StartDate monname.;
label BasinName= “Basin”
StartDate= “Storm Month”;
run;

/*Example: Two-Way Frequency Report*/


title "Selected Park Types by Region";
ods graphics on;
proc freq data=pg1.np_codelookup order=freq;
tables Type*Region / nocol crosslist
plots=freqplot (groupby=row orient=horizontal scale=grouppercent);
where Type in ("National Historic Site", "National Monument", "National Park");
run;
title;

/*Creating Summary Statistic Reports w/ PROC MEANS*/


proc means data=pg1.storm_final mean median min max maxdec=0; /*maxdec=0 round to whole number*/
var MaxWindMPH;
class BasinName; /*Groups data, calculates stats for each unique value of
BasinName*/
run;

OUTPUT OUT = output-table <statistic=col-name>;


proc means data=pg1.storm_final mean median min max maxdec=0;
var MaxWindMPH;
class BainName StormType;
ways 0 1 2; /*ways=0 -> 0 classification values, calculates stats for whole
Lesson
table 6: Exporting Results libname '/people/m277325/consult/EPG1V2/data
run; ways=1 -> 2 separate tables, one for StormType & one for
BasinName ways=2 -> combo of BainName and StormType to calculate
/*Example of exporting an Excel workbook (data)*/ /*Example of exporting a csv (results)*/
stats*/
libname xlout xlsx “&outpath/[Link]”; ods csvall file= “&outpath/[Link]”;
data xlout.south_pacific; proc print data=[Link] noobs;
set pg1.storm_final; var make model type msrp mpg_city
/*Creating an Output Summary Table*/
where Basin=“SP”; mpg_highway;
proc means data=[Link] noprint;
run; format msrp dollar8.;
var Weight;
run;
class Chol_Status;
proc means data=pg1.storm_final noprint maxdec=1; ods csvall close;
where Basin=“SP”;
var MaxWindKM;
class Season;
ways 1;
output out=xlout.Season_Stats n=Count
mean=AvgMaxWindKM max=StrongestWindKM;
run;
libname xlout clear;
/*Example of exporting results to Excel*/
ods excel file="&outpath/[Link]"
style=snow options(sheet_name='South Pacific Summary'); /*Named the first sheet in Excel
workbook*/
ods noproctitle;

proc means data=pg1.storm_detail maxdec=0 median max; /*First sheet is named “South Pacific
Summary*/
class Season;
var Wind;
where Basin='SP' and Season in (2014,2015,2016);
run;

ods excel options(sheet_name='Detail'); /*Second sheet for 2014 is named


“Detail”, third
proc print data=pg1.storm_detail noobs; sheet named “Detail 2” for 2015, and so
on*/
where Basin='SP' and Season in (2014,2015,2016);
Lessonby7:Season;
Using SQL in SAS libname '/people/m277325/consult/EPG1V2/data
run;

/*Filtering Data with SQL*/ /*Joining tables using SQL inner join*/
title ‘International Storms Since proc sql;
2000’; select class_update.name, grade, age, teacher /*Need this
proc sql; qualifier*/
select propcase(Name) as Name, from pg1.class_update inner join pg1.class_teachers
Age, Height, Birthday format=date9. on class_update.Name=class_teachers.Name;
from pg.class_birthdate quit;
where age > 14 and Height > 156
order by Heigh desc, Name;
quit; /*Assigning an alias*/
title; proc sql;
select [Link], grade, age, teacher
/*Creating a table from a query*/ from pg1.class_updated as u
proc sql; inner join pg1.class_teachers as t
create table [Link] as on [Link]=[Link];
select name, age, height run;
from pg1.class_birthdate
where age > 14
order by height desc;
quit;
Common Formats in SAS

Common questions

Powered by AI

The PROC MEANS statement in SAS is used to calculate summary statistics like mean, median, and maximum for specified variables. The CLASS statement, when used with PROC MEANS, groups the data by unique values of the CLASS variable, allowing for separate statistics to be calculated for each group. This enhances the analysis by providing insights into how different groups perform relative to each other, rather than treating the dataset as a whole .

PROC MEANS and PROC UNIVARIATE both summarize data, but they have methodological differences. PROC MEANS provides basic descriptive statistics like mean, median, min, max, and sum for specified variables, primarily for general summary purposes. In contrast, PROC UNIVARIATE offers a more detailed statistical analysis, including all basic statistics plus distributional measures and extreme values, giving a deeper insight into data characteristics .

Macros in SAS enhance flexibility and efficiency by allowing the reuse of code segments across different tasks and datasets with minimal modifications. They enable parameterization, where certain values in the code are replaced by macro variables that can be easily adjusted. This reduces redundancy and error rates, especially when processing large datasets or performing repetitive tasks .

Sorting with the descending option in PROC SORT in SAS orders the data from highest to lowest for the specified variable, which can significantly influence data analysis by highlighting top performers or most significant values. This is useful in scenarios like identifying the highest sales figures, top students, or peak metrics in a dataset, thereby enabling more focused and relevant analysis .

The KEEP and DROP options in the DATA statement play a crucial role in managing data efficiently by allowing users to include only necessary variables in a dataset, or exclude irrelevant ones, respectively. This capability helps in reducing dataset size, enhancing performance by avoiding unnecessary processing of extraneous variables, and focusing the analysis on pertinent data .

PROC IMPORT in SAS facilitates data integration by allowing users to import data from various file formats, such as CSV, Excel, and delimiter files, directly into SAS datasets. This versatility enables seamless integration and processing of data from diverse sources without needing to manually convert or reformat these files beforehand, making data handling more efficient .

SAS SQL procedures enhance data manipulation capabilities by enabling more complex querying, filtering, and joining operations that might be cumbersome with traditional data step methods. SQL in SAS allows for creating, modifying, and querying tables using a familiar query language, offering a more concise and efficient way to manipulate large datasets and execute operations like joins in one step, which can be challenging with just data step coding .

Formatting in SAS provides the advantage of making data more readable and comprehensible in output reports by applying human-readable formats to numerical data, dates, and other values. It ensures that values are displayed in a way that conforms to user expectations or report standards, such as formatting currency, rounding numbers to a certain decimal, or displaying dates in a familiar format. This enhances the clarity of the reports and aids in effective data interpretation .

In SAS programming, the data step is used primarily for creating or modifying data tables, such as adding new variables or filtering observations. In contrast, the proc step is utilized for processing data and generating reports based on predefined procedures like PROC PRINT or PROC MEANS. The choice between data step and proc step depends on whether the task requires data transformation or data summarization and reporting .

The WHERE statement in SAS, when used with conditional expressions, improves data filtering by enabling precise selection of observations based on specific criteria. It allows for conditions like comparison operators, patterns, and range selections, thus filtering data efficiently at the point of data access rather than after data has been read. This improves data processing speed and aligns datasets more closely with analysis needs .

You might also like