03 Data Input Output
03 Data Input Output
Scripts
03a_basics_of_data_input_output.R:
http://1drv.ms/1DRMOTC
03b_intermediate_data_input_output:
http://1drv.ms/1JKeKwi
PostgreSQL
02_populare_bd_vinzari_PostgreSQL .sql:
http://1drv.ms/1JKfem5
Data Import/Export
http://cran.r-project.org/doc/manuals/r-release/R-data.html
Beginner's guide to R: Get your data into R
http://www.computerworld.com/article/2497164/business-intelligence/beginner-s-guide-to-r-get-your-data-into-r.html
Reading/Writing Data: Part 1
https://www.youtube.com/watch?v=aBzAels6jPk&index=9&list=PLjTlxb-wKvXNSDfcKPFH2gzHGyjpeCZmJ
Sources
http://www.r-bloggers.com/importing-data-into-r-from-different-sources/
Data Import & Export in R
http://science.nature.nps.gov/im/datamgmt/statistics/r/fundamentals/index.cfm
Reading data from the new version of
Google Spreadsheets
http://blog.revolutionanalytics.com/2014/
06/reading-data-from-the-new-version-of-g
Direct import from external data files (Excel, CSV, text files etc.) using
their menus
Save intermediate results from the data sources into common format
files (XML, CSV, JSON) and then import these intermediate files into
the package;
Some
Import data from web servers log into NoSQL data stores
Hadoop
NoSQL
Data
Stores
previous presentation
Many packages include datasets, such as ggplot2; after a package is
loaded, all of its datasets are available:
> library(ggplot2)
> str(diamonds)
'data.frame': 53940 obs. of 10 variables:
 $ carat  : num 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
 $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
 $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
 $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
 $ depth  : num 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
 $ table  : num 55 61 65 58 58 57 57 55 61 61 ...
 $ price  : int 326 326 327 334 335 336 336 337 337 338 ...
 $ x      : num 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
 $ y      : num 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
 $ z      : num 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
On
# Load the tab-separated births2006 table (first line holds the column
# names, file read as UTF-8) into the data frame `births2006`.
# file.path() joins the components with "/", which R accepts on Windows as
# well as on Linux and macOS, so one call replaces the three per-OS copies
# and the data is also loaded on platforms the switch() did not list.
births2006 <- read.table(
  file.path("births2006", "births2006.txt"),
  fileEncoding = "UTF-8", header = TRUE, sep = "\t"
)
# Load the semicolon-separated company table (first line holds the column
# names) into the data frame `comp`, keeping text columns as character
# vectors rather than factors.
# file.path() joins the components with "/", which R accepts on Windows as
# well as on Linux and macOS, so one call replaces the three per-OS copies.
# This also drops the stray console continuation "+" that had been pasted
# in front of `stringsAsFactors` and made the Darwin branch unparseable.
comp <- read.table(
  file.path("IrinaDan", "companyinfo.csv"),
  header = TRUE, sep = ";", stringsAsFactors = FALSE
)
http://courses.statistics.com/software/R/tables4R.htm
http://courses.statistics.com/Intro1/Lesson2/heartatk4R.txt
> heart.att = read.table(
+ "http://courses.statistics.com/Intro1/Lesson2/heartatk4R.txt",
+ header=TRUE)
> head(heart.att)
  Patient DIAGNOSIS SEX DRG DIED CHARGES LOS AGE
1       1     41041   F 122    0 4752.00  10  79
2       2     41041   F 122    0 3941.00   6  34
3       3     41091   F 122    0 3657.00   5  76
4       4     41081   F 122    0 1481.00   2  80
5       5     41091   M 122    0 1681.00   1  55
>
# Fetch the arrhythmia data file from the URL below and store it under the
# relative name data.csv.
download.file(
  url = "http://archive.ics.uci.edu/ml/machinelearning-databases/arrhythmia//arrhythmia.data",
  destfile = "data.csv"
)
trying URL 'http://archive.ics.uci.edu/ml/machinelearning-databases/arrhythmia//arrhythmia.data'
Content type 'text/plain; charset=UTF-8' length 402355
bytes (392 Kb)
opened URL
==================================================
downloaded 392 Kb
2. import the downloaded file
RODBC
gdata
xlsReadWrite
XLConnect
xlsx
> library(xlsx)
ADL
# Pick the relative path of the ADL2013_Studenti.xlsx workbook written with
# the separator style of the current operating system.
# The console continuation prompts ("+") that had been pasted into the call
# are removed because they made it unparseable. Linux shares the Darwin
# path through switch() fall-through (an empty alternative uses the next
# non-empty one).
switch(Sys.info()[["sysname"]],
  Windows = "ADL\\ADL2013_Studenti.xlsx",
  Linux = ,
  Darwin = "ADL/ADL2013_Studenti.xlsx"
)
a connection
On Windows systems:
password="bd2014")
On Mac OS
user="sales2014", password="sales2014")
the PostgreSQL query; the result of the query will be saved into data
frame invoice_detailed:
dbGetQuery(con,
FROM invoices i
> head(invoice_detailed,3)
invoiceno invoicedate customerid customername place
1
1111
2012-08-01
Iasi
1111
2012-08-01
Iasi
1111
2012-08-01
Iasi
countyname
Iasi Moldova
<NA>
Iasi Moldova
<NA>
Iasi Moldova
<NA>
productname unitofmeasurement
category quantity
Product 1
b500ml Category A
50
Product 2
kg Category B
75
Product 5
unit Category A
50
1000
50000
62000
1050
78750
88200
7060
353000 437720
Path
# Choose the OS-specific relative path of the .RData file and keep it in
# `file.name`, which the save() call that follows reads. Previously the
# value returned by switch() was discarded, so `file.name` was never set
# here.
# Linux shares the Darwin path through switch() fall-through (an empty
# alternative uses the next non-empty one).
file.name <- switch(Sys.info()[["sysname"]],
  Windows = "sales\\invoice_detailed.RData",
  Linux = ,
  Darwin = "sales/invoice_detailed.RData"
)
> save(invoice_detailed, file = file.name)
After
Close connections/drivers
After
Close
> dbUnloadDriver(drv)
>
# Set JAVA_HOME for this R session to the JDK 1.7.0_45 installation
# directory. The path is written as one unbroken string: the earlier
# line-wrapped literal put a newline in the middle of the directory name.
Sys.setenv(JAVA_HOME = "/Library/Java/JavaVirtualMachines/jdk1.7.0_45.jdk/Contents/Home")
> options(java.parameters="-Xmx2g")
> install.packages("rJava")
Java version
> .jinit()
> print(.jcall("java/lang/System", "S", "getProperty",
"java.version"))
> .jclassPath()
Load RJDBC package
> install.packages("RJDBC")
> library(RJDBC)
Create connection driver and open connection
> jdbcDriver <- JDBC(driverClass="oracle.jdbc.OracleDriver",
+ classPath="/Users/admin/Downloads/ojdbc6.jar")
Open connection
"jdbc:oracle:thin:@//10.10.0.7:1521/orcl",
"bd2",
"bd2")
Launch the Oracle query and store the result into the data
frame st
> st <- dbGetQuery(jdbcConnection,
+ "SELECT * FROM studenti")
Close connection
> dbDisconnect(jdbcConnection)
> install.packages("XML")
> library(XML)
> myURL <- "http://www.jaredlander.com/2012/02/another-kindof-super-bowl-pool/"
> dfHTML <- readHTMLTable(myURL, which=1,
+ header=FALSE,
+ stringsAsFactors = FALSE)
> head(dfHTML,3)
V1
V2
V3
> library(XML)
Import
8.3
70
10.3
8.6
65
10.3
8.8
63
10.2
10.5
72
16.4
needed: foreign
> install.packages("foreign")
> library(foreign)
Read a Stata data file (.dta)
> states <- read.dta("states.dta")
Read a local SPSS file
> spss1 <- read.spss("p004.sav",
+ use.value.labels = TRUE, to.data.frame = TRUE)
Import the SPSS file directly from web address
> spss2 <- read.spss("http://www.ats.ucla.edu/stat/spss/examples/chp/p004.sav",
+ use.value.labels = TRUE, to.data.frame = TRUE)
> save(invoice.details.ro,
+ file = "invoice.details.ro.RData")
> save(states, spss2, dat.xls,
+ file = "temp.RData")