28.    More on R - data structure, function, R files, debug

November 21, 2018
home

Contents

01. R simple data

        # Box plot of the weight column of the ChickWeight data set (all chicks pooled).
        boxplot(ChickWeight$weight)
            

02. R data structure and plots

Dimensions | single type      | multiple types
1          | vector  c()      | list  list()
2          | matrix  matrix() | data frame  data.frame()
# A vector holds values of a single type.
x <- c(1, 2, 3)  # num [1:3] 1 2 3
# In R, it is not called an array.
x                # [1] 1 2 3
# One vector output result, 1 2 3
typeof(x)        # double
class(x)         # numeric

# Mixing types in c() coerces every element to one common type (character here).
y <- c(1, "2", TRUE)  # different types
y                # [1] "1" "2" "TRUE"
typeof(y)        # character
class(y)         # character

# plotting with one line.
# Without argument breaks, the default values will be created.
hist(ChickWeight$weight, breaks = fivenum(ChickWeight$weight))

# find out what is returned from function fivenum
five.values <- fivenum(ChickWeight$weight)
# from the env window, five.values num [1:5]. It is a vector.
# They are min, 1st quartile (lower hinge), median, 3rd quartile (upper hinge), max.
# Five break points give 4 bins.

# A list can hold values of different types without coercing them.
l <- list("Luck", 32, TRUE)
class(l)   # list
typeof(l)  # list
    ?datasets                # package, Base R datasets
    ?iris                    # about data frame iris
    head(iris)               # the contents of the first six rows
                             # (no leading "?": that would request help instead of data)
                             # There are five column names.
                             # no row names are defined, so the defaults 1, 2, 3... are used
    class(iris)              # data.frame
                             # the class describes the object as a whole.
    typeof(iris)             # list
    str(iris)                # structure of the data frame, one line per column
                             # iris data frame has 5 elements. Each column is one element.
            
    library(ggplot2)              # attach ggplot2 so that diamonds can be found below
    data(package = "ggplot2")     # list the datasets in 'ggplot2'
    ?diamonds                     # get to know one data set, diamonds
    View(diamonds)                # in script window
    summary(diamonds)             # min .. mean .. 3rd quartile ... for all variables.
    # subset and select: prices of the 'Fair' cut diamonds that cost less than 1000
    s <- subset(diamonds, cut %in% "Fair" & price < 1000)$price
    mean(s)                       # get one of its statistical data.
            
        # Box plots of weight, one box per level of Diet, using columns of ChickWeight.
        boxplot(weight ~ Diet, data = ChickWeight)
            
        library(ggplot2)
        # Scatter plot of price against carat; points are colored by clarity.
        g <- ggplot(diamonds, aes(x = carat, y = price)) +
          geom_point(aes(color = clarity))
        g

        # see the trend: add a smoothed curve on top of the points
        library(mgcv)
        c <- g + geom_smooth(color = "yellow")
        c

        # see the trend in linear model: add a straight-line fit instead
        l <- g + geom_smooth(method = "lm", color = "red")
        l
            
# data frame index
# The index in R data frame is 1-based.

# create a data frame
name <- c("happy", "lucky", "joy")
age <- c(1, 3, 5)
# data.frame() keeps each column's own type. cbind() on two plain vectors
# would build a character matrix instead and turn age into "1", "3", "5".
my.df <- data.frame(name, age)

# access the data frame
my.df
# output
#    name age
# 1 happy   1
# 2 lucky   3
# 3   joy   5
my.df[, 1]   # "happy" "lucky" "joy"
my.df[2, 2]  # 3

# merge for inner join, full join, left join, right join
# The two data frames are matched on their common column, share.keys.
df1 <- data.frame(LETTERS, share.keys = 1:26)  # 26 rows
df2 <- data.frame(letters, share.keys = c(1:9, 11, 12, 13, 14, 22:34))  # 26 rows
merge(df1, df2)                # inner join 18 rows
merge(df1, df2, all = TRUE)    # full join 34 rows, <NA> for mismatch
merge(df1, df2, all.x = TRUE)  # left join 26 rows, all the left + matched right
merge(df1, df2, all.y = TRUE)  # right join 26 rows, all the right + matched left

# combine the rows from two data frames
name <- c("John", "Mary", "Mike")
age <- c(20, 30, 40)
df1 <- data.frame(name, age)
df1

name <- c("Wiwi", "Tairo", "Emi")
age <- c(5, 6, 7)
df2 <- data.frame(name, age)
df2

two <- rbind(df1, df2)
two

# count the rows for each Diet level
data(ChickWeight)
t <- table(ChickWeight$Diet)
class(t)
t
# ----- result ----------------------
# [1] "table"
#
#   1   2   3   4
# 220 120 120 118

03. function

# define a function

#' Double the sum of two values.
#'
#' @param a First value to add.
#' @param b Second value to add.
#' @return `(a + b) * 2`.
my.function <- function(a, b) {
  # Named `total` rather than `sum` so base::sum is not shadowed inside the body.
  total <- a + b
  total * 2
}

# set function arguments, and call the function
p1 <- 2
p2 <- 3
result <- my.function(p1, p2)

# output the result
print(result)  # [1] 10

04. debug

05. R files

# peter_add.R

#' Add two values.
#'
#' @param a First value.
#' @param b Second value.
#' @return `a + b`.
add <- function(a, b) {
  a + b
}

# peter_main.R

# source() looks for 'peter_add.R' relative to the current working directory,
# so the directory holding both files has to be set before the source() call.
# NOTE(review): setwd() is best run once from the console rather than kept in
# a script, because it changes global state for the whole session.
setwd('~/documents/peter_r')
source('peter_add.R')
add(5, 7)  # [1] 12