# ppt of Rstudio analysis

*label*Excel

*timer*Asked: Apr 28th, 2017

**Question description**

**Integrate the data analysis I have in hand, done in RStudio and Excel, into PowerPoint slides. We analyze free-throw percentage** with a few models to determine correlation, along with other models. Please put everything we have into a few slides based on the instructions I will attach.

# Use the Desktop folder as the working directory for relative file paths.
# NOTE(review): this is a machine-specific absolute path; it will fail on any
# other computer. Consider an RStudio project or relative paths instead.
setwd("C:/Users/kaziz/Desktop")

library(readr)

Basketball <- read_csv("C:/Users/kaziz/Deskt

# to help generate correlation plots

install.packages("PerformanceA

library(PerformanceAnalytics)

# corrplot draws colour-coded correlation matrices.
# Install it from the US CRAN mirror.
# NOTE(review): installing on every run is slow; run this once interactively.
install.packages("corrplot", repos = "http://cran.us.r-project.org")

library(corrplot)

# Print descriptive statistics for every column of the Basketball dataset.

summary(Basketball)

# Put Basketball's columns on the search path so they can be referenced by
# bare name (e.g. FTP instead of Basketball$FTP).
# NOTE(review): attach() copies the data at this point; columns added to
# Basketball later are not visible by bare name, and a later attach() of
# another data frame with the same column names will mask these.

attach(Basketball)

plot(Basketball$FTP,Basketball

```{r, results='hide'}

# chart.Correlation() from the "PerformanceAnalytics" package draws a
# correlation matrix in a single call, but it needs numeric input, so the
# non-numeric (categorical) columns are filtered out first.
# Missing values are NOT removed here; only the column type is checked.

# One TRUE/FALSE flag per column. vapply() always returns a logical vector,
# whereas sapply() would return an empty list for a data frame with no columns.
Basketball.num <- vapply(Basketball, is.numeric, logical(1))

# Keep only the numeric columns. drop = FALSE guarantees the result is still
# a data frame even when a single numeric column remains.
num <- Basketball[, Basketball.num, drop = FALSE]

```

# Scatterplot / histogram / coefficient matrix for all numeric columns.
chart.Correlation(R = num)

# Correlation matrix computed only from rows with no missing values.
correlation <- cor(x = num, use = "complete.obs")

# Colour-coded view of the upper triangle of that matrix.
corrplot(corr = correlation, type = "upper")

Basketball$Post= ifelse(Basketball$Pos=="PG",1,

```{r}

# Load "caTools", which provides sample.split().
library(caTools)

# Split the observations roughly 70% training / 30% testing.
# sample.split() expects a vector with one label per observation. Passing the
# whole data frame yields one flag per *column*, which is then silently
# recycled across the rows, so the split is neither random per row nor 70/30.
# Using the row index gives exactly one flag per row.
# NOTE(review): call set.seed() beforehand if the split must be reproducible.
indicator <- sample.split(seq_len(nrow(Basketball)), SplitRatio = 0.7)

# Extract the rows according to the indicator flags.
testing <- Basketball[!indicator, ]  # ~30% of the rows, held out for testing
training <- Basketball[indicator, ]  # ~70% of the rows, used to fit the model

# Kept so that bare column names keep resolving to the training data, as in
# the original script.
# NOTE(review): the model below no longer depends on this; remove it once no
# other code relies on the attached columns.
attach(training)

# Linear regression of free-throw percentage (FTP) on the other statistics.
# data = training makes the source of every variable explicit instead of
# depending on whichever data frame was attached last.
linear <- lm(
  FTP ~ Post + FGP + `3PP` + AST + TRB + TOV + BLK + `PS/G` + MP,
  data = training
)

# Coefficient table and fit statistics.
summary(linear)

# Standard regression diagnostic plots.
plot(linear)

# Predict FTP for the held-out testing rows with the fitted model.
testing$linear_prediction <- predict(linear, newdata = testing)

# Prediction error, in the same units as FTP (predicted minus actual).
accuracy <- testing$linear_prediction - testing$FTP

# Error relative to the actual value.
percent <- accuracy / testing$FTP

# Mean signed error in FTP units (positive = over-prediction on average).
mean(accuracy, na.rm = TRUE)

# Mean absolute relative error: how far off the predictions are, as a
# fraction of the actual FTP. Rows with FTP == 0 give Inf/NaN and are skipped.
mean(abs(percent[is.finite(percent)]))