# Untitled
# unknown
# r
# 4 years ago
# 3.6 kB
# 5
# Indexable
library(caTools)
library(tidyverse)
library(readr)
library(lessR)
library(dplyr)
library(psych)
library(haven)
library(magrittr)
library(plm)
library(stargazer)
library(readxl)
library(lmtest)
# Load Data ----
# read.csv() already returns a data.frame, so no extra coercion is needed.
# (The previous unassigned as.data.frame(RAWDATA) call changed nothing and
# only dumped the whole table to the console.)
RAWDATA <- read.csv("C:/Users/shame/OneDrive/Desktop/ResearchPaper/DataF.csv")

# It's important that all variables are in the correct format.
# The Tobin's Q ratio can remain as is, but the leverage ratio (LEV) is
# converted to a decimal by dividing it by 100.
# NOTE(review): this assumes the CSV stores LEV as a percentage -- confirm.
RAWDATA$LEV <- RAWDATA$LEV / 100

# Reorganising the Data ----
# Optional step: reorder the 12 columns (by position) to follow the layout
# of the research proposal.
DATA <- RAWDATA[c(1, 2, 3, 10, 11, 6, 5, 4, 7, 8, 9, 12)]

# View() opens a spreadsheet-style viewer, so only call it when the script is
# being run interactively.
if (interactive()) {
  View(DATA)
}
# Describe and summarise the data ----
# Structure of the data frame (column types and dimensions).
# The original author's run reported 12 variables and 386 observations.
str(DATA)

# First 10 observations of the data frame.
head(DATA, 10)

# Summary-statistics table for the data, printed as plain text.
# Open question left by the original author, kept as written:
# "However, not quite correct - why?"
stargazer(DATA, type = "text")
# Number of observations ----
# Rows per firm (ID). According to the original author, the n column is the
# number of years' worth of data used per firm, and their run showed 5 years
# of data per firm.
summarise(group_by(DATA, ID), n = n())

# Rows per YEAR. According to the original author, n is the number of firms
# with data in each year; their run showed, e.g., 75 firms for 2016 and
# 78 firms for 2017.
summarise(group_by(DATA, YEAR), n = n())
# VARIATION ANALYSIS ----
# Overall variation: how the variables vary across firms and over time.
# Each column is centred on its grand mean (missing values are skipped when
# the mean is computed). The mean is left out of the table because it is zero
# by construction after centring.
DATA %>%
  select(BI, MO, TOBINQ, ROA, ROE, FIRM_SIZE, LEV, SALES_GROWTH, LG) %>%
  mutate(across(everything(), function(x) x - mean(x, na.rm = TRUE))) %>%
  stargazer(type = "text", omit.summary.stat = "mean", digits = 2)
# Between variation: how the variables vary from one firm to the next.
# Each firm (ID) is collapsed to its own mean per variable, and the table then
# summarises those firm means.
# ID is selected explicitly so the grouping column is present without dplyr
# having to re-add it, and na.rm = TRUE matches the overall-variation step:
# a firm with a missing year still contributes the mean of its observed years
# instead of turning into NA.
DATA %>%
  select(ID, BI, MO, TOBINQ, ROA, ROE, FIRM_SIZE, LEV, SALES_GROWTH, LG) %>%
  group_by(ID) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE))) %>%
  as.data.frame() %>% # stargazer is given a plain data.frame, not a tibble
  select(-ID) %>%
  stargazer(type = "text", digits = 2)
# Within variation: how the variables vary inside each firm over time.
# Every value is demeaned by its own firm's mean. na.rm = TRUE matches the
# overall-variation step, so a single missing year no longer turns all of
# that firm's demeaned values into NA.
# The mean is omitted from the table because it is zero by construction.
DATA %>%
  select(ID, BI, MO, TOBINQ, ROA, ROE, FIRM_SIZE, LEV, SALES_GROWTH, LG) %>%
  group_by(ID) %>%
  mutate(across(everything(), function(x) x - mean(x, na.rm = TRUE))) %>%
  ungroup() %>%
  as.data.frame() %>% # stargazer is given a plain data.frame, not a tibble
  select(-ID) %>%
  stargazer(type = "text", omit.summary.stat = "mean", digits = 2)
# 1. CHECK FOR MULTICOLLINEARITY ----
# TODO (from the original author): include why we need to check for
# multicollinearity.
# Collect the analysis variables in a data frame called "corr_1".
corr_1 <- DATA[c(
  "BI", "MO", "TOBINQ", "ROA", "ROE",
  "FIRM_SIZE", "LEV", "SALES_GROWTH", "LG"
)]
corr_1
# Generate the Pearson correlation matrix with corr.test().
corr.test(corr_1, method = "pearson")
# Pooled OLS estimator ----
# 1. Tobin's Q: pooled model of TOBINQ on the explanatory variables.
# NOTE(review): index = c(2, 3) is kept exactly as written; presumably it
# points at the firm and year columns of DATA by position -- confirm.
Tobin_Q <- plm(
  TOBINQ ~ BI + MO + FIRM_SIZE + LEV + SALES_GROWTH + LG,
  data = DATA,
  model = "pooling",
  index = c(2, 3)
)
summary(Tobin_Q)
# 2. ROA: pooled model of ROA on the same explanatory variables.
# The fitted model object is stored under the name ROA, the same name as the
# response column in DATA.
# NOTE(review): index = c(2, 3) is kept exactly as written; presumably it
# points at the firm and year columns of DATA by position -- confirm.
ROA <- plm(
  ROA ~ BI + MO + FIRM_SIZE + LEV + SALES_GROWTH + LG,
  data = DATA,
  model = "pooling",
  index = c(2, 3)
)
summary(ROA)
# 3. ROE: pooled model of ROE on the same explanatory variables.
# The fitted model object is stored under the name ROE, the same name as the
# response column in DATA.
# NOTE(review): index = c(2, 3) is kept exactly as written; presumably it
# points at the firm and year columns of DATA by position -- confirm.
ROE <- plm(
  formula = ROE ~ BI + MO + FIRM_SIZE + LEV + SALES_GROWTH + LG,
  data = DATA,
  index = c(2, 3),
  model = "pooling"
)
# The stray "Editor is loading..." text that was fused onto this line (viewer
# residue, not R code) has been removed so the script parses.
summary(ROE)