# Untitled
#
# mail@pastecode.io avatar
# unknown
# r
# 2 years ago
# 3.6 kB
# 0
# Indexable
# Never
library(caTools)
library(tidyverse)
library(readr)
library(lessR)
library(dplyr)
library(psych)
library(haven)
library(magrittr)
library(plm)
library(stargazer)
library(readxl)
library(lmtest)

# Load Data ----
# read.csv() already returns a data.frame, so no further conversion is needed.
# (A bare `as.data.frame(RAWDATA)` call would discard its result and only dump
# the whole dataset to the console.)
RAWDATA <- read.csv("C:/Users/shame/OneDrive/Desktop/ResearchPaper/DataF.csv")

# It is important that all variables are in the correct format.
# The Tobin's Q ratio can remain as is, but the leverage ratio (LEV) needs to
# be rescaled: dividing by 100 converts it to a decimal.
RAWDATA$LEV <- RAWDATA$LEV / 100

# Reorganising the data ----
# This step is optional. We chose to reorder the columns (by position) so the
# data frame follows the layout of our research proposal.
DATA <- RAWDATA[c(1, 2, 3, 10, 11, 6, 5, 4, 7, 8, 9, 12)]

# View() opens a spreadsheet-style viewer; only call it when a user is
# actually sitting at the session, so the script can also be run in batch.
if (interactive()) {
  View(DATA)
}
# Describe and summarise the data ----

# Structure of the data frame: column types plus the number of variables and
# observations (the original run reported 12 variables and 386 observations).
str(DATA)

# The first 10 observations of the data frame.
head(DATA, 10)

# Summary-statistics table for the data, printed as plain text.
# However, not quite correct - why?
stargazer(DATA, type = "text")

# Number of observations ----

# Number of observations by ID variable.
# The n column is the number of rows (years of data) available per firm; in
# the original run the authors read this as 5 years of data per firm.
summarise(group_by(DATA, ID), n = n())

# Number of observations by YEAR.
# The n column is the number of firms with data in each year; in the original
# run, 75 firms had data for 2016, 78 firms for 2017, etc.
summarise(group_by(DATA, YEAR), n = n())

# VARIATION ANALYSIS ----
# All three decompositions drop missing values (na.rm = TRUE) when computing
# means, so a single NA in a variable cannot turn a whole firm's (or the whole
# sample's) statistics into NA.

# Overall variation: how the variables vary across firms and over time.
# Each variable is expressed as a deviation from its full-sample mean.
DATA %>%
  select(BI, MO, TOBINQ, ROA, ROE, FIRM_SIZE, LEV, SALES_GROWTH, LG) %>%
  mutate(across(everything(), function(x) x - mean(x, na.rm = TRUE))) %>%
  stargazer(type = "text", omit.summary.stat = "mean", digits = 2)

# Between variation: how the variables vary from one firm to the next.
# Each firm is collapsed to its own mean, then the firm means are summarised.
# ID is selected explicitly so that it is available for grouping.
DATA %>%
  select(ID, BI, MO, TOBINQ, ROA, ROE, FIRM_SIZE, LEV, SALES_GROWTH, LG) %>%
  group_by(ID) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE))) %>%
  as.data.frame() %>% # stargazer expects a plain data.frame
  select(-ID) %>%
  stargazer(type = "text", digits = 2)


# Within variation: how the variables vary within each firm.
# Each observation is demeaned by its own firm's mean.
DATA %>%
  select(ID, BI, MO, TOBINQ, ROA, ROE, FIRM_SIZE, LEV, SALES_GROWTH, LG) %>%
  group_by(ID) %>%
  mutate(across(everything(), function(x) x - mean(x, na.rm = TRUE))) %>%
  ungroup() %>%
  as.data.frame() %>% # stargazer expects a plain data.frame
  select(-ID) %>%
  stargazer(type = "text", omit.summary.stat = "mean", digits = 2)



# 1. CHECK FOR MULTICOLLINEARITY ----
# TODO: [INCLUDE WHY WE NEED TO CHECK FOR MULTICOLLINEARITY]

# Build the data frame "corr_1" holding only the variables whose pairwise
# correlations we want to inspect, then print it.
corr_1 <- DATA[c(
  "BI", "MO", "TOBINQ", "ROA", "ROE",
  "FIRM_SIZE", "LEV", "SALES_GROWTH", "LG"
)]
corr_1

# Generate the Pearson correlation matrix with psych's "corr.test" function.
corr.test(corr_1, method = "pearson")


# Pooled OLS estimator ----
#
# Three pooled models are fitted, one per dependent variable (TOBINQ, ROA,
# ROE), each on the same regressors: BI, MO, FIRM_SIZE, LEV, SALES_GROWTH, LG.
#
# The panel structure is declared by column NAME (individual index first, time
# index second). A numeric `index` such as c(2, 3) does NOT mean "columns 2
# and 3": plm reads a numeric first element as the number of individuals in a
# balanced panel and ignores the second element, so the panel dimensions
# reported by summary() would be wrong.
# NOTE(review): assumes ID is the firm identifier and YEAR the time period,
# which is how these two columns are used in the rest of this script.
panel_index <- c("ID", "YEAR")

# 1. TOBIN Q
Tobin_Q <- plm(
  formula = TOBINQ ~ BI + MO + FIRM_SIZE + LEV + SALES_GROWTH + LG,
  data = DATA,
  index = panel_index,
  model = "pooling"
)
summary(Tobin_Q)

# 2. ROA
# The fitted model is stored under the same name as the ROA data column; the
# formula still resolves ROA inside DATA because `data` is searched first.
ROA <- plm(
  formula = ROA ~ BI + MO + FIRM_SIZE + LEV + SALES_GROWTH + LG,
  data = DATA,
  index = panel_index,
  model = "pooling"
)
summary(ROA)

# 3. ROE
# As with ROA, the model object shares its name with the ROE data column.
ROE <- plm(
  formula = ROE ~ BI + MO + FIRM_SIZE + LEV + SALES_GROWTH + LG,
  data = DATA,
  index = panel_index,
  model = "pooling"
)
summary(ROE)