# Untitled
# unknown
# r
# 4 years ago
# 3.6 kB
# 4
# Indexable
# Packages ----
# The attach order is kept exactly as in the original script: when two
# packages export the same name, the one attached later wins.
library(caTools)
library(tidyverse)
library(readr)
library(lessR)
library(dplyr)
library(psych)
library(haven)
library(magrittr)
library(plm)
library(stargazer)
library(readxl)
library(lmtest)

# Load data ----
# Location of the raw data set. Edit this single line when running the script
# on another machine.
data_path <- "C:/Users/shame/OneDrive/Desktop/ResearchPaper/DataF.csv"

# read.csv() already returns a data.frame, so no as.data.frame() conversion is
# needed. (The original call discarded its result and merely printed the
# whole data set to the console.)
RAWDATA <- read.csv(data_path)

# Variable formats ----
# It is important that all variables are in the correct format. Tobin's Q can
# remain as is, but the leverage ratio has to be converted to a decimal
# (presumably it is recorded in percent -- confirm against the data source):
RAWDATA$LEV <- RAWDATA$LEV / 100

# Reorganising the data ----
# This step is optional. We chose to reorder the columns of the data frame
# according to our research proposal. Columns are picked by position, so this
# line must be revisited if the layout of the CSV file changes.
DATA <- RAWDATA[c(1, 2, 3, 10, 11, 6, 5, 4, 7, 8, 9, 12)]

# View() opens a data viewer, which is only useful in an interactive session;
# skip it when the script is run in batch mode.
if (interactive()) {
  View(DATA)
}

# Describe and summarise the data ----
# From the structure you can tell we have 12 variables and 386 observations.
str(DATA)

# To view the first 10 observations of the data frame, use head():
head(DATA, 10)

# Generate a summary of the data.
# However, not quite correct - why?
# NOTE(review): possibly because every column of DATA, including the
# identifier columns, is summarised -- confirm.
DATA %>%
  stargazer(type = "text")

# Number of observations ----
# Number of observations by ID variable.
DATA %>%
  group_by(ID) %>%
  summarise(n = n())
# The n column gives the number of years' worth of data used per firm.
# The table therefore indicates that we used 5 years of data per firm.

# Number of observations by YEAR variable.
DATA %>%
  group_by(YEAR) %>%
  summarise(n = n())
# The n column shows how many firms have data in each year. For example,
# 75 firms have data for 2016 and 78 firms have data for 2017, etc.
# VARIATION ANALYSIS ----

# Variables examined in the variation analysis and the correlation matrix.
analysis_vars <- c(
  "BI", "MO", "TOBINQ", "ROA", "ROE",
  "FIRM_SIZE", "LEV", "SALES_GROWTH", "LG"
)

# Subtract the mean of a vector from each of its elements, ignoring missing
# values so that a single NA does not turn the whole result into NA.
center_on_mean <- function(x) {
  x - mean(x, na.rm = TRUE)
}

# Overall variation: how the variables vary across firms and over time.
DATA %>%
  select(all_of(analysis_vars)) %>%
  mutate(across(everything(), center_on_mean)) %>%
  stargazer(type = "text", omit.summary.stat = "mean", digits = 2)

# Between variation: how the variables vary from one firm to the next.
# ID is selected explicitly instead of relying on dplyr to re-add the
# grouping column. Missing values are ignored, as in the overall step.
DATA %>%
  select(ID, all_of(analysis_vars)) %>%
  group_by(ID) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE))) %>%
  as.data.frame() %>%
  select(-ID) %>%
  stargazer(type = "text", digits = 2)

# Within variation: how the variables vary within each firm. Each variable is
# demeaned by firm; across(everything()) leaves the grouping column untouched.
DATA %>%
  select(ID, all_of(analysis_vars)) %>%
  group_by(ID) %>%
  mutate(across(everything(), center_on_mean)) %>%
  ungroup() %>%
  as.data.frame() %>%
  select(-ID) %>%
  stargazer(type = "text", omit.summary.stat = "mean", digits = 2)

# 1. CHECK FOR MULTICOLLINEARITY ----
# TODO: [INCLUDE WHY WE NEED TO CHECK FOR MULTICOLLINEARITY]
# We create a data frame called "corr_1" as follows:
corr_1 <- DATA %>%
  select(all_of(analysis_vars))
corr_1

# To generate the correlation matrix, we use the corr.test() function:
corr.test(corr_1, method = "pearson")

# POOLED OLS ESTIMATOR ----
# The panel index is given by column NAME. plm does not interpret a numeric
# `index` as column positions: a numeric first element is taken as the number
# of individuals in a balanced panel, so the original index = c(2, 3) made
# plm build a panel of 2 pseudo-firms and ignore the time index.
# NOTE(review): this assumes the original c(2, 3) was meant to point at the
# ID and YEAR columns -- confirm.
#
# The fitted models ROA and ROE share their names with columns of DATA; the
# object names are kept unchanged so that any later code still finds them.

# 1. Tobin's Q
Tobin_Q <- plm(
  formula = TOBINQ ~ BI + MO + FIRM_SIZE + LEV + SALES_GROWTH + LG,
  data = DATA,
  index = c("ID", "YEAR"),
  model = "pooling"
)
summary(Tobin_Q)

# 2. ROA
ROA <- plm(
  formula = ROA ~ BI + MO + FIRM_SIZE + LEV + SALES_GROWTH + LG,
  data = DATA,
  index = c("ID", "YEAR"),
  model = "pooling"
)
summary(ROA)

# 3. ROE
ROE <- plm(
  formula = ROE ~ BI + MO + FIRM_SIZE + LEV + SALES_GROWTH + LG,
  data = DATA,
  index = c("ID", "YEAR"),
  model = "pooling"
)
summary(ROE)
# Editor is loading...