# 14/50 - Day 1

user_2508819
r
6 months ago
14 kB
1
Indexable
Never
```r
### ---------------------- Questions to Analyze -------------------------- ###

## Question 1: What Makes Employees Quit Their Jobs? (Termination)

# Analysis 1.1 - Is there correlation between termination type and length of service?

# Keep only the two columns of interest and discard rows missing either one.
filtered_data <- new_employee_attrition %>%
  select(termination_type, length_of_service) %>%
  drop_na()

# Tally the rows for every (termination type, length of service) pair.
termtype_los <- filtered_data %>%
  group_by(termination_type, length_of_service) %>%
  summarise(count = n())

# One line per termination type, with a filled marker at every observed count.
ggplot(
  data = termtype_los,
  mapping = aes(x = length_of_service, y = count, color = termination_type)
) +
  geom_line(size = 1) +
  geom_point(shape = 21, size = 3, fill = "lightskyblue1") +
  theme_minimal() +
  labs(
    title = "Correlation between Termination Type and Length of Service",
    x = "Length of Service",
    y = "Count",
    color = "Termination Type"
  )

# Analysis 1.2 - Is voluntary termination related to length of service?

# Keep voluntary terminations only. Rows with a missing length of service are
# dropped as well, so the max() used for the axis breaks below cannot be NA.
filtered_data <- new_employee_attrition %>%
  filter(termination_type == "Voluntary", !is.na(length_of_service)) %>%
  select(termination_type, length_of_service)

# Count voluntary terminations per length of service, ordered by length of
# service so the area is drawn left to right.
voluntary_length_of_service <- filtered_data %>%
  group_by(length_of_service) %>%
  summarize(count = n()) %>%
  arrange(length_of_service)

# Area chart of the counts. The x axis is labelled every 5 units of length of
# service and the y axis is given 10% headroom above the largest count.
ggplot(voluntary_length_of_service, aes(x = length_of_service, y = count)) +
  geom_area(fill = "steelblue", alpha = 0.7) +
  labs(
    x = "Length of Service",
    y = "Count",
    title = "Voluntary Termination by Length of Service"
  ) +
  scale_x_continuous(
    breaks = seq(0, max(voluntary_length_of_service$length_of_service), by = 5)
  ) +
  scale_y_continuous(
    limits = c(0, max(voluntary_length_of_service$count) * 1.1)
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 16, face = "bold"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white"),
    legend.position = "none"
  )

# Analysis 1.3 - Is involuntary termination related to length of service?

# Restrict to involuntary terminations and keep only the plotted columns.
filtered_data <- new_employee_attrition %>%
  filter(termination_type == "Involuntary") %>%
  select(termination_type, length_of_service)

# Number of rows for each length of service (termination type is carried along
# so it can drive the fill colour).
involuntary_termination <- filtered_data %>%
  group_by(length_of_service, termination_type) %>%
  summarise(count = n())

# Filled area chart of the counts, outlined in white, legend underneath.
ggplot(
  data = involuntary_termination,
  mapping = aes(x = length_of_service, y = count, fill = termination_type)
) +
  geom_area(color = "white") +
  scale_fill_manual(values = c("darkgreen", "lightgreen")) +
  labs(
    title = "Relationship between Involuntary Termination and Length of Service",
    x = "Length of Service",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 11, face = "bold"),
    plot.title = element_text(size = 14, face = "bold"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white"),
    legend.position = "bottom"
  )

# Analysis 1.4 - What is the correlation between voluntary termination (termination type) and age of employee?

# Keep voluntary terminations only, with just the columns used below.
filtered_data <- new_employee_attrition %>%
  filter(termination_type == "Voluntary") %>%
  select(termination_type, age)

# Count voluntary terminations at each age.
voluntary_age <- filtered_data %>%
  group_by(age) %>%
  summarize(count = n())

# Connected scatter plot of count against age. The line and point colours are
# fixed constants rather than mapped aesthetics, so no colour scale is added:
# a colour scale would have nothing to act on.
ggplot(voluntary_age, aes(x = age, y = count)) +
  geom_line(color = "steelblue", size = 1) +
  geom_point(color = "steelblue", size = 3) +
  labs(
    x = "Age",
    y = "Count",
    title = "Correlation between Voluntary Termination and Age"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 14, face = "bold"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white"),
    legend.position = "none"
  )

# Analysis 1.5 - What is the correlation between involuntary termination (termination type) and age of employee?

# Count involuntary terminations at each age.
involuntary_age <- new_employee_attrition %>%
  filter(termination_type == "Involuntary") %>%
  group_by(age) %>%
  summarize(count = n())

# Scatter plot of count against age. The point colour is a fixed constant, not
# a mapped aesthetic, so the plot has no legend; the colour scale, legend guide
# and legend theme settings have been removed because they had nothing to
# act on.
ggplot(involuntary_age, aes(x = age, y = count)) +
  geom_point(color = "#FF6384", size = 4, alpha = 0.8) +
  labs(
    x = "Age",
    y = "Count",
    title = "Correlation between Involuntary Termination and Age"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 11, face = "bold"),
    plot.title = element_text(size = 14, face = "bold"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white")
  )

# Analysis 1.6 - Is there correlation between termination reason and age of employee?

# Drop rows without a termination reason, then count rows for every
# (age, termination reason) pair.
termreason_age <- new_employee_attrition %>%
  filter(!is.na(termination_reason)) %>%
  group_by(age, termination_reason) %>%
  summarise(count = n())

# Stacked bars: one bar per age, segments coloured by termination reason.
ggplot(
  data = termreason_age,
  mapping = aes(x = age, y = count, fill = termination_reason)
) +
  geom_col() +
  labs(
    title = "The Correlation Between Termination Reasons & Employee Ages",
    x = "Age",
    y = "Total Count Of Term. Reasons",
    fill = "Termination Reason"
  )

# Analysis 1.7 - How does employees' genders relate to their reasons for leaving?

# Count rows for every (gender, termination reason) pair, ignoring rows where
# either value is missing, and list the result in descending gender order.
# NOTE(review): this block previously read from `employee_attrition`; it now
# reads `new_employee_attrition`, the data frame used by the other analyses in
# this script. Confirm that the other table was not intended here.
termreason_gender <- new_employee_attrition %>%
  filter(!is.na(termination_reason) & !is.na(gender)) %>%
  group_by(gender, termination_reason) %>%
  summarize(count = n()) %>%
  arrange(desc(gender))

# Grouped (dodged) bar chart: one cluster per gender, one bar per reason.
ggplot(termreason_gender, aes(x = gender, y = count, fill = termination_reason)) +
  geom_bar(stat = "identity", position = "dodge", color = "black", alpha = 0.8) +
  labs(x = "Gender", y = "Total Count of Term. Reasons", fill = "Termination Reason") +
  ggtitle("Termination Reasons by Gender") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10),
    legend.position = "bottom"
  ) +
  scale_fill_brewer(palette = "Set2")

# Analysis 1.8 - What is the relationship between termination types and gender?

# Keep rows where both termination type and gender are known.
filtered_data <- new_employee_attrition %>%
  filter(!(is.na(termination_type) | is.na(gender)))

# Number of rows for each (termination type, gender) combination.
grouped_data <- filtered_data %>%
  group_by(termination_type, gender) %>%
  summarise(count = n())

# Stacked bars: one bar per termination type, split by gender.
ggplot(
  data = grouped_data,
  mapping = aes(x = termination_type, y = count, fill = gender)
) +
  geom_col() +
  labs(
    title = "Termination Types by Gender",
    x = "Termination Type",
    y = "Count",
    fill = "Gender"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Analysis 1.9 - What is the relationship between voluntary termination and gender?

# Keep rows where both termination type and gender are known.
filtered_data <- new_employee_attrition %>%
  filter(!(is.na(termination_type) | is.na(gender)))

# Narrow down to voluntary terminations.
voluntary_data <- filtered_data %>%
  filter(termination_type == "Voluntary")

# Head-count per gender among the voluntary terminations.
grouped_data <- voluntary_data %>%
  count(gender, name = "count")

# Show the table of counts.
grouped_data

# Pie chart: a single stacked bar wrapped onto polar coordinates, with the
# count written in the middle of each slice.
ggplot(data = grouped_data, mapping = aes(x = "", y = count, fill = gender)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  geom_text(
    mapping = aes(label = count),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 4
  ) +
  scale_fill_manual(values = c("seagreen3", "dimgray")) +
  labs(
    title = "Voluntary Termination by Gender",
    x = "",
    y = "",
    fill = "Gender"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank()
  )

# Analysis 1.10 - What is the relationship between involuntary termination and gender?

# Keep involuntary terminations with a known gender.
filtered_data <- new_employee_attrition %>%
  filter(termination_type == "Involuntary" & !is.na(gender))

# Count involuntary terminations per gender.
grouped_data <- filtered_data %>%
  group_by(gender) %>%
  summarize(count = n())

# Pie chart with the count written in the middle of each slice.
# - coord_polar() is added once only; a second coordinate system would simply
#   replace the first.
# - theme_void() is a complete theme, so it is applied BEFORE the theme()
#   tweaks. Applied afterwards it would reset them, which previously discarded
#   the bottom legend and the bold 16pt title.
ggplot(grouped_data, aes(x = "", y = count, fill = gender)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y", start = 0, direction = -1) +
  geom_text(
    aes(label = count),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 4
  ) +
  labs(x = "", y = "", fill = "Gender") +
  ggtitle("Involuntary Termination by Gender") +
  scale_fill_manual(values = c("lightsalmon1", "salmon4")) +
  theme_void() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5)
  )

# Analysis 1.11 - Do job titles influence voluntary termination?
# termination type - voluntary

# Voluntary terminations that have a job title recorded.
filtered_data <- new_employee_attrition %>%
  filter(termination_type == "Voluntary", !is.na(job_title))

# Head-count of voluntary terminations per job title.
grouped_data <- filtered_data %>%
  count(job_title, name = "count")

# Horizontal bars (count on x, job title on y) with a centred, enlarged title.
ggplot(
  data = grouped_data,
  mapping = aes(x = count, y = job_title, fill = job_title)
) +
  geom_col(color = "black") +
  labs(
    title = "Voluntary Termination by Job Title",
    x = "Count",
    y = "Job Title",
    fill = "Job Title"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16),
    axis.text.y = element_text(hjust = 0),
    legend.position = "none"
  )

# Analysis 1.12 - Do job titles influence involuntary termination?
# termination type - Involuntary

# Involuntary terminations that have a job title recorded.
filtered_data <- new_employee_attrition %>%
  filter(termination_type == "Involuntary", !is.na(job_title))

# Head-count of involuntary terminations per job title.
grouped_data <- filtered_data %>%
  count(job_title, name = "count")

# Vertical bars, one per job title; x labels are tilted 45 degrees so long
# titles do not overlap.
ggplot(
  data = grouped_data,
  mapping = aes(x = job_title, y = count, fill = job_title)
) +
  geom_col(color = "white") +
  labs(
    title = "Involuntary Termination by Job Title",
    x = "Job Title",
    y = "Count",
    fill = "Job Title"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

# Analysis 1.13 - (placeholder: no analysis written yet)

## Question 2: When Will Employees Retire?

# Analysis 2.1 - What is the distribution of the employees at retirement age?

# One row per retirement record with a known age. The density must be
# estimated from these rows, not from the per-age summary below: the summary
# has a single row per distinct age, so every age would count equally no
# matter how many people retired at it.
retirees <- new_employee_attrition %>%
  filter(termination_reason == "Retirement", !is.na(age))

# Number of retirements at each age (kept as a summary table).
ret_age <- retirees %>%
  count(age, name = "ret_count")

# Kernel density estimate of retirement age over the individual records.
density_data <- density(retirees$age)

# Density plot of retirement age.
ggplot(retirees, aes(x = age)) +
  geom_density(fill = "skyblue", color = "black", alpha = 0.7) +
  xlab("Age") +
  ylab("Density") +
  ggtitle("Density Plot of Retirement Age") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

# Analysis 2.2 - What is the average retirement age of employees?

# Rows whose termination reason is retirement.
retirement_data <- new_employee_attrition %>%
  filter(termination_reason == "Retirement")

# Mean age across those rows, ignoring missing ages.
average_retirement_age <- mean(retirement_data$age, na.rm = TRUE)

# Report the mean rounded to a whole number of years.
cat("Average Retirement Age in the Company: ", round(average_retirement_age), " years\n")

# Five-number summary (plus mean) of the retirement ages themselves.
# summary() of the single mean value would just repeat that number six times.
summary(retirement_data$age)
```