# K-MEANS (custom dataset)
df <- read.csv("C:/Users/agarw/Downloads/custom_dataset.csv")
kmeans_result <- kmeans(df[, c(1, 2)], centers = 3, nstart = 20)
print(kmeans_result$cluster)

library(ggplot2)
df$Cluster <- as.factor(kmeans_result$cluster)
centers <- as.data.frame(kmeans_result$centers)
colnames(centers) <- c("x", "y")  # rename to match x and y in df (assumes the CSV's first two columns are named x and y)

# Plot the clusters and their centers
ggplot(df, aes(x = x, y = y, color = Cluster)) +
  geom_point(size = 3) +                          # data points
  geom_point(data = centers, aes(x = x, y = y),   # cluster centers
             color = "black", shape = 8, size = 4) +
  labs(title = "K-Means Clustering", x = "Feature 1", y = "Feature 2") +
  theme_minimal()

# K-MEANS (iris dataset)
library(ggplot2)
data(iris)
head(iris)
iris_data <- iris[, 1:4]
iris_scaled <- scale(iris_data)
set.seed(123)
kmeans_result <- kmeans(iris_scaled, centers = 3, nstart = 20)
iris$Cluster <- as.factor(kmeans_result$cluster)

# Note: the centers come from the scaled data, while the points below are on the
# raw Sepal axes, so the stars will not line up exactly with the clusters; plot
# the scaled data (or unscale the centers) for an exact match.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Cluster)) +
  geom_point(size = 3) +
  geom_point(data = as.data.frame(kmeans_result$centers),
             aes(x = Sepal.Length, y = Sepal.Width),
             color = "black", shape = 8, size = 4) +
  labs(title = "K-Means Clustering on Iris Dataset",
       x = "Sepal Length", y = "Sepal Width") +
  theme_minimal()
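# A quick way to sanity-check the choice of centers = 3 above is the elbow
# method: refit k-means for a range of k and plot the total within-cluster sum
# of squares. A minimal sketch reusing iris_scaled from above (the wss name is
# ours, not part of the original snippets):
set.seed(123)
wss <- sapply(1:10, function(k) {
  kmeans(iris_scaled, centers = k, nstart = 20)$tot.withinss
})
plot(1:10, wss, type = "b", pch = 19,
     xlab = "Number of clusters k",
     ylab = "Total within-cluster sum of squares",
     main = "Elbow Method")
# Pick k near the "elbow" where the curve flattens; for iris that is around 3.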
# PCA (custom dataset)
df <- read.csv("C:/Users/agarw/Downloads/custom_dataset.csv")
custom_data <- df[, sapply(df, is.numeric)]
custom_scaled <- scale(custom_data)
pca_custom <- prcomp(custom_scaled, center = TRUE, scale. = TRUE)
summary(pca_custom)  # proportion of variance explained by each component

ggplot(as.data.frame(pca_custom$x), aes(x = PC1, y = PC2)) +
  geom_point(size = 3) +
  labs(title = "PCA on Custom Dataset",
       x = "Principal Component 1", y = "Principal Component 2") +
  theme_minimal()

# PCA (iris dataset)
data(iris)
iris_data <- iris[, 1:4]
iris_scaled <- scale(iris_data)
pca_iris <- prcomp(iris_scaled, center = TRUE, scale. = TRUE)
summary(pca_iris)

library(ggplot2)
ggplot(as.data.frame(pca_iris$x), aes(x = PC1, y = PC2, color = iris$Species)) +
  geom_point(size = 3) +
  labs(title = "PCA on Iris Dataset",
       x = "Principal Component 1", y = "Principal Component 2") +
  theme_minimal()

# HIERARCHICAL CLUSTERING (custom dataset)
df <- read.csv("path/to/your/custom_dataset.csv")
custom_data <- df[, sapply(df, is.numeric)]
custom_scaled <- scale(custom_data)
dist_matrix_custom <- dist(custom_scaled)
hc_custom <- hclust(dist_matrix_custom, method = "complete")
plot(hc_custom, main = "Hierarchical Clustering Dendrogram (Custom Dataset)",
     xlab = "", sub = "")

# HIERARCHICAL CLUSTERING (iris dataset)
data(iris)
iris_data <- iris[, 1:4]
iris_scaled <- scale(iris_data)
dist_matrix <- dist(iris_scaled)
hc_iris <- hclust(dist_matrix, method = "complete")
plot(hc_iris, main = "Hierarchical Clustering Dendrogram (Iris)",
     xlab = "", sub = "")
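# The dendrograms above only draw the tree; to get concrete cluster labels,
# base R's cutree() cuts an hclust tree at a chosen number of clusters. A
# minimal sketch using hc_iris from above (the species cross-tab is our
# illustration, not part of the original snippets):
iris_hc_clusters <- cutree(hc_iris, k = 3)
table(iris_hc_clusters, iris$Species)  # clusters vs. true species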
# KNN (iris dataset)
library(class)
data(iris)
iris_data <- iris[, 1:4]
iris_labels <- iris$Species
set.seed(123)
train_index <- sample(1:nrow(iris_data), 0.7 * nrow(iris_data))
train_data <- iris_data[train_index, ]
test_data <- iris_data[-train_index, ]
train_labels <- iris_labels[train_index]
test_labels <- iris_labels[-train_index]
knn_predictions <- knn(train_data, test_data, train_labels, k = 3)
table(knn_predictions, test_labels)  # confusion matrix

# KNN (custom dataset)
library(class)
df <- read.csv("path/to/your/custom_dataset.csv")
custom_data <- df[, sapply(df, is.numeric)]
custom_labels <- df$target_column  # replace with the actual label column name
set.seed(123)
train_index_custom <- sample(1:nrow(custom_data), 0.7 * nrow(custom_data))
train_data_custom <- custom_data[train_index_custom, ]
test_data_custom <- custom_data[-train_index_custom, ]
train_labels_custom <- custom_labels[train_index_custom]
test_labels_custom <- custom_labels[-train_index_custom]
knn_predictions_custom <- knn(train_data_custom, test_data_custom,
                              train_labels_custom, k = 3)
table(knn_predictions_custom, test_labels_custom)
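# A hedged follow-up to the confusion matrices above: overall accuracy is the
# sum of the diagonal over the total. Sketch using the iris objects from above:
conf_mat <- table(knn_predictions, test_labels)
accuracy <- sum(diag(conf_mat)) / sum(conf_mat)
print(accuracy)  # proportion of test points classified correctly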
# DATA TRANSFORMATION
# Example dataset
data <- data.frame(
  ID = 1:6,
  Name = c("Alice", "Bob", "Charlie", "David", "Eva", "Frank"),
  Age = c(23, 45, 34, NA, 29, 41),
  Score = c(89, 76, 92, 85, NA, 73),
  Group = c("A", "B", "A", "B", "A", "B")
)

names(data) <- c("ID", "Name", "Age", "Score", "Category")  # rename columns
data$ID <- as.character(data$ID)          # change data types
data$Category <- as.factor(data$Category)

# Replace NA in Age with the mean
data$Age[is.na(data$Age)] <- mean(data$Age, na.rm = TRUE)
# Replace NA in Score with the median
data$Score[is.na(data$Score)] <- median(data$Score, na.rm = TRUE)

# Check for missing values
sum(is.na(data))
# Missing values by column
colSums(is.na(data))
# Percentage of missing values by column
colSums(is.na(data)) / nrow(data) * 100

cleaned_data <- na.omit(data)  # remove rows with missing values

# Filter rows where Age > 30
filtered_data <- data[data$Age > 30, ]
# Using dplyr
library(dplyr)
filtered_data <- data %>% filter(Age > 30)

# Sort by Age
sorted_data <- data[order(data$Age), ]
# Using dplyr
sorted_data <- data %>% arrange(Age)

# Add a column indicating whether Score is above 80
data$HighScore <- ifelse(data$Score > 80, TRUE, FALSE)
# Using dplyr
data <- data %>% mutate(HighScore = Score > 80)

# Summary of Age by Category
summary_data <- data %>%
  group_by(Category) %>%
  summarise(
    Avg_Age = mean(Age, na.rm = TRUE),
    Max_Score = max(Score, na.rm = TRUE)
  )

# RESHAPE DATA
library(tidyr)
# Make data wider
wide_data <- data %>% pivot_wider(names_from = Category, values_from = Score)
# Make data longer (invert the pivot above; starts_with("A") would also
# capture Age, so name the category columns explicitly)
long_data <- wide_data %>%
  pivot_longer(cols = c(A, B), names_to = "Category", values_to = "Score")

# Create another dataset
new_data <- data.frame(ID = c("1", "2", "3"), Gender = c("F", "M", "M"))
# Merge datasets
merged_data <- merge(data, new_data, by = "ID")

# Note: each scaling/transformation below is shown independently; in practice
# apply one of them to the original Score column, not all of them in sequence.
# Standardize Score column (z-score)
data$Score <- scale(data$Score)
# Min-max scaling
data$Score <- (data$Score - min(data$Score)) / (max(data$Score) - min(data$Score))
# Mean normalization
data$Score <- (data$Score - mean(data$Score)) / (max(data$Score) - min(data$Score))
# Robust scaling using median and IQR
iqr <- IQR(data$Score, na.rm = TRUE)
median_val <- median(data$Score, na.rm = TRUE)
data$Score <- (data$Score - median_val) / iqr
# Log transformation
data$Score <- log(data$Score + 1)
# Exponential transformation
data$Score <- exp(data$Score)
# Square-root transformation
data$Score <- sqrt(data$Score)
# Decimal scaling
max_abs <- max(abs(data$Score), na.rm = TRUE)
data$Score <- data$Score / (10^floor(log10(max_abs) + 1))
# Max-abs scaling
data$Score <- data$Score / max(abs(data$Score), na.rm = TRUE)
# Range scaling to [a, b]
a <- 0
b <- 1
data$Score <- (data$Score - min(data$Score)) /
  (max(data$Score) - min(data$Score)) * (b - a) + a

# Recode values
data$Category <- recode(data$Category, "A" = "Group_A", "B" = "Group_B")
# Replace specific values
data$Category[data$Category == "A"] <- "Alpha"

# Bin/discretize data
data$AgeGroup <- cut(data$Age, breaks = c(0, 30, 50, Inf),
                     labels = c("Young", "Middle", "Senior"))

unique_data <- data[!duplicated(data), ]            # eliminate duplicate rows
data$Age <- sapply(data$Age, function(x) x^2)       # apply a custom function to a column
selected_data <- data %>% select(starts_with("A"))  # select columns matching a pattern
data <- data %>% select(-Category)                  # remove an unwanted column
data <- data %>% select(Name, Age, everything())    # reorder columns
transposed_data <- t(data)                          # transpose data
data <- data %>% mutate(RowID = row_number())       # add a row identifier

# Convert wide to long data
long_data <- pivot_longer(data, cols = c(Age, Score),
                          names_to = "Variable", values_to = "Value")
# Convert long to wide data
wide_data <- pivot_wider(long_data, names_from = Variable, values_from = Value)

# Lag/lead variables
data <- data %>% mutate(LaggedScore = lag(Score), LeadScore = lead(Score))
# Cumulative sum
data <- data %>% mutate(CumulativeScore = cumsum(Score))
# Row-wise calculation
data <- data %>% rowwise() %>% mutate(Age_Score_Sum = sum(Age, Score, na.rm = TRUE))
# Expand to all combinations
expanded_data <- expand.grid(Category = c("A", "B"), Score = seq(70, 90, by = 5))
# Split a column (illustrative; the example names have no space, so LastName is NA)
data <- data %>% separate(Name, into = c("FirstName", "LastName"), sep = " ")
# Combine columns
data <- data %>% unite("FullName", c("FirstName", "LastName"), sep = " ")
# Rank rows based on a column
data <- data %>% mutate(ScoreRank = rank(-Score))
# Filter on multiple conditions
filtered_data <- data %>% filter(Age > 30 & Score > 80)

# FINDING OUTLIERS
# Compute z-scores
df$z_score <- scale(df$numeric_column)
# Identify outliers (|z| > 3)
outliers <- df[abs(df$z_score) > 3, ]
print("Outliers using Z-Score:")
print(outliers)

# Boxplot with outliers
boxplot(df$numeric_column, main = "Boxplot to Detect Outliers", col = "lightblue")
# Density plot
ggplot(df, aes(x = numeric_column)) +
  geom_density(fill = "lightgreen", alpha = 0.5) +
  ggtitle("Density Plot to Detect Outliers")

# Remove outliers using the IQR method (define the 1.5 * IQR fences first)
q1 <- quantile(df$numeric_column, 0.25, na.rm = TRUE)
q3 <- quantile(df$numeric_column, 0.75, na.rm = TRUE)
iqr_val <- q3 - q1
lower_bound <- q1 - 1.5 * iqr_val
upper_bound <- q3 + 1.5 * iqr_val
df_cleaned <- df[df$numeric_column >= lower_bound & df$numeric_column <= upper_bound, ]
print("Dataset after Removing Outliers:")
print(df_cleaned)

# Check the structure
str(df)
# Check the class of every column
sapply(df, class)
# Data type of one column
class(df$age)
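# Instead of dropping outlier rows, a common alternative is to cap (winsorize)
# values at the IQR fences. A minimal sketch reusing lower_bound/upper_bound
# from the IQR section above (numeric_capped is our placeholder name):
df$numeric_capped <- pmin(pmax(df$numeric_column, lower_bound), upper_bound)
summary(df$numeric_capped)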
# VISUALIZATION
# Histogram
library(ggplot2)
ggplot(data, aes(x = Score)) +
  geom_histogram(binwidth = 5, fill = "blue", color = "black") +
  labs(title = "Histogram of Scores", x = "Score", y = "Frequency")

# Density plot
ggplot(data, aes(x = Score)) +
  geom_density(fill = "blue", alpha = 0.5) +
  labs(title = "Density Plot of Scores", x = "Score", y = "Density")

# Boxplot
ggplot(data, aes(x = Category, y = Score)) +
  geom_boxplot(fill = "lightblue") +
  labs(title = "Box Plot of Scores by Category", x = "Category", y = "Score")

# Violin plot
ggplot(data, aes(x = Category, y = Score)) +
  geom_violin(fill = "lightgreen", alpha = 0.7) +
  labs(title = "Violin Plot of Scores by Category", x = "Category", y = "Score")

# Bar plot
ggplot(data, aes(x = Category)) +
  geom_bar(fill = "purple") +
  labs(title = "Bar Plot of Categories", x = "Category", y = "Count")

# Scatter plot
ggplot(data, aes(x = Age, y = Score)) +
  geom_point(color = "blue", size = 3) +
  labs(title = "Scatter Plot of Age vs. Score", x = "Age", y = "Score")

# Line plot
ggplot(data, aes(x = Age, y = Score, group = 1)) +
  geom_line(color = "red", size = 1) +
  geom_point(size = 2) +
  labs(title = "Line Plot of Age vs. Score", x = "Age", y = "Score")

# Heatmap of correlations
library(reshape2)
cor_data <- cor(data[, c("Age", "Score")], use = "complete.obs")
melted_data <- melt(cor_data)
ggplot(melted_data, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0) +
  labs(title = "Heatmap of Correlations", x = "", y = "")

# Facet plot
ggplot(data, aes(x = Score)) +
  geom_histogram(binwidth = 5, fill = "orange", color = "black") +
  facet_wrap(~ Category) +
  labs(title = "Histogram of Scores by Category", x = "Score", y = "Frequency")

# QQ plot
ggplot(data, aes(sample = Score)) +
  stat_qq() +
  stat_qq_line(color = "red") +
  labs(title = "QQ Plot of Scores",
       x = "Theoretical Quantiles", y = "Sample Quantiles")

# Pair plot (ggpairs takes its title as an argument rather than via labs())
library(GGally)
ggpairs(data[, c("Age", "Score")], title = "Pair Plot of Age and Score")

# Dot plot
ggplot(data, aes(x = Category, y = Score)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.8) +
  labs(title = "Dot Plot of Scores by Category", x = "Category", y = "Score")

# Area plot
ggplot(data, aes(x = Age, y = Score)) +
  geom_area(fill = "lightblue", alpha = 0.5) +
  labs(title = "Area Plot of Age vs. Score", x = "Age", y = "Score")

# Bubble chart
ggplot(data, aes(x = Age, y = Score, size = Age)) +
  geom_point(color = "blue", alpha = 0.7) +
  labs(title = "Bubble Chart of Age vs. Score",
       x = "Age", y = "Score", size = "Age")

# Stacked bar chart
ggplot(data, aes(x = Category, fill = HighScore)) +
  geom_bar(position = "stack") +
  labs(title = "Stacked Bar Chart by HighScore", x = "Category", y = "Count")

# Radar chart
library(fmsb)
# fmsb::radarchart() expects one column per axis, with row 1 = axis maxima and
# row 2 = axis minima, followed by the data rows.
radar_values <- data.frame(
  Average = mean(data$Score, na.rm = TRUE),
  SD = sd(data$Score, na.rm = TRUE),
  Max = max(data$Score, na.rm = TRUE)
)
radar_data <- rbind(rep(100, 3), rep(0, 3), radar_values)
radarchart(radar_data)

# Pie chart
ggplot(data, aes(x = "", fill = Category)) +
  geom_bar(width = 1, stat = "count") +
  coord_polar(theta = "y") +
  labs(title = "Pie Chart of Categories", x = "", y = "")

# Spline chart
ggplot(data, aes(x = Age, y = Score)) +
  geom_smooth(method = "loess", color = "darkgreen") +
  labs(title = "Spline Chart of Age vs. Score", x = "Age", y = "Score")
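# Any of the ggplot charts above can be written to disk with ggsave(), which
# saves the most recently displayed plot by default. The filename and
# dimensions below are placeholder choices:
ggsave("last_plot.png", width = 6, height = 4, dpi = 300)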
# CORRELATION
# Several snippets below assume extra columns (Height, Weight, BinaryVar1/2)
# exist in the data; they are placeholders like numeric_column elsewhere.
cor(data$Age, data$Score, method = "pearson")
cor(data$Age, data$Score, method = "spearman")
cor(data$Age, data$Score, method = "kendall")

# Correlation matrix
cor_matrix <- cor(data[, c("Age", "Score", "Height")],
                  use = "complete.obs", method = "pearson")
print(cor_matrix)

# Significance test
cor_test <- cor.test(data$Age, data$Score, method = "pearson")
print(cor_test)

# Partial correlation
library(ppcor)
partial_cor <- pcor(data[, c("Age", "Score", "Height")])
print(partial_cor$estimate)

# Pairwise correlation (numeric columns only)
pairwise_cor <- cor(data, use = "pairwise.complete.obs")
print(pairwise_cor)

# Matrix of p-values
library(Hmisc)
cor_res <- rcorr(as.matrix(data[, c("Age", "Score", "Height")]))
cor_res$P

# Biweight midcorrelation
library(WGCNA)
bicor_res <- bicor(data[, c("Age", "Score", "Height")])
print(bicor_res)

# Distance correlation
library(energy)
dcor_res <- dcor(data$Age, data$Score)
print(dcor_res)

# Polychoric correlation (for ordinal variables)
library(polycor)
poly_cor <- polychor(data$Age, data$Score)
print(poly_cor)

# Phi coefficient (for two binary variables)
library(psych)
phi_res <- phi(table(data$BinaryVar1, data$BinaryVar2))
print(phi_res)

# Covariance
cov_res <- cov(data$Age, data$Score, use = "complete.obs")
print(cov_res)

# Correlation between two sets of variables
library(psych)
cor2_res <- cor2(data[, c("Age", "Score")], data[, c("Height", "Weight")])
print(cor2_res)

# UNIVARIATE ANALYSIS
# Descriptive statistics for a numeric column
library(moments)  # for skewness() and kurtosis()
# Helper to compute the mode (base R has no mode function for data)
get_mode <- function(x) {
  ux <- unique(x[!is.na(x)])
  ux[which.max(tabulate(match(x, ux)))]
}
summary_stats <- summary(df$numeric_column)
mean_val <- mean(df$numeric_column, na.rm = TRUE)
median_val <- median(df$numeric_column, na.rm = TRUE)
mode_val <- get_mode(df$numeric_column)
variance_val <- var(df$numeric_column, na.rm = TRUE)
sd_val <- sd(df$numeric_column, na.rm = TRUE)
range_val <- range(df$numeric_column, na.rm = TRUE)
IQR_val <- IQR(df$numeric_column, na.rm = TRUE)
skewness_val <- skewness(df$numeric_column, na.rm = TRUE)
kurtosis_val <- kurtosis(df$numeric_column, na.rm = TRUE)

# Quartiles for a numeric column
quantile(df$numeric_column, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
# Deciles for a numeric column
quantile(df$numeric_column, probs = seq(0, 1, by = 0.1), na.rm = TRUE)
# 10th, 50th (median), and 90th percentiles
percentiles <- quantile(df$numeric_column, probs = c(0.10, 0.50, 0.90), na.rm = TRUE)
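# As a shortcut for the block of individual statistics above, psych::describe()
# reports most of them (n, mean, sd, median, skew, kurtosis, range, se) in one
# call. Sketch with the same placeholder column:
library(psych)
describe(df$numeric_column)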
# BIVARIATE ANALYSIS
# Linear regression model
lm_model <- lm(numeric_column2 ~ numeric_column1, data = df)
summary(lm_model)

# Plot the regression line
plot(df$numeric_column1, df$numeric_column2,
     main = "Scatter Plot with Regression Line",
     xlab = "Numeric Column1", ylab = "Numeric Column2",
     col = "blue", pch = 16)
abline(lm_model, col = "red")

# ANOVA to check for differences in means
anova_result <- aov(numeric_column ~ categorical_column, data = df)
summary(anova_result)

# Summary statistics by group
aggregate(numeric_column ~ categorical_column, data = df, FUN = summary)

# Contingency table for categorical variables
contingency_table <- table(df$categorical_column1, df$categorical_column2)
print(contingency_table)

# Chi-square test of independence
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)

# Mosaic plot
mosaicplot(contingency_table, main = "Mosaic Plot of Categorical Variables",
           color = TRUE)

# Stacked bar plot
library(ggplot2)
ggplot(df, aes(x = categorical_column1, fill = categorical_column2)) +
  geom_bar(position = "stack") +
  labs(title = "Stacked Bar Plot of Categorical Variables",
       x = "Categorical Column1", y = "Count")

# Quick reference versions of the same analyses
plot(df$numeric_column1, df$numeric_column2)             # scatter plot
cor(df$numeric_column1, df$numeric_column2)              # correlation
lm_model <- lm(numeric_column2 ~ numeric_column1, data = df)
summary(lm_model)                                        # linear regression
boxplot(numeric_column ~ categorical_column, data = df)  # boxplot
anova_result <- aov(numeric_column ~ categorical_column, data = df)
summary(anova_result)                                    # ANOVA
table(df$categorical_column1, df$categorical_column2)    # contingency table
chi_square_result <- chisq.test(contingency_table)
print(chi_square_result)  # use print(), not summary(), for htest objects

# Scatter plot colored by a categorical variable
plot(df$numeric_column1, df$numeric_column2,
     col = df$categorical_column, pch = 16,
     main = "Scatter Plot Colored by Categorical Variable")

# Pairwise scatter plot for multiple variables
pairs(df[, c("numeric_column1", "numeric_column2", "numeric_column3")],
      main = "Pairwise Scatter Plot")

# TIME SERIES ANALYSIS (AirPassengers)
# Step 1: Load the built-in AirPassengers dataset
data("AirPassengers")

# Step 2: Check the structure and data type
str(AirPassengers)
class(AirPassengers)

# Step 3: Check for missing values
sum(is.na(AirPassengers))

# Step 4: Check the start and end dates of the series
start(AirPassengers)
end(AirPassengers)

# Step 5: Check the frequency (observations per year)
frequency(AirPassengers)

# Step 6: Summary of the dataset
summary(AirPassengers)

# Step 7: Plot the decomposition (trend, seasonal, random)
decomposed_data <- decompose(AirPassengers)
plot(decomposed_data)

# Step 8: Plot the dataset
plot(AirPassengers, main = "Air Passengers Time Series",
     ylab = "Number of Passengers", xlab = "Time", col = "blue")

# Step 9: Plot the time series
plot.ts(AirPassengers, main = "Air Passengers Time Series",
        ylab = "Number of Passengers", col = "red")

# Step 10: Draw the regression line (linear model against time)
lm_model <- lm(AirPassengers ~ time(AirPassengers))
abline(lm_model, col = "green")

# Step 11: Print the cycle (month positions) across the years
cycle(AirPassengers)

# Step 12: Make the series stationary (constant mean and variance)
# a. Log transformation (stabilizes the growing variance)
log_air_passengers <- log(AirPassengers)
plot(log_air_passengers, main = "Log-transformed Air Passengers",
     ylab = "Log(Number of Passengers)", col = "purple")
# b. Difference the log-transformed data to remove the trend
diff_log_air_passengers <- diff(log_air_passengers)
plot(diff_log_air_passengers,
     main = "Differenced Log-transformed Air Passengers",
     ylab = "Differenced Log(Number of Passengers)", col = "orange")

# Step 13: Boxplot across months to show the seasonal effect
boxplot(AirPassengers ~ cycle(AirPassengers),
        main = "Seasonal Effect by Month",
        xlab = "Month", ylab = "Number of Passengers", col = "lightblue")

# ACF (autocorrelation function) plot
acf(AirPassengers, main = "Autocorrelation Function (ACF)")
# PACF (partial autocorrelation function) plot
pacf(AirPassengers, main = "Partial Autocorrelation Function (PACF)")

# Augmented Dickey-Fuller test for stationarity
library(tseries)
adf_test <- adf.test(AirPassengers)
print(adf_test)

# STL decomposition (seasonal-trend decomposition using LOESS)
stl_decomposition <- stl(AirPassengers, s.window = "periodic")
plot(stl_decomposition)

# Fit an ARIMA model
library(forecast)
arima_model <- auto.arima(AirPassengers)
summary(arima_model)

# Forecast the next 12 months
forecast_values <- forecast(arima_model, h = 12)
plot(forecast_values)

# Fit an exponential smoothing (ETS) model
ets_model <- ets(AirPassengers)
summary(ets_model)
# Forecast with the ETS model
ets_forecast <- forecast(ets_model, h = 12)
plot(ets_forecast)
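# Before trusting the ARIMA forecasts above, it is worth checking that the
# residuals resemble white noise. forecast::checkresiduals() plots the
# residuals with their ACF and runs a Ljung-Box test in one call. Sketch using
# arima_model from above:
checkresiduals(arima_model)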
# Rolling-origin cross-validation for time series (example using ARIMA)
library(forecast)
n <- length(AirPassengers)
train_size <- n - 12  # train on everything except the last 12 months
# Only folds whose 12-month test window stays inside the data are usable;
# shrink train_size to get more folds.
for (i in seq(train_size, n - 12)) {
  train_data <- ts(AirPassengers[1:i], start = start(AirPassengers), frequency = 12)
  test_data <- AirPassengers[(i + 1):(i + 12)]
  # Fit an ARIMA model on the training window
  model <- auto.arima(train_data)
  # Forecast the next 12 months
  forecast_values <- forecast(model, h = 12)
  # Compare the forecast with the held-out test data
  print(accuracy(forecast_values, test_data))
}

# Simple moving average (use stats::filter explicitly; dplyr masks filter())
sma <- stats::filter(AirPassengers, rep(1/12, 12), sides = 2)
plot(sma, main = "Simple Moving Average of AirPassengers", col = "red", lwd = 2)

# Cumulative moving average
cma <- cumsum(AirPassengers) / seq_along(AirPassengers)
plot(cma, main = "Cumulative Moving Average of AirPassengers",
     col = "green", lwd = 2, type = "l")

# Seasonal subseries plot
library(forecast)
seasonplot(AirPassengers, year.labels = TRUE, main = "Seasonal Subseries Plot")

# Seasonal adjustment
seasonally_adjusted <- seasadj(stl_decomposition)
plot(seasonally_adjusted, main = "Seasonally Adjusted Air Passengers Data")

# Time series clustering
library(cluster)
library(forecast)
# Prepare a time series matrix (for clustering multiple series; here a single
# series, so k-means groups the observations into low/medium/high levels)
data_matrix <- matrix(AirPassengers, ncol = 1)
clusters <- kmeans(data_matrix, centers = 3)
plot(clusters$centers, main = "Cluster Centers of Time Series")

# Ljung-Box test for autocorrelation in the ARIMA residuals
Box.test(residuals(arima_model), lag = 12, type = "Ljung-Box")

# Fit a Holt-Winters model
holt_winters_model <- HoltWinters(AirPassengers)
# Forecast with the Holt-Winters model
holt_winters_forecast <- forecast(holt_winters_model, h = 12)
plot(holt_winters_forecast)

# USER-DEFINED FUNCTIONS
# Define a function to calculate the square of a number
square <- function(x) {
  return(x^2)
}
# Call the function
result <- square(4)
print(result)  # Output: 16

# Define a function to calculate the area of a rectangle
rectangle_area <- function(length, width) {
  area <- length * width
  return(area)
}
# Call the function
area <- rectangle_area(5, 10)
print(area)  # Output: 50

# Define a function that adds or subtracts based on the operation argument
calculate <- function(a, b, operation = "add") {
  if (operation == "add") {
    return(a + b)
  } else if (operation == "subtract") {
    return(a - b)
  } else {
    stop("Invalid operation. Use 'add' or 'subtract'.")
  }
}
# Call the function with addition
result_add <- calculate(5, 3, "add")
print(result_add)  # Output: 8
# Call the function with subtraction
result_subtract <- calculate(5, 3, "subtract")
print(result_subtract)  # Output: 2
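# A brief usage note on the functions above: because R arithmetic is
# vectorized, the same definitions work element-wise on whole vectors:
print(square(1:5))                                # 1 4 9 16 25
print(calculate(c(10, 20), c(1, 2), "subtract"))  # 9 18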