# Untitled

# The Wilcoxon Signed-Rank Test
# Load necessary library
library(stats)

# Print one section of the report: the header text as given, then one
# "<label>: p-value = <p>" line for every named test result in `tests`.
# When `blank_after` is TRUE an extra newline is written after the section.
print_p_values <- function(header, tests, blank_after = TRUE) {
  cat(header)
  for (label in names(tests)) {
    cat(paste0(label, ": p-value ="), tests[[label]]$p.value, "\n")
  }
  if (blank_after) {
    cat("\n")
  }
  invisible(tests)
}

# ---- Scores ----
# Six BLEU and six TER scores per system. The paired tests below match the
# vectors by position, so element i of every vector is treated as the same
# item.
google_bleu <- c(0.563, 0.083, 0.533, 0.627, 0.417, 0.413)
google_ter <- c(0.222, 0.45, 0.429, 0.167, 0.391, 0.435)

chatgpt_bleu <- c(0.076, 0.202, 0.366, 0.563, 0.077, 0.114)
chatgpt_ter <- c(0.556, 0.65, 0.571, 0.222, 0.609, 0.783)

# ---- Tests ----
# Shapiro-Wilk normality test on each score vector. The results are only
# reported; nothing below branches on them.
shapiro_google_bleu <- shapiro.test(google_bleu)
shapiro_google_ter <- shapiro.test(google_ter)
shapiro_chatgpt_bleu <- shapiro.test(chatgpt_bleu)
shapiro_chatgpt_ter <- shapiro.test(chatgpt_ter)

# Paired Wilcoxon signed-rank tests, Google vs ChatGPT, one per metric.
wilcoxon_bleu <- wilcox.test(google_bleu, chatgpt_bleu, paired = TRUE)
wilcoxon_ter <- wilcox.test(google_ter, chatgpt_ter, paired = TRUE)

# Paired Wilcoxon signed-rank tests, BLEU vs TER, one per system.
# NOTE(review): this compares two different metrics directly - confirm that
# this comparison is intended.
wilcoxon_google_bleu_ter <- wilcox.test(google_bleu, google_ter, paired = TRUE)
wilcoxon_chatgpt_bleu_ter <- wilcox.test(chatgpt_bleu, chatgpt_ter, paired = TRUE)

# ---- Report ----
print_p_values(
  "Shapiro-Wilk Test Results:\n",
  list(
    "Google BLEU" = shapiro_google_bleu,
    "Google TER" = shapiro_google_ter,
    "ChatGPT BLEU" = shapiro_chatgpt_bleu,
    "ChatGPT TER" = shapiro_chatgpt_ter
  )
)

print_p_values(
  "Wilcoxon Signed-Rank Test Results (Google vs ChatGPT):\n",
  list(
    "BLEU Scores" = wilcoxon_bleu,
    "TER Scores" = wilcoxon_ter
  )
)

# The final section is not followed by a blank line.
print_p_values(
  "Wilcoxon Signed-Rank Test Results (BLEU vs TER within systems):\n",
  list(
    "Google BLEU vs TER" = wilcoxon_google_bleu_ter,
    "ChatGPT BLEU vs TER" = wilcoxon_chatgpt_bleu_ter
  ),
  blank_after = FALSE
)
# Editor is loading...