setwd('G:/R_results/New folder')
# Load your metabolome data
data <- read.csv("Input_metabolome.csv", row.names = 1) # Assuming the first column is metabolite IDs
colnames(data) <- sub("^X", "", colnames(data))
data_transposed <- t(data)
# Check for missing values
sum(is.na(data_transposed)) # Should be 0
# Normalize the data (e.g., log transformation)
data_transposed <- log2(data_transposed + 1) # Add 1 to avoid log(0)
# Hierarchical clustering to detect outliers
sample_tree <- hclust(dist(data_transposed), method = "average")
plot(sample_tree, main = "Sample Clustering to Detect Outliers", sub = "", xlab = "")
# Example: Remove outlier samples
outliers <- c("360d_5") # Replace with actual outlier sample names
data_transposed <- data_transposed[!rownames(data_transposed) %in% outliers, ]
# Choose soft-thresholding power
powers <- c(1:20) # Range of powers to test
sft <- pickSoftThreshold(data_transposed, powerVector = powers, verbose = 5)
plot(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit, signed R^2",
type = "n", main = "Scale Independence")
text(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
labels = powers, col = "red")
abline(h = 0.90, col = "red") # Target R^2 value
plot(sft$fitIndices[, 1], sft$fitIndices[, 5],
xlab = "Soft Threshold (power)", ylab = "Mean Connectivity",
type = "n", main = "Mean Connectivity")
text(sft$fitIndices[, 1], sft$fitIndices[, 5], labels = powers, col = "red")
soft_power <- 9 # Replace with the chosen power
net <- blockwiseModules(data_transposed,
TOMType = "unsigned", # Type of topological overlap matrix
minModuleSize = 30, # Minimum module size
pamRespectsDendro = FALSE,
module_colors <- labels2colors(net$colors)
# Plot the dendrogram and module colors
plotDendroAndColors(net$dendrograms[[1]], module_colors[net$blockGenes[[1]]],
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
# Calculate module-trait relationships, maybe use the carcasas traits?
module_trait_cor <- cor(net$MEs, traits, use = "p")
module_trait_pvalue <- corPvalueStudent(module_trait_cor, nrow(data_transposed))
# Visualize module-trait relationships
textMatrix <- paste(signif(module_trait_cor, 2), "\n(",
signif(module_trait_pvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(module_trait_cor)
par(mar = c(6, 8.5, 3, 3))
labeledHeatmap(Matrix = module_trait_cor,
yLabels = names(net$MEs),
ySymbols = names(net$MEs),
colors = blueWhiteRed(50),
main = "Module-Trait Relationships")
# Extract metabolites in a specific module
module <- "blue" # Replace with your module of interest
module_metabolites <- colnames(data_transposed)[module_colors == module]
# View metabolites in the module
print(module_metabolites)
# Create a data frame with module assignments
module_assignments <- data.frame(
Metabolite_ID = colnames(data_transposed), # Metabolite IDs
Module_Color = module_colors # Module colors
# View the first few rows of the data frame
# Get a list of all unique module colors
unique_modules <- unique(module_colors)
# Create a list to store metabolites for each module
module_metabolites_list <- lapply(unique_modules, function(module) {
module_assignments$Metabolite_ID[module_assignments$Module_Color == module]
# Name the list with module colors
names(module_metabolites_list) <- unique_modules
# View metabolites in each module
# Convert the list to a data frame
module_metabolites_df <- stack(module_metabolites_list)
colnames(module_metabolites_df) <- c("Metabolite_ID", "Module_Color")
head(module_metabolites_df)
# Save module assignments to a CSV file
write.csv(module_assignments, file = "module_assignments.csv", row.names = FALSE)
# Save module metabolites list to a CSV file
module_metabolites_df <- stack(module_metabolites_list)
colnames(module_metabolites_df) <- c("Metabolite_ID", "Module_Color")
write.csv(module_metabolites_df, file = "module_metabolites.csv", row.names = FALSE)
# Compute the adjacency matrix using the chosen soft-thresholding power
adjacency_matrix <- adjacency(data_transposed, power = soft_power)
# Calculate the Topological Overlap Matrix (TOM) from the adjacency matrix
TOM <- TOMsimilarity(adjacency_matrix)
# Hierarchical clustering of genes based on TOM
dissTOM <- 1 - TOM # Dissimilarity
geneTree <- hclust(as.dist(dissTOM), method = "average")
# Identify modules using dynamic tree cutting
dynamicMods <- cutreeDynamic(dendro = geneTree, distM = dissTOM, deepSplit = 2, pamRespectsDendro = FALSE)
# Convert the module labels into colors#######################
dynamicColors <- labels2colors(dynamicMods)
dynamicColors=module_colors
# Step 6: Calculate module eigengenes (summary profiles for each module)
moduleEigengenes = moduleEigengenes(data_transposed, dynamicColors)$eigengenes
# Step 7: Identify hub genes within each module
# You can use the "degree" of connectivity in the co-expression network to identify hub genes
degree = rowSums(TOM) # Sum the number of connections for each gene
# Create a data frame for storing the degree and module information
degree_info <- data.frame(
Metabolite_ID = colnames(data_transposed), # Metabolite IDs
Degree = degree, # Degree of connectivity
Module_Color = dynamicColors # Module colors
# Step 3: For each module, find the metabolites with the highest degree (hub metabolites)
hub_metabolites_list <- lapply(unique(dynamicColors), function(module) {
# Get metabolites in the current module
module_metabolites <- degree_info[degree_info$Module_Color == module, ]
# Sort by degree (highest degree means the hub metabolite)
module_metabolites_sorted <- module_metabolites[order(module_metabolites$Degree, decreasing = TRUE), ]
# Return the hub metabolites (e.g., top 5 hub metabolites for each module)
top_hub_metabolites <- head(module_metabolites_sorted, 5)
return(top_hub_metabolites)
# Step 4: Name the list with module colors (so each module has its hub metabolites)
names(hub_metabolites_list) <- unique(dynamicColors)
# Step 5: View the hub metabolites for each module
# Step 6: Convert the list to a data frame for easy viewing and export
hub_metabolites_df <- do.call(rbind, hub_metabolites_list)
write.csv(hub_metabolites_df, file = "hub_metabolites.csv", row.names = FALSE)
# View the top hub metabolites
# Calculate module eigengenes (MEs) for each module
MEList <- moduleEigengenes(data_transposed, colors = dynamicColors)
traits=read.csv('Ningxiang pig trait.csv')
rownames(traits)=traits$stage
# Calculate the correlation between MEs and traits
module_trait_cor <- cor(MEs, traits, use = "p")
# Compute p-values for the correlations
module_trait_pvalue <- corPvalueStudent(module_trait_cor, nrow(data_transposed))
# Print the correlation matrix
print(module_trait_pvalue)
# --- Permutation testing for module-trait correlation significance ---
# Get the actual correlation matrix
obs_cor <- module_trait_cor
# Create a matrix to store permuted correlation values
perm_cor_list <- vector("list", n.perm)
# Perform permutation testing
set.seed(123) # for reproducibility
permuted_traits <- traits[sample(1:nrow(traits)), , drop = FALSE]
# Recalculate correlation with permuted traits
perm_cor_list[[i]] <- cor(net$MEs, permuted_traits, use = "p")
# Convert list to 3D array for easy access
perm_array <- array(unlist(perm_cor_list),
dim = c(nrow(obs_cor), ncol(obs_cor), n.perm),
dimnames = list(rownames(obs_cor), colnames(obs_cor), NULL))
# Calculate empirical p-values
perm_pvalues <- matrix(NA, nrow = nrow(obs_cor), ncol = ncol(obs_cor),
dimnames = list(rownames(obs_cor), colnames(obs_cor)))
for (i in 1:nrow(obs_cor)) {
for (j in 1:ncol(obs_cor)) {
obs_val <- abs(obs_cor[i, j])
null_vals <- abs(perm_array[i, j, ])
perm_pvalues[i, j] <- mean(null_vals >= obs_val)
# View significant module–trait associations (e.g., p < 0.05)
significant_modules <- which(perm_pvalues < 0.05, arr.ind = TRUE)
print(significant_modules)
# Optional: combine observed correlation and permuted p-values
Module = rownames(obs_cor)[significant_modules[, 1]],
Trait = colnames(obs_cor)[significant_modules[, 2]],
Correlation = obs_cor[significant_modules],
Permutation_pvalue = perm_pvalues[significant_modules]
# Visualize the module-trait correlations
textMatrix <- paste(signif(module_trait_cor, 2), "\n(",
signif(module_trait_pvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(module_trait_cor)
png("module_trait_relationships.png", width = 4800, height = 4800, res = 300)
# Adjust margins: bottom=10, left=15, top=3, right=3
par(mar = c(10, 15, 3, 3))
labeledHeatmap(Matrix = module_trait_cor,
colors = blueWhiteRed(50),
main = "Module-Trait Relationships")
# Apply the correlation and p-value thresholds
filtered_correlations <- module_trait_cor[abs(module_trait_cor) > 0.5 & module_trait_pvalue < 0.05]
filtered_pvalues <- module_trait_pvalue[abs(module_trait_cor) > 0.5 & module_trait_pvalue < 0.05]
# Get the row and column indices of the filtered values
indices <- which(abs(module_trait_cor) > 0.5 & module_trait_pvalue < 0.05, arr.ind = TRUE)
# Extract module and trait names based on the indices
module_names <- rownames(module_trait_cor)[indices[, 1]]
trait_names <- colnames(module_trait_cor)[indices[, 2]]
# Create a data frame to store the results
filtered_results <- data.frame(
Correlation = filtered_correlations,
P_Value = filtered_pvalues
# Check the resulting data frame
# Export the filtered results to a CSV file
write.csv(filtered_results, file = "filtered_module_trait_correlations.csv", row.names = FALSE)
png("sig_module_trait_relationships.png", width = 2800, height = 2800, res = 300)
# Adjust margins: bottom=10, left=15, top=3, right=3
par(mar = c(10, 15, 3, 3))
# Create the plot with increased font size and bold text
ggplot(filtered_results, aes(x = Trait, y = Module, fill = Correlation)) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
axis.text.x = element_text(angle = 45, hjust = 1, size = 14, face = "bold"), # Bold and larger x-axis labels
axis.text.y = element_text(size = 14, face = "bold"), # Bold and larger y-axis labels
axis.title.x = element_text(size = 16, face = "bold"), # Bold x-axis title
axis.title.y = element_text(size = 16, face = "bold"), # Bold y-axis title
plot.title = element_text(size = 18, face = "bold", hjust = 0.5) # Bold and larger title
labs(title = "Module-Trait Correlations", x = "Trait", y = "Module")
# Save the adjacency matrix to a CSV file
write.csv(adjacency_matrix, "adjacency_matrix.csv")
# Convert the adjacency matrix to an edge list (pairs of genes/metabolites)
# Only keep interactions with a correlation above a threshold, e.g., 0.5
edge_list <- which(adjacency_matrix > threshold, arr.ind = TRUE)
# Create a data frame of edges (pairs of genes/metabolites)
from = rownames(adjacency_matrix)[edge_list[, 1]],
to = colnames(adjacency_matrix)[edge_list[, 2]],
weight = adjacency_matrix[edge_list]
# Save the edge list to a CSV file
write.csv(edges, "edge_list.csv", row.names = FALSE)
# Step 1: Initialize an empty list to store edge lists for each module
# Step 2: Iterate over each unique module color and extract edges for that module
unique_modules <- unique(module_colors)
for (module in unique_modules) {
# Find the indices of genes/metabolites that belong to this module
module_indices <- which(module_colors == module)
# Subset the adjacency matrix to include only these genes/metabolites
module_adjacency <- adjacency_matrix[module_indices, module_indices]
# Step 3: Create edge list for this module (where the adjacency value is between 0.7 and 1)
# Filter the adjacency matrix (keep values > 0.7 and < 1)
filtered_adjacency <- module_adjacency
filtered_adjacency[filtered_adjacency <= threshold_lower | filtered_adjacency >= threshold_upper] <- 0
# Step 4: Create the edge list (pairs of connected nodes)
edge_list <- which(filtered_adjacency > 0, arr.ind = TRUE)
# Step 5: Create a data frame of the edge list with 'from', 'to', and 'weight'
if (nrow(edge_list) > 0) { # Ensure there are edges to process
from = rownames(module_adjacency)[edge_list[, 1]],
to = colnames(module_adjacency)[edge_list[, 2]],
weight = filtered_adjacency[edge_list]
# Save the edge list to a CSV file with module color as part of the file name
file_name <- paste0("edge_list_", module, ".csv")
write.csv(edges, file_name, row.names = FALSE)
# Store edge list in the list for later use
edge_lists[[module]] <- edges
# generate file for visant plotting. Get all CSV files matching the pattern "edge_list_*.csv"
file_list <- list.files(pattern = "edge_list_.*\\.csv")
# Initialize an empty list to store the modified dataframes
# Loop through each file and process
for (file in file_list) {
# Load the edge list CSV file into a dataframe
edge_list <- read.csv(file)
# Insert two columns between the 2nd and 3rd columns
new_column1 <- rep(0, nrow(edge_list)) # First inserted column (with 0s)
new_column2 <- rep("M1002", nrow(edge_list)) # Second inserted column (with "M1002")
# Insert the columns into the dataframe
edge_list <- edge_list[, 1:2] # Keep the first two columns
edge_list$new_column1 <- new_column1 # Add the first inserted column
edge_list$new_column2 <- new_column2 # Add the second inserted column
# Add the rest of the original dataframe from the 3rd column onward
edge_list <- cbind(edge_list, edge_list[, 3:ncol(edge_list) - 2])
# Add the modified dataframe to the list
modified_list[[file]] <- edge_list
# Optionally, save the modified dataframe to a new CSV
write.csv(edge_list, paste0("modified_", file), row.names = FALSE)
modified_list[[1]] # Display the first modified dataframe as an example
# Step 1: Load your edge list (replace 'edge_list.csv' with your file)
edge_list <- read.csv("edge_list_blue.csv")
# Step 2: Create the graph object using the edge list
# Assuming your edge list has columns 'from', 'to', and optionally 'weight'
g <- graph_from_data_frame(edge_list, directed = FALSE)
# Step 3: Customize the plot (visualization)
# Set vertex colors based on degree (size of node)
V(g)$color <- ifelse(degree(g) > 5, "red", "lightblue")
# Set vertex size based on degree
V(g)$size <- degree(g) * 2
#Optional: Set edge width based on weight (if available)
if ("weight" %in% colnames(edge_list)) {
E(g)$width <- edge_list$weight * 2
# Set layout for better visualization (e.g., force-directed layout)
layout_type <- layout_with_fr # Fruchterman-Reingold layout (force-directed)
# Step 4: Plot the network
main = "Network Visualization",
vertex.label.cex = 0.2, # Adjust label size
vertex.label.color = "black", # Label color
edge.width = E(g)$width, # Adjust edge width based on weight
vertex.size = V(g)$size, # Node size based on degree
vertex.color = V(g)$color, # Node color based on degree
edge.arrow.size = 0.5) # Layout (force-directed)
# Step 5: Save the plot (optional)
# Save the plot as a PNG file
png("network_plot.png", width = 800, height =800, res=300)
main = "Network Visualization",
vertex.label.color = "black",
edge.width = E(g)$weight * 4,
vertex.color = V(g)$color,
# Perform WGCNA to detect modules
net <- blockwiseModules(data_transposed, power = power)
# Module assignments for each gene/metabolite
module_labels <- net$colors
# Extract module eigengenes (MEs)
# Example: Sample information (replace with your actual sample data)
sample_info <- data.frame(
Sample_ID = rownames(data_transposed), # Sample IDs
Stage = c("180d", "240d", "300d", "360d") # Example trait
# Combine sample info with MEs
MEs_with_samples <- cbind(sample_info, MEs)