WGCNA R code

Yu Chen

Mar 31, 2026

WGCNA R code

DOI

https://dx.doi.org/10.17504/protocols.io.kqdg3wj51v25/v1

Yu Chen¹

¹Institute of Animal Husbandry and Veterinary Medicine

Yu Chen

hunaas

DOI: https://dx.doi.org/10.17504/protocols.io.kqdg3wj51v25/v1

Protocol Citation: Yu Chen 2026. WGCNA R code. protocols.io https://dx.doi.org/10.17504/protocols.io.kqdg3wj51v25/v1

License: This is an open access protocol distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited

Protocol status: Working

We use this protocol and it's working

Created: June 04, 2025

Last Modified: March 31, 2026

Protocol Integer ID: 219520

Keywords: fat in ningxiang pig, metabolomic data, wgcna analysis, ningxiang pig, fat

Abstract

R code for WGCNA analysis of metabolomic data from back fat in Ningxiang pigs.

library(WGCNA)

setwd('G:/R_results/New folder')






# Load your metabolome data
data <- read.csv("Input_metabolome.csv", row.names = 1)  # Assuming the first column is metabolite IDs
#remove X in colnames
colnames(data) <- sub("^X", "", colnames(data))


# Transpose the data
data_transposed <- t(data)

# Check for missing values
sum(is.na(data_transposed))  # Should be 0

# Normalize the data (e.g., log transformation)
data_transposed <- log2(data_transposed + 1)  # Add 1 to avoid log(0)

# Hierarchical clustering to detect outliers
sample_tree <- hclust(dist(data_transposed), method = "average")
plot(sample_tree, main = "Sample Clustering to Detect Outliers", sub = "", xlab = "")

# Example: Remove outlier samples
outliers <- c("360d_5")  # Replace with actual outlier sample names
data_transposed <- data_transposed[!rownames(data_transposed) %in% outliers, ]

# Choose soft-thresholding power
powers <- c(1:20)  # Range of powers to test
sft <- pickSoftThreshold(data_transposed, powerVector = powers, verbose = 5)

# Plot the results
par(mfrow = c(1, 2))
plot(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit, signed R^2",
     type = "n", main = "Scale Independence")
text(sft$fitIndices[, 1], -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
     labels = powers, col = "red")
abline(h = 0.90, col = "red")  # Target R^2 value

plot(sft$fitIndices[, 1], sft$fitIndices[, 5],
     xlab = "Soft Threshold (power)", ylab = "Mean Connectivity",
     type = "n", main = "Mean Connectivity")
text(sft$fitIndices[, 1], sft$fitIndices[, 5], labels = powers, col = "red")

# Construct the network
soft_power <- 9  # Replace with the chosen power
net <- blockwiseModules(data_transposed,
                        power = soft_power,
                        TOMType = "unsigned",  # Type of topological overlap matrix
                        minModuleSize = 30,    # Minimum module size
                        reassignThreshold = 0,
                        mergeCutHeight = 0.25,
                        numericLabels = TRUE,
                        pamRespectsDendro = FALSE,
                        verbose = 3)

# View module colors
module_colors <- labels2colors(net$colors)
table(module_colors)

# Plot the dendrogram and module colors
plotDendroAndColors(net$dendrograms[[1]], module_colors[net$blockGenes[[1]]],
                    "Module colors",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)



# Calculate module-trait relationships, maybe use the carcasas traits?
module_trait_cor <- cor(net$MEs, traits, use = "p")
module_trait_pvalue <- corPvalueStudent(module_trait_cor, nrow(data_transposed))

# Visualize module-trait relationships
textMatrix <- paste(signif(module_trait_cor, 2), "\n(",
                    signif(module_trait_pvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(module_trait_cor)
par(mar = c(6, 8.5, 3, 3))
labeledHeatmap(Matrix = module_trait_cor,
               xLabels = names(traits),
               yLabels = names(net$MEs),
               ySymbols = names(net$MEs),
               colorLabels = FALSE,
               colors = blueWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE,
               cex.text = 0.5,
               zlim = c(-1, 1),
               main = "Module-Trait Relationships")



# Extract metabolites in a specific module
module <- "blue"  # Replace with your module of interest
module_metabolites <- colnames(data_transposed)[module_colors == module]

# View metabolites in the module
print(module_metabolites)

# Create a data frame with module assignments
module_assignments <- data.frame(
  Metabolite_ID = colnames(data_transposed),  # Metabolite IDs
  Module_Color = module_colors                # Module colors
)

# View the first few rows of the data frame
head(module_assignments)

# Get a list of all unique module colors
unique_modules <- unique(module_colors)

# Create a list to store metabolites for each module
module_metabolites_list <- lapply(unique_modules, function(module) {
  module_assignments$Metabolite_ID[module_assignments$Module_Color == module]
})

# Name the list with module colors
names(module_metabolites_list) <- unique_modules

# View metabolites in each module
module_metabolites_list

# Convert the list to a data frame
module_metabolites_df <- stack(module_metabolites_list)
colnames(module_metabolites_df) <- c("Metabolite_ID", "Module_Color")

# View the data frame
head(module_metabolites_df)


# Save module assignments to a CSV file
write.csv(module_assignments, file = "module_assignments.csv", row.names = FALSE)

# Save module metabolites list to a CSV file
module_metabolites_df <- stack(module_metabolites_list)
colnames(module_metabolites_df) <- c("Metabolite_ID", "Module_Color")
write.csv(module_metabolites_df, file = "module_metabolites.csv", row.names = FALSE)


# Compute the adjacency matrix using the chosen soft-thresholding power
adjacency_matrix <- adjacency(data_transposed, power = soft_power)


# Calculate the Topological Overlap Matrix (TOM) from the adjacency matrix
TOM <- TOMsimilarity(adjacency_matrix)

# Hierarchical clustering of genes based on TOM
dissTOM <- 1 - TOM  # Dissimilarity
geneTree <- hclust(as.dist(dissTOM), method = "average")

# Identify modules using dynamic tree cutting
dynamicMods <- cutreeDynamic(dendro = geneTree, distM = dissTOM, deepSplit = 2, pamRespectsDendro = FALSE)

# Convert the module labels into colors#######################
dynamicColors <- labels2colors(dynamicMods)
dynamicColors=module_colors

# Step 6: Calculate module eigengenes (summary profiles for each module)
moduleEigengenes = moduleEigengenes(data_transposed, dynamicColors)$eigengenes


# Step 7: Identify hub genes within each module
# You can use the "degree" of connectivity in the co-expression network to identify hub genes
degree = rowSums(TOM)  # Sum the number of connections for each gene


# Create a data frame for storing the degree and module information
degree_info <- data.frame(
  Metabolite_ID = colnames(data_transposed),  # Metabolite IDs
  Degree = degree,                           # Degree of connectivity
  Module_Color = dynamicColors               # Module colors
)

# Step 3: For each module, find the metabolites with the highest degree (hub metabolites)
hub_metabolites_list <- lapply(unique(dynamicColors), function(module) {
  # Get metabolites in the current module
  module_metabolites <- degree_info[degree_info$Module_Color == module, ]
  
  # Sort by degree (highest degree means the hub metabolite)
  module_metabolites_sorted <- module_metabolites[order(module_metabolites$Degree, decreasing = TRUE), ]
  
  # Return the hub metabolites (e.g., top 5 hub metabolites for each module)
  top_hub_metabolites <- head(module_metabolites_sorted, 5)
  return(top_hub_metabolites)
})

# Step 4: Name the list with module colors (so each module has its hub metabolites)
names(hub_metabolites_list) <- unique(dynamicColors)

# Step 5: View the hub metabolites for each module
hub_metabolites_list

# Step 6: Convert the list to a data frame for easy viewing and export
hub_metabolites_df <- do.call(rbind, hub_metabolites_list)
write.csv(hub_metabolites_df, file = "hub_metabolites.csv", row.names = FALSE)

# View the top hub metabolites
head(hub_metabolites_df)



# Calculate module eigengenes (MEs) for each module
MEList <- moduleEigengenes(data_transposed, colors = dynamicColors)
MEs <- MEList$eigengenes

#load traits file

traits=read.csv('Ningxiang pig trait.csv')
rownames(traits)=traits$stage
traits=traits[,-1]


# Calculate the correlation between MEs and traits
module_trait_cor <- cor(MEs, traits, use = "p")

# Compute p-values for the correlations
module_trait_pvalue <- corPvalueStudent(module_trait_cor, nrow(data_transposed))

# Print the correlation matrix
print(module_trait_cor)
print(module_trait_pvalue)

# --- Permutation testing for module-trait correlation significance ---

# Number of permutations
n.perm <- 1000

# Get the actual correlation matrix
obs_cor <- module_trait_cor

# Create a matrix to store permuted correlation values
perm_cor_list <- vector("list", n.perm)

# Perform permutation testing
set.seed(123)  # for reproducibility
for (i in 1:n.perm) {
  # Shuffle trait labels
  permuted_traits <- traits[sample(1:nrow(traits)), , drop = FALSE]
  
  # Recalculate correlation with permuted traits
  perm_cor_list[[i]] <- cor(net$MEs, permuted_traits, use = "p")
}

# Convert list to 3D array for easy access
perm_array <- array(unlist(perm_cor_list),
                    dim = c(nrow(obs_cor), ncol(obs_cor), n.perm),
                    dimnames = list(rownames(obs_cor), colnames(obs_cor), NULL))

# Calculate empirical p-values
perm_pvalues <- matrix(NA, nrow = nrow(obs_cor), ncol = ncol(obs_cor),
                       dimnames = list(rownames(obs_cor), colnames(obs_cor)))

for (i in 1:nrow(obs_cor)) {
  for (j in 1:ncol(obs_cor)) {
    obs_val <- abs(obs_cor[i, j])
    null_vals <- abs(perm_array[i, j, ])
    perm_pvalues[i, j] <- mean(null_vals >= obs_val)
  }
}

# View significant module–trait associations (e.g., p < 0.05)
significant_modules <- which(perm_pvalues < 0.05, arr.ind = TRUE)
print(significant_modules)

# Optional: combine observed correlation and permuted p-values
library(dplyr)
result_df <- data.frame(
  Module = rownames(obs_cor)[significant_modules[, 1]],
  Trait = colnames(obs_cor)[significant_modules[, 2]],
  Correlation = obs_cor[significant_modules],
  Permutation_pvalue = perm_pvalues[significant_modules]
)

print(result_df)



# Visualize the module-trait correlations
textMatrix <- paste(signif(module_trait_cor, 2), "\n(",
                    signif(module_trait_pvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(module_trait_cor)



png("module_trait_relationships.png", width = 4800, height = 4800, res = 300)
# Adjust margins: bottom=10, left=15, top=3, right=3
par(mar = c(10, 15, 3, 3))
labeledHeatmap(Matrix = module_trait_cor,
               xLabels = names(traits),
               yLabels = names(MEs),
               ySymbols = names(MEs),
               colorLabels = FALSE,
               colors = blueWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE,
               cex.text = 0.5,
               zlim = c(-1, 1),
               main = "Module-Trait Relationships")
dev.off()

# Apply the correlation and p-value thresholds
filtered_correlations <- module_trait_cor[abs(module_trait_cor) > 0.5 & module_trait_pvalue < 0.05]
filtered_pvalues <- module_trait_pvalue[abs(module_trait_cor) > 0.5 & module_trait_pvalue < 0.05]

# Get the row and column indices of the filtered values
indices <- which(abs(module_trait_cor) > 0.5 & module_trait_pvalue < 0.05, arr.ind = TRUE)

# Extract module and trait names based on the indices
module_names <- rownames(module_trait_cor)[indices[, 1]]
trait_names <- colnames(module_trait_cor)[indices[, 2]]

# Create a data frame to store the results
filtered_results <- data.frame(
  Module = module_names,
  Trait = trait_names,
  Correlation = filtered_correlations,
  P_Value = filtered_pvalues
)

# Check the resulting data frame
head(filtered_results)

  # Export the filtered results to a CSV file
  write.csv(filtered_results, file = "filtered_module_trait_correlations.csv", row.names = FALSE)
  
  
library(ggplot2)
library(reshape2)
  
  # Create a heatmap
  png("sig_module_trait_relationships.png", width = 2800, height = 2800, res = 300)
  # Adjust margins: bottom=10, left=15, top=3, right=3
  par(mar = c(10, 15, 3, 3))
  
  # Create the plot with increased font size and bold text
  ggplot(filtered_results, aes(x = Trait, y = Module, fill = Correlation)) +
    geom_tile() +
    scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1, size = 14, face = "bold"),  # Bold and larger x-axis labels
      axis.text.y = element_text(size = 14, face = "bold"),                        # Bold and larger y-axis labels
      axis.title.x = element_text(size = 16, face = "bold"),                       # Bold x-axis title
      axis.title.y = element_text(size = 16, face = "bold"),                       # Bold y-axis title
      plot.title = element_text(size = 18, face = "bold", hjust = 0.5)             # Bold and larger title
    ) +
    labs(title = "Module-Trait Correlations", x = "Trait", y = "Module")
dev.off()



# Save the adjacency matrix to a CSV file
write.csv(adjacency_matrix, "adjacency_matrix.csv")

# Convert the adjacency matrix to an edge list (pairs of genes/metabolites)
# Only keep interactions with a correlation above a threshold, e.g., 0.5
threshold <- 0.5
edge_list <- which(adjacency_matrix > threshold, arr.ind = TRUE)

# Create a data frame of edges (pairs of genes/metabolites)
edges <- data.frame(
  from = rownames(adjacency_matrix)[edge_list[, 1]],
  to = colnames(adjacency_matrix)[edge_list[, 2]],
  weight = adjacency_matrix[edge_list]
)

# Save the edge list to a CSV file
write.csv(edges, "edge_list.csv", row.names = FALSE)



# Step 1: Initialize an empty list to store edge lists for each module
edge_lists <- list()

# Step 2: Iterate over each unique module color and extract edges for that module
unique_modules <- unique(module_colors)
for (module in unique_modules) {
  
  # Find the indices of genes/metabolites that belong to this module
  module_indices <- which(module_colors == module)
  
  # Subset the adjacency matrix to include only these genes/metabolites
  module_adjacency <- adjacency_matrix[module_indices, module_indices]
  
  # Step 3: Create edge list for this module (where the adjacency value is between 0.7 and 1)
  threshold_lower <- 0.7
  threshold_upper <- 1
  
  # Filter the adjacency matrix (keep values > 0.7 and < 1)
  filtered_adjacency <- module_adjacency
  filtered_adjacency[filtered_adjacency <= threshold_lower | filtered_adjacency >= threshold_upper] <- 0
  
  # Step 4: Create the edge list (pairs of connected nodes)
  edge_list <- which(filtered_adjacency > 0, arr.ind = TRUE)
  
  # Step 5: Create a data frame of the edge list with 'from', 'to', and 'weight'
  if (nrow(edge_list) > 0) {  # Ensure there are edges to process
    edges <- data.frame(
      from = rownames(module_adjacency)[edge_list[, 1]],
      to = colnames(module_adjacency)[edge_list[, 2]],
      weight = filtered_adjacency[edge_list]
    )
    
    # Save the edge list to a CSV file with module color as part of the file name
    file_name <- paste0("edge_list_", module, ".csv")
    write.csv(edges, file_name, row.names = FALSE)
    
    # Store edge list in the list for later use
    edge_lists[[module]] <- edges
  }
}


# generate file for visant plotting. Get all CSV files matching the pattern "edge_list_*.csv"
file_list <- list.files(pattern = "edge_list_.*\\.csv")

# Initialize an empty list to store the modified dataframes
modified_list <- list()

# Loop through each file and process
for (file in file_list) {
  # Load the edge list CSV file into a dataframe
  edge_list <- read.csv(file)
  
  # Insert two columns between the 2nd and 3rd columns
  new_column1 <- rep(0, nrow(edge_list))  # First inserted column (with 0s)
  new_column2 <- rep("M1002", nrow(edge_list))  # Second inserted column (with "M1002")
  
  # Insert the columns into the dataframe
  edge_list <- edge_list[, 1:2]  # Keep the first two columns
  edge_list$new_column1 <- new_column1  # Add the first inserted column
  edge_list$new_column2 <- new_column2  # Add the second inserted column
  
  # Add the rest of the original dataframe from the 3rd column onward
  edge_list <- cbind(edge_list, edge_list[, 3:ncol(edge_list) - 2])
  
  # Add the modified dataframe to the list
  modified_list[[file]] <- edge_list
  
  # Optionally, save the modified dataframe to a new CSV
  write.csv(edge_list, paste0("modified_", file), row.names = FALSE)
}

# Check the result
modified_list[[1]]  # Display the first modified dataframe as an example



# Load necessary library
library(igraph)

# Step 1: Load your edge list (replace 'edge_list.csv' with your file)
edge_list <- read.csv("edge_list_blue.csv")

# Step 2: Create the graph object using the edge list
# Assuming your edge list has columns 'from', 'to', and optionally 'weight'
g <- graph_from_data_frame(edge_list, directed = FALSE)

# Step 3: Customize the plot (visualization)

# Set vertex colors based on degree (size of node)
V(g)$color <- ifelse(degree(g) > 5, "red", "lightblue")

# Set vertex size based on degree
V(g)$size <- degree(g) * 2

#Optional: Set edge width based on weight (if available)
if ("weight" %in% colnames(edge_list)) {
  E(g)$width <- edge_list$weight * 2
} else {
  E(g)$width <- 1
}

# Set layout for better visualization (e.g., force-directed layout)
layout_type <- layout_with_fr  # Fruchterman-Reingold layout (force-directed)

# Step 4: Plot the network
plot(g, 
     main = "Network Visualization", 
     vertex.label.cex = 0.2,  # Adjust label size
     vertex.label.color = "black",  # Label color
     edge.width = E(g)$width,  # Adjust edge width based on weight
     vertex.size = V(g)$size,  # Node size based on degree
     vertex.color = V(g)$color,  # Node color based on degree
     layout = layout_type,
     edge.arrow.size = 0.5)  # Layout (force-directed)

# Step 5: Save the plot (optional)
# Save the plot as a PNG file
png("network_plot.png", width = 800, height =800, res=300)
plot(g, 
     main = "Network Visualization", 
     vertex.label.cex = 0.02, 
     vertex.label.color = "black", 
     edge.width = E(g)$weight * 4, 
     vertex.size = V(g)$size, 
     vertex.color = V(g)$color, 
     layout = layout_with_fr)
dev.off()




# Perform WGCNA to detect modules
net <- blockwiseModules(data_transposed, power = power)

# Module assignments for each gene/metabolite
module_labels <- net$colors



# Extract module eigengenes (MEs)
MEs <- net$MEs

# View the MEs data
head(MEs)

# Example: Sample information (replace with your actual sample data)
sample_info <- data.frame(
  Sample_ID = rownames(data_transposed),  # Sample IDs
  Stage = c("180d", "240d", "300d", "360d")  # Example trait
)

# Combine sample info with MEs
MEs_with_samples <- cbind(sample_info, MEs)

# View the combined data
head(MEs_with_samples)