Pseudo Code Generator

Pseudo Code for VGAE with CPM on Karate Club Dataset

This document presents a structured pseudo code representation for implementing a Variational Graph Autoencoder (VGAE) with Community Partitioning Method (CPM) tailored for the Karate Club dataset, detailing model setup, training,


Empty image or helper icon

Prompt

import torch
import torch.nn.functional as F
from torch_geometric.datasets import KarateClub
from torch_geometric.nn import GCNConv, VGAE
from torch_geometric.utils import train_test_split_edges
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from itertools import combinations
from networkx.algorithms.community import k_clique_communities
from sklearn.metrics.pairwise import cosine_similarity

# Set a random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Load the Karate Club dataset
dataset = KarateClub()
data = dataset[0]
data = train_test_split_edges(data)

# Define the GCN Encoder for the VGAE model
class GCNEncoder(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCNEncoder, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2_mu = GCNConv(hidden_channels, out_channels)
        self.conv2_logstd = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        x = F.relu(self.conv1(x, edge_index))
        return self.conv2_mu(x, edge_index), self.conv2_logstd(x, edge_index)

# Define the VGAE model with the GCN encoder
class VGAEWithCPM(VGAE):
    def __init__(self, encoder):
        super(VGAEWithCPM, self).__init__(encoder)

    def forward(self, x, edge_index):
        z = super(VGAEWithCPM, self).encode(x, edge_index)
        memberships = self.cpm_community_detection(z, edge_index)
        return z, memberships

    # Define CPM for overlapping community detection
    def cpm_community_detection(self, z, edge_index):
        # Convert the edge index to a NetworkX graph
        edges = edge_index.t().detach().cpu().numpy()
        G = nx.Graph()
        G.add_edges_from(edges)

        # Compute similarity matrix from embeddings
        similarity_matrix = cosine_similarity(z.detach().cpu().numpy())

        # Update edge weights based on similarity matrix
        weighted_edges = [(i, j, similarity_matrix[i, j]) for i, j in edges]
        G.add_weighted_edges_from(weighted_edges)

        # Perform k-clique community detection using CPM (k=3 for triangles)
        k = 3
        communities = list(k_clique_communities(G, k))

        # Generate membership matrix for each node
        memberships = np.zeros((G.number_of_nodes(), len(communities)))

        for community_idx, community in enumerate(communities):
            for node in community:
                memberships[node, community_idx] = 1

        # Convert memberships to a PyTorch tensor
        memberships = torch.tensor(memberships, dtype=torch.float32)

        # Return softmax to simulate overlapping membership probabilities
        return torch.softmax(memberships, dim=1)

# Initialize the models and optimizer
encoder = GCNEncoder(in_channels=dataset.num_node_features, hidden_channels=16, out_channels=16)
model = VGAEWithCPM(encoder)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Function to create the adjacency matrix from the edge index
def get_adjacency_matrix(edge_index, num_nodes):
    adj = torch.zeros((num_nodes, num_nodes))
    adj[edge_index[0], edge_index[1]] = 1
    return adj

# Training the model
def train():
    model.train()
    optimizer.zero_grad()

    z, memberships = model(data.x, data.train_pos_edge_index)

    adj = get_adjacency_matrix(data.train_pos_edge_index, data.num_nodes)
    loss = model.recon_loss(z, data.train_pos_edge_index) + torch.norm(torch.mm(memberships, memberships.t()) - adj)

    loss.backward()
    optimizer.step()

    return loss.item()

# Train the model for a few epochs
for epoch in range(100):
    loss = train()
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss:.4f}')

# Evaluate the model
model.eval()
z, memberships = model(data.x, data.train_pos_edge_index)
memberships = memberships.detach().cpu().numpy()

# Plot the learned community memberships with colored edges
G = nx.karate_club_graph()
pos = nx.spring_layout(G, seed=42)
colors = ['r', 'g', 'b', 'y']

plt.figure(figsize=(8, 6))

# Draw nodes with the color of the top community membership
for i, node in enumerate(G.nodes()):
    community = np.argmax(memberships[i])
    nx.draw_networkx_nodes(G, pos, nodelist=[node], node_color=colors[community], node_size=500, alpha=0.8)

# Draw edges with colors based on the communities of connected nodes
for u, v in G.edges():
    u_community = np.argmax(memberships[u])
    v_community = np.argmax(memberships[v])
    if u_community == v_community:
        edge_color = colors[u_community]
    else:
        edge_color = 'k'  # Black for edges between different communities
    nx.draw_networkx_edges(G, pos, edgelist=[(u, v)], edge_color=edge_color)

nx.draw_networkx_labels(G, pos, font_color='white')
plt.title('VGAE with CPM Community Detection on Karate Club Graph')
plt.show()

# Calculate the Omega Index for overlapping community detection
def calculate_omega_index(true_memberships, pred_memberships):
    num_nodes = true_memberships.shape[0]
    num_pairs = num_nodes * (num_nodes - 1) // 2
    agreements = 0
    possible_agreements = 0

    for i, j in combinations(range(num_nodes), 2):
        true_overlap = np.dot(true_memberships[i], true_memberships[j])
        pred_overlap = np.dot(pred_memberships[i], pred_memberships[j])

        if true_overlap > 0 and pred_overlap > 0:
            agreements += 1
        elif true_overlap == 0 and pred_overlap == 0:
            agreements += 1

        possible_agreements += 1

    return agreements / possible_agreements if possible_agreements > 0 else 0.0

# Convert memberships to binary format (thresholding)
threshold = 0.5
pred_binary_memberships = (memberships > threshold).astype(int)

# Adjust the number of columns in true_binary_memberships based on the maximum label
num_communities = max(data.y.cpu().numpy()) + 1  # Adjust based on the number of distinct labels
true_binary_memberships = np.zeros((data.num_nodes, num_communities))

# Assign true community labels based on data.y
true_labels = data.y.cpu().numpy()
for i, label in enumerate(true_labels):
    true_binary_memberships[i][label] = 1

# Calculate the Omega Index
omega_index = calculate_omega_index(true_binary_memberships, pred_binary_memberships)
print(f'Omega Index: {omega_index:.4f}')

Answer

Pseudo Code for VGAE with CPM on Karate Club Dataset

This document provides a clear and structured pseudo code representation of the given Python script to facilitate understanding of the underlying logic and functionality.

1. Set Up Environment

  • Import necessary libraries:

    • PyTorch and functional components for neural networks.
    • Karate Club dataset from PyTorch Geometric.
    • Graph neural network components (GCN and VGAE).
    • Other utilities for graph handling and plotting.
  • Set random seeds for reproducibility.

2. Load Dataset

Initialize random_seed using 42
Load Karate Club dataset into 'dataset'
Obtain 'data' as the first element of dataset
Split edges of 'data' into training and testing sets

3. Define GCN Encoder

Class GCNEncoder:
    Constructor(in_channels, hidden_channels, out_channels):
        Initialize super class
        Create first convolution layer (GCNConv) with in_channels and hidden_channels
        Create second convolution layers for mean and log std (GCNConv) with hidden_channels and out_channels

    Function forward(x, edge_index):
        Apply ReLU activation on conv1
        Get 'mu' and 'logstd' from conv2_mu and conv2_logstd using x and edge_index
        Return mu and logstd

4. Define VGAE Model with CPM

Class VGAEWithCPM inherits VGAE:
    Constructor(encoder):
        Initialize super class with encoder

    Function forward(x, edge_index):
        Call encode method from parent class
        Apply cpm_community_detection using z and edge_index
        Return z and memberships

    Function cpm_community_detection(z, edge_index):
        Convert edge_index to NetworkX Graph
        Create similarity_matrix from embeddings z using cosine similarity
        Define weighted_edges based on similarity_matrix
        Add weighted_edges to graph G

        Set k = 3 for k-clique communities
        Get list of communities using k_clique_communities

        Create an empty memberships matrix (num_nodes x num_communities)

        For each community and its index:
            For each node in the community:
                Update memberships matrix to include the community

        Convert memberships to PyTorch tensor and apply softmax
        Return memberships

5. Initialize Model and Optimizer

Create encoder with GCNEncoder taking dataset properties
Create model instance of VGAEWithCPM passing encoder
Initialize optimizer as Adam with model parameters and learning rate = 0.01

6. Define Utility Function for Adjacency Matrix

Function get_adjacency_matrix(edge_index, num_nodes):
    Initialize adj matrix (zeros of size num_nodes x num_nodes)
    Set adj[edge_index[0], edge_index[1]] to 1
    Return adj

7. Training Procedure

Function train:
    Set model to training mode
    Zero gradients in optimizer

    Get z and memberships from model using training edge_index
    Calculate adjacency matrix using get_adjacency_matrix
    Compute loss as reconstruction loss + regularization on memberships

    Backpropagate loss
    Step optimizer

    Return loss value

8. Execute Training Loop

Set epochs = 100
For epoch in range(epochs):
    Call train function to get loss
    If epoch is multiple of 10:
        Print current epoch and loss

9. Model Evaluation

Set model to evaluation mode
Get z and memberships using training edge_index
Convert memberships to numpy array

Initialize network G from karate_club_graph
Set layout for visualization

Define colors for communities

Create plot:
    For each node in G:
        Get top community membership and draw nodes

    For each edge in G:
        Determine edge color based on connected nodes' community membership
        Draw edges with the respective colors

Finalize plot with labels and title
Show plot

10. Calculate Omega Index for Community Detection

Function calculate_omega_index(true_memberships, pred_memberships):
    Initialize agreements and possible_agreements
    For each pair of nodes (i, j):
        Calculate overlaps between ground truth and predictions

        Update agreements count based on overlaps

    Return agreements / possible_agreements

11. Prepare Membership Data and Calculate Omega Index

Set threshold value
Convert memberships to binary based on threshold

Prepare true binary memberships based on true_labels using data.y
Calculate omega_index using calculate_omega_index
Output omega_index value

This pseudo code captures the essential logic and flow of the original Python program while maintaining clarity and structure for ease of understanding.

Create your Thread using our flexible tools, share it with friends and colleagues.

Your current query will become the main foundation for the thread, which you can expand with other tools presented on our platform. We will help you choose tools so that your thread is structured and logically built.

Description

This document presents a structured pseudo code representation for implementing a Variational Graph Autoencoder (VGAE) with Community Partitioning Method (CPM) tailored for the Karate Club dataset, detailing model setup, training, evaluation, and community detection.