Graph Embedding

GNN Embedding:

Example: generating GNN embeddings from a gene adjacency matrix, omics data, phenotype labels, and clinical data

import pandas as pd
from bioneuralnet.network_embedding import GNNEmbedding

def main():
    """Build small example inputs and generate GNN embeddings.

    Creates toy phenotype, omics, clinical and adjacency DataFrames, passes
    them to ``GNNEmbedding`` with ``model_type='GCN'``, runs the embedding and
    prints the head of the ``'graph'`` entry of the returned mapping.
    """
    phenotype_df = pd.DataFrame({
        'SampleID': ['S1', 'S2', 'S3', 'S4'],
        'Phenotype': ['Control', 'Treatment', 'Control', 'Treatment']
    })

    omics_df1 = pd.DataFrame({
        'SampleID': ['S1', 'S2', 'S3', 'S4'],
        'GeneA': [1.2, 2.3, 3.1, 4.0],
        'GeneB': [2.1, 3.4, 1.2, 3.3],
        'GeneC': [3.3, 1.5, 2.2, 4.1]
    })

    omics_df2 = pd.DataFrame({
        'SampleID': ['S1', 'S2', 'S3', 'S4'],
        'GeneD': [4.2, 5.3, 6.1, 7.0],
        'GeneE': [5.1, 6.4, 4.2, 6.3],
        'GeneF': [6.3, 4.5, 5.2, 7.1]
    })

    clinical_data_df = pd.DataFrame({
        'SampleID': ['S1', 'S2', 'S3', 'S4'],
        'Age': [30, 40, 50, 60],
        'Sex': ['Male', 'Female', 'Female', 'Male'],
        'BMI': [25.0, 28.1, 30.2, 24.5]
    })

    # Symmetric gene-by-gene weight matrix; rows and columns share the same labels.
    adjacency_matrix = pd.DataFrame({
        'GeneA': [1.0, 0.8, 0.3, 0.0],
        'GeneB': [0.8, 1.0, 0.4, 0.0],
        'GeneC': [0.3, 0.4, 1.0, 0.7],
        'GeneD': [0.0, 0.0, 0.7, 1.0]
    }, index=['GeneA', 'GeneB', 'GeneC', 'GeneD'])

    # Index each omics table by SampleID before joining column-wise, so the
    # combined frame has one sample index instead of two duplicate 'SampleID'
    # columns, matching how the phenotype and clinical frames are passed below.
    omics_data = pd.concat(
        [omics_df1.set_index('SampleID'), omics_df2.set_index('SampleID')],
        axis=1,
    )

    # NOTE(review): the adjacency matrix covers GeneA-GeneD while omics_data
    # also contains GeneE/GeneF - confirm GNNEmbedding accepts the extra columns.
    # Each keyword is passed exactly once; repeating a keyword argument is a
    # SyntaxError in Python.
    gnn_embed = GNNEmbedding(
        adjacency_matrix=adjacency_matrix,
        omics_data=omics_data,
        phenotype_df=phenotype_df.set_index('SampleID'),
        clinical_data_df=clinical_data_df.set_index('SampleID'),
        model_type='GCN',
    )

    # Run GNN embedding process
    print("Generating GNN embeddings...")
    embeddings_dict = gnn_embed.run()
    embeddings = embeddings_dict['graph']

    print("GNN Embeddings generated successfully.")
    print(embeddings.head())

# Run the GNN example only when this snippet is executed as a script, not on import.
if __name__ == "__main__":
    main()

Node2Vec Embedding:

Example: generating Node2Vec embeddings from an adjacency matrix and saving them to a CSV file

import pandas as pd
from bioneuralnet.network_embedding import Node2VecEmbedding


def main():
    """Run the Node2Vec example: embed a toy graph, print and save the result.

    Builds a 4-node adjacency matrix, runs ``Node2VecEmbedding`` on it, prints
    the returned embeddings and writes them to ``output/embeddings.csv``.

    Raises:
        Exception: any error raised during the workflow is reported on stdout
            and then re-raised unchanged.
    """
    # Local import keeps this example self-contained.
    from pathlib import Path

    try:
        print("Starting Node2Vec Embedding Workflow...")

        adjacency_matrix = pd.DataFrame({
            'GeneA': [1.0, 1.0, 0.0, 0.0],
            'GeneB': [1.0, 1.0, 1.0, 0.0],
            'GeneC': [0.0, 1.0, 1.0, 1.0],
            'GeneD': [0.0, 0.0, 1.0, 1.0]
        }, index=['GeneA', 'GeneB', 'GeneC', 'GeneD'])

        node2vec = Node2VecEmbedding(
            adjacency_matrix=adjacency_matrix,
            embedding_dim=64,
            walk_length=30,
            num_walks=200,
            window_size=10,
            workers=4,
            seed=42,
        )

        embeddings = node2vec.run()

        print("\nNode Embeddings:")
        print(embeddings)

        # The embedder also has a built-in saver,
        # e.g. node2vec.save_embeddings('node_embeddings.csv').
        # Here the returned embeddings are written to CSV directly instead.
        output_file = Path('output/embeddings.csv')
        # Create the target directory first; to_csv does not create missing
        # parent directories and would fail on a fresh checkout.
        output_file.parent.mkdir(parents=True, exist_ok=True)
        embeddings.to_csv(output_file)

        print("\nNode2Vec Embedding Workflow completed successfully.")

    except Exception as e:
        print(f"An error occurred during execution: {e}")
        # Bare raise re-raises the active exception with its original traceback.
        raise


# Run the Node2Vec example only when this snippet is executed as a script, not on import.
if __name__ == "__main__":
    main()