{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "5533c7d3-591e-471e-8295-aa128afcf27e",
   "metadata": {},
   "source": [
    "# Tutorial 2: Run HRCHY-CytoCommunity on 18 Mouse hypothalamic preoptic region MERFISH dataset\n",
    "Creator: Runzhi Xie (rzxie@stu.xidian.edu.cn).\n",
    "\n",
    "Affiliation: Xidian University, Gao Lab\n",
    "\n",
    "Date of Creation: 10.10.2025\n",
    "\n",
    "Date of Last Modification: 10.10.2025"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "df6ae573",
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4b16b52-5ff0-48ff-91d4-5672404458ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# standard library\n",
    "import datetime\n",
    "import os\n",
    "from typing import Optional\n",
    "\n",
    "# third-party\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scanpy as sc\n",
    "from sklearn.neighbors import kneighbors_graph\n",
    "\n",
    "# HRCHY-CytoCommunity\n",
    "from hrchy_cytocommunity.models.dataset import SpatialOmicsImageDataset\n",
    "from hrchy_cytocommunity.models import HRCHYCytoCommunity, HRCHYCytoCommunityGrand\n",
    "from hrchy_cytocommunity.visualization.visualization import load_base_data, vis_heatmap\n",
    "# NOTE(review): _dd_list/_dd_float are private names; presumably imported so that\n",
    "# saved results referencing them can be loaded -- confirm before removing.\n",
    "from hrchy_cytocommunity.models.auto_k import HRCHYClusterAutoK, _dd_list, _dd_float"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a2d0c9d4",
   "metadata": {},
   "source": [
    "## Prepare input data\n",
    "### Construct the KNN graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b182b0d-330f-4042-8655-6db2cba6c4f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_knn(coords, K, sample_id, save_folder: Optional[str] = None):\n",
    "    \"\"\"\n",
    "    Construct a symmetric KNN graph from cell coordinates and optionally save it to file.\n",
    "\n",
    "    Parameters:\n",
    "        coords: (n, 2) ndarray, the coordinates of cells\n",
    "        K: the number of nearest neighbors (1 <= K < n)\n",
    "        sample_id: sample id, used as the prefix of the saved file name\n",
    "        save_folder: the path of HRCHY-CytoCommunity input data; it is created if\n",
    "            missing. If None, nothing is written to disk.\n",
    "\n",
    "    Returns:\n",
    "        (E, 2) int64 ndarray, one row per directed edge (source, target).\n",
    "        The graph is symmetrized, so each neighbor pair appears in both directions.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if K is outside the range [1, n - 1].\n",
    "    \"\"\"\n",
    "    n_cells = len(coords)\n",
    "    # Fail early with a readable message instead of an error from deep inside scikit-learn.\n",
    "    if not 1 <= K < n_cells:\n",
    "        raise ValueError(f'K must satisfy 1 <= K < number of cells ({n_cells}), got K={K}')\n",
    "\n",
    "    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))\n",
    "    print(f'Constructing KNN graph for {n_cells} points...')\n",
    "    A = kneighbors_graph(coords, K, mode='connectivity', include_self=False, n_jobs=-1)  # CSR\n",
    "\n",
    "    # Symmetrize: keep an edge if either endpoint has the other among its K neighbors.\n",
    "    A = A.maximum(A.T).tocsr()\n",
    "    A.eliminate_zeros()\n",
    "    A.sort_indices()\n",
    "\n",
    "    src, dst = A.nonzero()\n",
    "    edge_index = np.column_stack((src, dst)).astype(np.int64)  # shape (E, 2)\n",
    "    if save_folder is not None:\n",
    "        os.makedirs(save_folder, exist_ok=True)\n",
    "        filename = os.path.join(save_folder, f\"{sample_id}_EdgeIndex.txt\")\n",
    "        np.savetxt(filename, edge_index, delimiter='\\t', fmt='%d')\n",
    "        print(f\"Saved {len(edge_index)} edges to {filename}\")\n",
    "    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))\n",
    "    return edge_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5ee83f70-2613-4321-bf0b-111d725a0d9d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "bregma-0.14 is processing!\n",
      "2025-10-10 22:28:22\n",
      "Constructing KNN graph for 5926 points...\n",
      "Saved 639294 edges to ./example_data/MERFISH/raw/bregma-0.14_EdgeIndex.txt\n",
      "2025-10-10 22:28:23\n"
     ]
    }
   ],
   "source": [
    "K = 100  # number of nearest neighbors\n",
    "input_dir0 = './example_data/MERFISH/raw'\n",
    "sample_list = ['bregma-0.14']\n",
    "setting = f'KNN_{K}'\n",
    "for sample_id in sample_list:\n",
    "    coords = np.loadtxt(f\"{input_dir0}/{sample_id}_Coordinates.txt\")\n",
    "    print(f\"{sample_id} is processing!\")\n",
    "    compute_knn(coords, K=K, sample_id=sample_id, save_folder=input_dir0)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b9c6f17f",
   "metadata": {},
   "source": [
    "## Run the HRCHY-CytoCommunity pipeline\n",
    "Training finishes within 2 minutes on an RTX 4090 GPU."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0232cb6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_input_dir = './example_data/MERFISH'\n",
    "save_dir = './results/MERFISH/'\n",
    "dataset = SpatialOmicsImageDataset(data_input_dir)\n",
    "graph_dict = {\n",
    "    'bregma-0.14':0, # the index of graph in
dataset\n",
    "}\n",
    "model_params = {\n",
    "    'mode': 'full',         # full HRCHYCytoCommunity model\n",
    "    's': 5,                 # number of perturbations\n",
    "    'num_tcn1': 12,         # number of fine-grained TC\n",
    "    'num_tcn2': 2,          # number of coarse-grained TC\n",
    "    'num_epoch': 1500,      # number of training epochs\n",
    "    'lambda1': 1,           # coefficient of consistency regularization\n",
    "    'lambda_balance': 1,    # coefficient of cluster balance regularization\n",
    "    'num_hidden': 128,      # dimension of the hidden layer\n",
    "    'lr': 1e-4,             # learning rate\n",
    "    'drop_rate': 0.5,       # node drop rate\n",
    "    'gt_fine': True,        # whether the input data contain fine-grained CN ground truth\n",
    "    'gt_coarse': False,     # whether the input data contain coarse-grained TC ground truth\n",
    "    'device': 'cuda:0',     # training device; set to 'cpu' if no GPU is available\n",
    "}"
   ]
  },
  { "cell_type": "code", "execution_count": null, "id": "f5d86f37-5239-40d4-a8da-d22415393128", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "bregma-0.14 is processing\n", "2025-10-10 22:46:35\n", "edge_pruning_cutoff = 0.09090909090909091\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0/1500 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ],
   "source": [
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Plotting frame: the x/y columns plus both cluster label columns for every cell.\n",
    "plot_df = cell_meta[[x_label, y_label]].copy()\n",
    "plot_df['coarse_cluster'] = cell_meta['coarse_cluster'].to_list()\n",
    "plot_df['fine_cluster'] = cell_meta['fine_cluster'].to_list()\n",
    "\n",
    "# One (hue column, palette, title) entry per panel, drawn left to right.\n",
    "panels = [\n",
    "    ('coarse_cluster', dict_color_TC, \"Coarse-grained TC\"),\n",
    "    ('fine_cluster', dict_color_TCN1, \"Fine-grained CN\"),\n",
    "]\n",
    "\n",
    "fig, axes = plt.subplots(1, len(panels), figsize=(12, 6))\n",
    "for ax, (hue_col, palette, title) in zip(axes, panels):\n",
    "    sns.scatterplot(\n",
    "        x=x_label, y=y_label,\n",
    "        data=plot_df,\n",
    "        hue=hue_col,\n",
    "        legend=True,\n",
    "        s=10,\n",
    "        palette=palette,\n",
    "        alpha=1.0,\n",
    "        ax=ax,\n",
    "    )\n",
    "    ax.set_title(title)\n",
    "    # Hide ticks and axis labels: only the spatial layout of the clusters matters here.\n",
    "    ax.set_xticks([])\n",
    "    ax.set_yticks([])\n",
    "    ax.set_xlabel(None)\n",
    "    ax.set_ylabel(None)\n",
    "    # Call ax.invert_yaxis() here if the plot should match the image coordinate system.\n",
    "    sns.despine(ax=ax, left=True, bottom=True)\n",
    "plt.show()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "HRCHY-CytoCommunity",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}