{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# How to do 'Stemness' analysis\n", "\n", "We will use [CellRank's implementation](https://cellrank.readthedocs.io/en/stable/index.html) of [CytoTRACE](https://cytotrace.stanford.edu/)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Outline:\n", "\n", "- Loading data\n", "- Semi-standard quality control\n", "- PCA embedding\n", "- cluster annotation\n", "- diffusion map\n", "- CytoTRACE\n", "- Comparison to diffusion pseudotime\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load basic QC, normalization" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import scanpy as sc\n", "import scipy as sp\n", "import numpy as np\n", "import warnings\n", "\n", "# NB these new packages\n", "import scvelo as scv\n", "import cellrank as cr\n", "\n", "sc.settings.set_figure_params(dpi=80)\n", "warnings.filterwarnings('ignore')\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 5405 × 27998\n", " obs: 'sampleName', 'n_genes_by_counts', 'total_counts', 'total_counts_ribo', 'pct_counts_ribo', 'total_counts_mt', 'pct_counts_mt'\n", " var: 'gene_ids', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# change this to point the appropriate path\n", "adata = sc.read_h5ad(\"/Users/patrickcahan/Dropbox (Personal)/data/cscb/2022/d4/cscb_2022_d4_raw.h5ad\")\n", "adata.obs['sampleName'] = \"mEB_day4\"\n", "adata" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "adata.var['mt']= adata.var_names.str.startswith((\"mt-\"))\n", "adata.var['ribo'] = adata.var_names.str.startswith((\"Rps\",\"Rpl\"))\n", "sc.pp.calculate_qc_metrics(adata, qc_vars=['ribo', 'mt'], percent_top=None, log1p=False, inplace=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of cells: 5405\n", "95th percentile: 12928.400000000001\n", "Number of cells: 5134\n" ] } ], "source": [ "print(\"Number of cells: \",adata.n_obs)\n", "\n", "# figure out the total counts == 95 percentile\n", "thresh = np.percentile(adata.obs['total_counts'],95)\n", "print(\"95th percentile: \",thresh)\n", "\n", "adata = adata[adata.obs['total_counts'] < thresh, :]\n", "print(\"Number of cells: \",adata.n_obs)\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# SKIP THIS FOR NOW\n", "# mito_genes = adata.var_names.str.startswith('mt-')\n", "# ribo_genes = adata.var_names.str.startswith((\"Rpl\",\"Rps\"))\n", "# malat_gene = adata.var_names.str.startswith(\"Malat1\")\n", "\n", "# remove = np.add(mito_genes, ribo_genes)\n", "# remove = np.add(remove, malat_gene)\n", "# keep = np.invert(remove)\n", "\n", "# print(len(keep) - np.count_nonzero(keep))\n", "\n", "# adata = adata[:,keep].copy()\n", "# print(\"Number of genes: \",adata.n_vars)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "\n", "adM1Norm = adata.copy()\n", "sc.pp.filter_genes(adM1Norm, min_cells=5)\n", "sc.pp.normalize_per_cell(adM1Norm, counts_per_cell_after=1e4)\n", "sc.pp.log1p(adM1Norm)\n", "sc.pp.highly_variable_genes(adM1Norm, min_mean=0.0125, max_mean=5, min_disp=0.25)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CytoTRACE" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "computing neighbors\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " finished (0:00:06) --> added \n", " 'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)\n", "computing moments based on connectivities\n", " finished (0:00:05) --> added \n", " 'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)\n" ] } ], "source": [ "# This is a necessary hack. See CellRank docs\n", "adM1Norm.layers[\"spliced\"] = adM1Norm.X\n", "adM1Norm.layers[\"unspliced\"] = adM1Norm.X\n", "scv.pp.moments(adM1Norm, n_pcs=30, n_neighbors=30)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from cellrank.tl.kernels import CytoTRACEKernel\n", "\n", "ctk = CytoTRACEKernel(adM1Norm)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This should produce a kernel (more on this later), but importantly for us here, Cytotrace scpre in `.obs['ct_score']`" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | sampleName\n", " | n_genes_by_counts\n", " | total_counts\n", " | total_counts_ribo\n", " | pct_counts_ribo\n", " | total_counts_mt\n", " | pct_counts_mt\n", " | n_counts\n", " | ct_num_exp_genes\n", " | ct_score\n", " | ct_pseudotime\n", " | 
|---|---|---|---|---|---|---|---|---|---|---|---|
| AAACATACCCTACC-1\n", " | mEB_day4\n", " | 1212\n", " | 2238.0\n", " | 629.0\n", " | 28.105453\n", " | 28.0\n", " | 1.251117\n", " | 2237.0\n", " | 1211\n", " | 0.316669\n", " | 0.683331\n", " | 
| AAACATACGTCGTA-1\n", " | mEB_day4\n", " | 1588\n", " | 3831.0\n", " | 1267.0\n", " | 33.072304\n", " | 34.0\n", " | 0.887497\n", " | 3831.0\n", " | 1588\n", " | 0.621059\n", " | 0.378941\n", " | 
| AAACATACTTTCAC-1\n", " | mEB_day4\n", " | 1538\n", " | 3381.0\n", " | 961.0\n", " | 28.423544\n", " | 2.0\n", " | 0.059154\n", " | 3381.0\n", " | 1538\n", " | 0.752555\n", " | 0.247445\n", " | 
| AAACATTGCATTGG-1\n", " | mEB_day4\n", " | 1221\n", " | 2489.0\n", " | 750.0\n", " | 30.132584\n", " | 24.0\n", " | 0.964243\n", " | 2489.0\n", " | 1221\n", " | 0.397417\n", " | 0.602583\n", " | 
| AAACATTGCTTGCC-1\n", " | mEB_day4\n", " | 2661\n", " | 9510.0\n", " | 3132.0\n", " | 32.933754\n", " | 71.0\n", " | 0.746583\n", " | 9507.0\n", " | 2658\n", " | 0.759517\n", " | 0.240483\n", " | 
| ...\n", " | ...\n", " | ...\n", " | ...\n", " | ...\n", " | ...\n", " | ...\n", " | ...\n", " | ...\n", " | ...\n", " | ...\n", " | ...\n", " | 
| TTTGACTGACTCTT-1\n", " | mEB_day4\n", " | 2941\n", " | 11419.0\n", " | 4406.0\n", " | 38.584812\n", " | 44.0\n", " | 0.385323\n", " | 11419.0\n", " | 2941\n", " | 0.751135\n", " | 0.248865\n", " | 
| TTTGACTGAGGCGA-1\n", " | mEB_day4\n", " | 2446\n", " | 6908.0\n", " | 1999.0\n", " | 28.937466\n", " | 65.0\n", " | 0.940938\n", " | 6907.0\n", " | 2445\n", " | 0.736133\n", " | 0.263867\n", " | 
| TTTGACTGCATTGG-1\n", " | mEB_day4\n", " | 2906\n", " | 9558.0\n", " | 3067.0\n", " | 32.088303\n", " | 91.0\n", " | 0.952082\n", " | 9557.0\n", " | 2905\n", " | 0.718990\n", " | 0.281010\n", " | 
| TTTGACTGCTGGAT-1\n", " | mEB_day4\n", " | 1475\n", " | 3280.0\n", " | 1035.0\n", " | 31.554878\n", " | 22.0\n", " | 0.670732\n", " | 3280.0\n", " | 1475\n", " | 0.609957\n", " | 0.390043\n", " | 
| TTTGACTGGTGAGG-1\n", " | mEB_day4\n", " | 2808\n", " | 9123.0\n", " | 2923.0\n", " | 32.039898\n", " | 55.0\n", " | 0.602872\n", " | 9122.0\n", " | 2807\n", " | 0.736943\n", " | 0.263057\n", " | 
5134 rows × 11 columns
\n", "