{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# How to do 'Stemness' analysis\n", "\n", "We will use [CellRank's implementation](https://cellrank.readthedocs.io/en/stable/index.html) of [CytoTRACE](https://cytotrace.stanford.edu/)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Outline:\n", "\n", "- Loading data\n", "- Semi-standard quality control\n", "- PCA embedding\n", "- cluster annotation\n", "- diffusion map\n", "- CytoTRACE\n", "- Comparison to diffusion pseudotime\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load basic QC, normalization" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import scanpy as sc\n", "import scipy as sp\n", "import numpy as np\n", "import warnings\n", "\n", "# NB these new packages\n", "import scvelo as scv\n", "import cellrank as cr\n", "\n", "sc.settings.set_figure_params(dpi=80)\n", "warnings.filterwarnings('ignore')\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 5405 × 27998\n", " obs: 'sampleName', 'n_genes_by_counts', 'total_counts', 'total_counts_ribo', 'pct_counts_ribo', 'total_counts_mt', 'pct_counts_mt'\n", " var: 'gene_ids', 'mt', 'ribo', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# change this to point the appropriate path\n", "adata = sc.read_h5ad(\"/Users/patrickcahan/Dropbox (Personal)/data/cscb/2022/d4/cscb_2022_d4_raw.h5ad\")\n", "adata.obs['sampleName'] = \"mEB_day4\"\n", "adata" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "adata.var['mt']= adata.var_names.str.startswith((\"mt-\"))\n", "adata.var['ribo'] = adata.var_names.str.startswith((\"Rps\",\"Rpl\"))\n", "sc.pp.calculate_qc_metrics(adata, qc_vars=['ribo', 'mt'], percent_top=None, log1p=False, inplace=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of cells: 5405\n", "95th percentile: 12928.400000000001\n", "Number of cells: 5134\n" ] } ], "source": [ "print(\"Number of cells: \",adata.n_obs)\n", "\n", "# figure out the total counts == 95 percentile\n", "thresh = np.percentile(adata.obs['total_counts'],95)\n", "print(\"95th percentile: \",thresh)\n", "\n", "adata = adata[adata.obs['total_counts'] < thresh, :]\n", "print(\"Number of cells: \",adata.n_obs)\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# SKIP THIS FOR NOW\n", "# mito_genes = adata.var_names.str.startswith('mt-')\n", "# ribo_genes = adata.var_names.str.startswith((\"Rpl\",\"Rps\"))\n", "# malat_gene = adata.var_names.str.startswith(\"Malat1\")\n", "\n", "# remove = np.add(mito_genes, ribo_genes)\n", "# remove = np.add(remove, malat_gene)\n", "# keep = np.invert(remove)\n", "\n", "# print(len(keep) - np.count_nonzero(keep))\n", "\n", "# adata = adata[:,keep].copy()\n", "# print(\"Number of genes: \",adata.n_vars)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "\n", "adM1Norm = adata.copy()\n", "sc.pp.filter_genes(adM1Norm, min_cells=5)\n", "sc.pp.normalize_per_cell(adM1Norm, counts_per_cell_after=1e4)\n", "sc.pp.log1p(adM1Norm)\n", "sc.pp.highly_variable_genes(adM1Norm, min_mean=0.0125, max_mean=5, min_disp=0.25)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### CytoTRACE" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "computing neighbors\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " finished (0:00:06) --> added \n", " 'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)\n", "computing moments based on connectivities\n", " finished (0:00:05) --> added \n", " 'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)\n" ] } ], "source": [ "# This is a necessary hack. See CellRank docs\n", "adM1Norm.layers[\"spliced\"] = adM1Norm.X\n", "adM1Norm.layers[\"unspliced\"] = adM1Norm.X\n", "scv.pp.moments(adM1Norm, n_pcs=30, n_neighbors=30)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from cellrank.tl.kernels import CytoTRACEKernel\n", "\n", "ctk = CytoTRACEKernel(adM1Norm)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This should produce a kernel (more on this later), but importantly for us here, Cytotrace scpre in `.obs['ct_score']`" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | sampleName | \n", "n_genes_by_counts | \n", "total_counts | \n", "total_counts_ribo | \n", "pct_counts_ribo | \n", "total_counts_mt | \n", "pct_counts_mt | \n", "n_counts | \n", "ct_num_exp_genes | \n", "ct_score | \n", "ct_pseudotime | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
AAACATACCCTACC-1 | \n", "mEB_day4 | \n", "1212 | \n", "2238.0 | \n", "629.0 | \n", "28.105453 | \n", "28.0 | \n", "1.251117 | \n", "2237.0 | \n", "1211 | \n", "0.316669 | \n", "0.683331 | \n", "
AAACATACGTCGTA-1 | \n", "mEB_day4 | \n", "1588 | \n", "3831.0 | \n", "1267.0 | \n", "33.072304 | \n", "34.0 | \n", "0.887497 | \n", "3831.0 | \n", "1588 | \n", "0.621059 | \n", "0.378941 | \n", "
AAACATACTTTCAC-1 | \n", "mEB_day4 | \n", "1538 | \n", "3381.0 | \n", "961.0 | \n", "28.423544 | \n", "2.0 | \n", "0.059154 | \n", "3381.0 | \n", "1538 | \n", "0.752555 | \n", "0.247445 | \n", "
AAACATTGCATTGG-1 | \n", "mEB_day4 | \n", "1221 | \n", "2489.0 | \n", "750.0 | \n", "30.132584 | \n", "24.0 | \n", "0.964243 | \n", "2489.0 | \n", "1221 | \n", "0.397417 | \n", "0.602583 | \n", "
AAACATTGCTTGCC-1 | \n", "mEB_day4 | \n", "2661 | \n", "9510.0 | \n", "3132.0 | \n", "32.933754 | \n", "71.0 | \n", "0.746583 | \n", "9507.0 | \n", "2658 | \n", "0.759517 | \n", "0.240483 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
TTTGACTGACTCTT-1 | \n", "mEB_day4 | \n", "2941 | \n", "11419.0 | \n", "4406.0 | \n", "38.584812 | \n", "44.0 | \n", "0.385323 | \n", "11419.0 | \n", "2941 | \n", "0.751135 | \n", "0.248865 | \n", "
TTTGACTGAGGCGA-1 | \n", "mEB_day4 | \n", "2446 | \n", "6908.0 | \n", "1999.0 | \n", "28.937466 | \n", "65.0 | \n", "0.940938 | \n", "6907.0 | \n", "2445 | \n", "0.736133 | \n", "0.263867 | \n", "
TTTGACTGCATTGG-1 | \n", "mEB_day4 | \n", "2906 | \n", "9558.0 | \n", "3067.0 | \n", "32.088303 | \n", "91.0 | \n", "0.952082 | \n", "9557.0 | \n", "2905 | \n", "0.718990 | \n", "0.281010 | \n", "
TTTGACTGCTGGAT-1 | \n", "mEB_day4 | \n", "1475 | \n", "3280.0 | \n", "1035.0 | \n", "31.554878 | \n", "22.0 | \n", "0.670732 | \n", "3280.0 | \n", "1475 | \n", "0.609957 | \n", "0.390043 | \n", "
TTTGACTGGTGAGG-1 | \n", "mEB_day4 | \n", "2808 | \n", "9123.0 | \n", "2923.0 | \n", "32.039898 | \n", "55.0 | \n", "0.602872 | \n", "9122.0 | \n", "2807 | \n", "0.736943 | \n", "0.263057 | \n", "
5134 rows × 11 columns
\n", "