Resources

This page collects useful resources for writing manuscripts and creating academic figures.

Scatter Plots

Source: https://www.nature.com/articles/s41398-023-02358-w/figures/1

import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt

# Synthetic data set: three groups of N points each, with X and Y drawn from
# unit-variance normal distributions centred on a per-group mean.
np.random.seed(42)
N = 200

group_means = {'Control': 0, 'Duplication': 1, 'Deletion': -1}

# All X values are drawn first (group by group), then all Y values.
x_values = np.concatenate([np.random.normal(mu, 1, N) for mu in group_means.values()])
y_values = np.concatenate([np.random.normal(mu, 1, N) for mu in group_means.values()])

df = pd.DataFrame({
    'Group': np.repeat(list(group_means), N),
    'X': x_values,
    'Y': y_values,
})

# Plot colour used for each group.
colors = dict(Control='gray', Duplication='blue', Deletion='red')


# Scatter plot with per-group regression lines and marginal density plots.
#
# The seaborn style is applied first: style settings only take effect on axes
# created after the call, so it has to precede the figure/axes construction.
sns.set_style('whitegrid')

# 2x2 grid: joint panel bottom-left, X density on top, Y density on the right.
fig = plt.figure(figsize=(7, 7))
gs = fig.add_gridspec(2, 2, width_ratios=[4, 1], height_ratios=[1, 4],
                      wspace=0.05, hspace=0.05)
ax_main = fig.add_subplot(gs[1, 0])
ax_top = fig.add_subplot(gs[0, 0], sharex=ax_main)
ax_side = fig.add_subplot(gs[1, 1], sharey=ax_main)

for g in df.Group.unique():
    subset = df[df.Group == g]
    # Joint panel: scatter plus a fitted line, no confidence band.
    sns.regplot(x='X', y='Y', data=subset,
                scatter_kws={'alpha': 0.5, 'color': colors[g]},
                line_kws={'color': colors[g]}, ci=None, ax=ax_main)
    # Marginal densities in the same colour as the group's points.
    sns.kdeplot(x=subset.X, color=colors[g], fill=True, alpha=0.2, ax=ax_top)
    sns.kdeplot(y=subset.Y, color=colors[g], fill=True, alpha=0.2, ax=ax_side)

ax_main.set_xlabel('Measure X')
ax_main.set_ylabel('Measure Y')

# The marginal panels share an axis with the joint panel. Calling
# set_xticks([]) / set_yticks([]) on a shared axis would also remove the ticks
# from ax_main, so the shared direction is hidden with tick_params instead.
ax_top.tick_params(axis='x', bottom=False, labelbottom=False)
ax_top.set_yticks([])
ax_side.tick_params(axis='y', left=False, labelleft=False)
ax_side.set_xticks([])

# kdeplot labels the marginal axes itself; keep them bare and grid-free.
for ax_marginal in (ax_top, ax_side):
    ax_marginal.set_xlabel('')
    ax_marginal.set_ylabel('')
    ax_marginal.grid(False)

sns.despine(ax=ax_top, left=True, bottom=True)
sns.despine(ax=ax_side, left=True, bottom=True)

plt.show()


Correlation Table

Source: Fig. 2 of the following article:

Abdellaoui, Abdel, and Karin JH Verweij. "Dissecting polygenic signals from genome-wide association studies on human behaviour." Nature Human Behaviour 5.6 (2021): 686-694.

https://t.co/37RHhqch1r

import numpy as np, matplotlib.pyplot as plt

import matplotlib.colors as mcolors, matplotlib.cm as cm

from matplotlib.patches import Circle, Wedge


# Trait labels; "\n" breaks long names across two lines on the axes.
traits = [
    "Education\n(Years)", "Income", "Neuroticism", "Subjective Wellbeing",
    "Age at First\nIntercourse", "Number of\nSexual Partners",
    "Alcohol Dependence", "Schizophrenia",
]
n = len(traits)

np.random.seed(42)

# Random stand-in for the correlation matrix, values in [-1, 1).
C = np.random.uniform(low=-1, high=1, size=(n, n))

# Second per-cell quantity in [0, 1), drawn as a pie/wedge fraction.
secondary = np.random.rand(n, n)

# Two heritability estimates per trait, each in [0.3, 0.7).
H1 = 0.3 + 0.4 * np.random.rand(n)
H2 = 0.3 + 0.4 * np.random.rand(n)


# Two-panel figure: a triangular matrix of coloured discs (left) and a
# dumbbell plot comparing two heritability estimates per trait (right).
fig = plt.figure(figsize=(10, 6))
gs = fig.add_gridspec(1, 2, width_ratios=[3, 1], wspace=0.4)
ax_mat = fig.add_subplot(gs[0, 0])
ax_line = fig.add_subplot(gs[0, 1])

cmap = cm.RdBu_r
norm = mcolors.Normalize(vmin=-1, vmax=1)
r = 0.45  # disc radius in cell units (cells are 1 x 1)

# --- Left panel: upper triangle only (column index j > row index i) --------
for i in range(n):
    for j in range(i + 1, n):
        x, y = j, i
        val = C[i, j]
        col = cmap(norm(val))
        # Disc coloured by the correlation value.
        ax_mat.add_patch(Circle((x, y), r, facecolor=col, edgecolor='white', lw=0.5))
        ratio = secondary[i, j]
        # Wedge always sweeps counter-clockwise from theta1 to theta2. To cover
        # a fraction `ratio` of the disc starting at 90 degrees and going
        # clockwise, the wedge must run from 90-360*ratio up to 90; passing
        # the angles the other way round draws the complement (1 - ratio).
        ax_mat.add_patch(Wedge((x, y), r, 90 - 360 * ratio, 90,
                               facecolor='gold', edgecolor='white', lw=0.5))

ax_mat.set_xlim(-0.5, n - 0.5)
ax_mat.set_ylim(-0.5, n - 0.5)
ax_mat.invert_yaxis()          # row 0 at the top, like a table
ax_mat.set_aspect('equal')     # keep the discs circular
ax_mat.set_xticks(range(n))
ax_mat.set_yticks(range(n))
ax_mat.set_xticklabels(traits, rotation=90)
ax_mat.set_yticklabels(traits)
ax_mat.set_title("Genetic Correlations", fontsize=12)

# Light cell borders drawn underneath the discs.
for k in range(n + 1):
    ax_mat.axhline(k - 0.5, color='lightgray', lw=1, zorder=0)
    ax_mat.axvline(k - 0.5, color='lightgray', lw=1, zorder=0)

# Colour bar for the disc colours.
sm = cm.ScalarMappable(norm=norm, cmap=cmap)
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax_mat, fraction=0.046, pad=0.04)
cbar.set_label("Correlation", fontsize=10)

# --- Right panel: one row per trait, two estimates joined by a line --------
yvals = np.arange(n)
ax_line.invert_yaxis()         # same top-to-bottom trait order as the matrix
ax_line.set_yticks(yvals)
ax_line.set_yticklabels(traits)
ax_line.set_xlabel("Heritability Estimate", fontsize=10)
ax_line.set_title("Twin/Family vs GWAS", fontsize=12)

# Connectors first so the markers are drawn on top of them.
for row, (h_twin, h_gwas) in enumerate(zip(H1, H2)):
    ax_line.plot([h_twin, h_gwas], [row, row], color='gray', lw=2)
# One artist per series gives exactly one legend entry each.
ax_line.plot(H1, yvals, 'o', color='orange', label='Twin/family')
ax_line.plot(H2, yvals, 'o', color='red', label='GWAS')

ax_line.grid(True, axis='x', ls='--', alpha=0.5)
ax_line.yaxis.tick_right()
ax_line.legend(loc='upper right')

plt.tight_layout()
plt.show()




Table with Hierarchical Clustering 

https://www.nature.com/articles/s41593-025-01891-9/figures/5

import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt

# Column (sample) and row (gene) labels for the example expression table.
samples = ["SS1", "SS2", "RES1", "RES2", "RES3"]
genes = [
    "Icam1", "Gm47079", "Jpt1", "Gls", "Arl5b", "Slc22a13b", "Tacr1",
    "Gadd45b", "Rhou", "Ifi207", "Cxcl10", "Shisa-6", "Igkv3-2", "Rgs9bp",
    "Cldn2", "Cfap65", "Otx2os1", "Cxcl13", "Kctd1", "D630024D03Rik",
]

# Random standard-normal values, one row per gene and one column per sample.
np.random.seed(42)
expression = np.random.randn(len(genes), len(samples))
df = pd.DataFrame(data=expression, index=genes, columns=samples)

# Colour strip above the heatmap: pink for the SS samples, gray for the RES ones.
col_colors = pd.Series(
    {sample: "pink" if sample.startswith("SS") else "gray" for sample in samples}
)

# Heatmap with hierarchical clustering of the rows (genes) only; the sample
# columns keep their given order. z_score=0 standardises each row, and
# center=0 pins the midpoint (white) of the diverging "bwr" colormap to a
# z-score of zero rather than to the midpoint of the observed data range.
g = sns.clustermap(
    df,
    cmap="bwr",
    center=0,
    row_cluster=True,
    col_cluster=False,
    col_colors=col_colors,
    z_score=0,
    figsize=(6, 8),
)

# Label the colour bar.
g.cax.set_title("Z score")

plt.show()


Table with Violin Plot 

https://www.nature.com/articles/s41593-025-01878-6/figures/1

import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt


# Example cell types (grid rows) and genes (grid columns).
cts = [
    "Neuroblast", "NSC/progenitor", "Pericyte", "Astrocyte", "Ependymal cell",
    "Microglia", "Oligodendrocyte/OPC", "Neuron", "Endothelial", "NA",
]
gs = ["Nfib", "Nr2f1", "Apt13", "Adgrl3", "Aldoc", "Acsl3", "S100b", "Gja1", "Aqp4", "Sox9"]

# Long-format random expression table: 40 draws from N(0, 2) for every
# (cell type, gene) pair, generated cell type by cell type, gene by gene.
np.random.seed(42)
rows = []
for cell_type in cts:
    for gene in gs:
        for value in np.random.normal(0, 2, 40):
            rows.append((cell_type, gene, value))

df = pd.DataFrame(rows, columns=["CellType", "Gene", "Expression"])


# Assign every gene its own colour from the "husl" palette.
clrs = sns.color_palette("husl", n_colors=len(gs))
gene2clr = {gene: clr for gene, clr in zip(gs, clrs)}


def color_violin(x, color, **kw):
    """Draw one horizontal violin, coloured according to the facet's gene.

    Written to be passed to ``FacetGrid.map_dataframe``. The gene is read
    from the first "Gene" entry of ``kw["data"]`` and looked up in
    ``gene2clr``; the ``color`` argument is accepted but not used. All
    remaining keyword arguments are forwarded to ``sns.violinplot``.
    """
    facet_gene = kw["data"]["Gene"].iloc[0]
    sns.violinplot(
        x=x,
        color=gene2clr[facet_gene],
        orient="h",
        linewidth=1,
        cut=0,
        inner="box",
        **kw,
    )


sns.set(style="white", font_scale=1.0)

# Grid of small panels: one row per cell type, one column per gene.
# The x axis is shared across panels, the y axis is not.
f = sns.FacetGrid(
    data=df,
    row="CellType",
    col="Gene",
    sharex=True,
    sharey=False,
    height=1.3,
    aspect=0.6,
    margin_titles=True,
)
f.map_dataframe(color_violin, "Expression")
f.set_titles(row_template="{row_name}", col_template="{col_name}")

# Clear every axis label, then label x on the bottom row only.
f.set(xlabel="", ylabel="")
bottom_row = f.axes[-1]
for ax in bottom_row:
    ax.set_xlabel("Expression Level")

# Hide the y tick labels on every panel.
for ax in f.axes.flat:
    ax.tick_params(labelleft=False)

sns.despine(left=True, bottom=True)
plt.tight_layout()
plt.show()


t-SNE plot

https://www.nature.com/articles/s41593-025-01878-6/figures/13

import numpy as np, matplotlib.pyplot as plt, seaborn as sns

from sklearn.manifold import TSNE


# Cluster names; each one becomes a synthetic cluster of points.
cts = [
    "Neuroblast", "NSC/progenitor", "Pericyte", "Astrocyte", "Ependymal cell",
    "Microglia", "Oligodendrocyte/OPC", "Neuron", "Endothelial", "NA",
]
n = len(cts)   # number of clusters
s = 200        # points per cluster
d = 30         # dimensions per point
np.random.seed(42)

# Synthetic data: every cluster is unit-variance Gaussian noise around its own
# random centre (centres are standard-normal draws scaled by 5).
centers = np.random.randn(n, d) * 5
X = np.vstack([center + np.random.randn(s, d) for center in centers])

# Integer cluster label for every row of X: 0 repeated s times, then 1, ...
L = np.repeat(np.arange(n), s)


# t-SNE: embed the d-dimensional points into two dimensions (fixed seed).
emb = TSNE(n_components=2, perplexity=30, random_state=42).fit_transform(X)

# Plot
sns.set(style="white", context="talk")
plt.figure(figsize=(7, 7))

# "husl" yields n distinct hues for any n. The qualitative "Set2" palette has
# only eight colours and wraps around when more are requested, which would
# give some of the ten clusters here the same colour.
pal = sns.color_palette("husl", n)

for i, ct in enumerate(cts):
    idx = (L == i)
    # All points of this cluster.
    plt.scatter(emb[idx, 0], emb[idx, 1], color=pal[i], s=10, alpha=0.6)
    # Mean position of the cluster in the embedding, marked and labelled.
    cx, cy = emb[idx].mean(axis=0)
    plt.scatter(cx, cy, color=pal[i], s=200, marker="X", edgecolor="k", lw=1, zorder=3)
    plt.text(cx, cy, ct, fontsize=10, ha="center", va="center",
             bbox=dict(boxstyle="round", fc="white", alpha=0.8))

plt.xlabel("t-SNE1")
plt.ylabel("t-SNE2")
sns.despine()
plt.tight_layout()
plt.show()


Violin + Scatter plot

https://www.nature.com/articles/s41593-025-01878-6/figures/13

import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt


np.random.seed(42)
cells_per_group = 200

# (mean, standard deviation) of each metric for CER_1 and CER_2, in that order.
metric_params = {
    'num_genes': [(3000, 1000), (5000, 1200)],
    'umi_count': [(5e4, 2e4), (8e4, 2.5e4)],
    'pct_mito': [(5, 2), (12, 3)],
}

# Build the table column by column; within a column the CER_1 values are drawn
# before the CER_2 values.
columns = {'group': np.repeat(['CER_1', 'CER_2'], cells_per_group)}
for metric, params in metric_params.items():
    columns[metric] = np.concatenate(
        [np.random.normal(mu, sd, cells_per_group) for mu, sd in params]
    )

df = pd.DataFrame(columns)


sns.set(style="ticks", context="talk")

# One panel per metric, side by side.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(9, 6))
mets = ['num_genes', 'umi_count', 'pct_mito']
titles = ['Number of genes', 'UMI count', '% mitochondrial genes']
colors = ["#f79489", "#74c69d"]

for panel, ax in enumerate(axes):
    m, t = mets[panel], titles[panel]
    shared = dict(x='group', y=m, data=df, ax=ax)

    # Violin per group with quartile lines, overlaid with the raw points.
    sns.violinplot(palette=colors, cut=0, scale="width",
                   inner="quartile", linewidth=1, **shared)
    sns.stripplot(color='k', size=2, alpha=0.4, **shared)

    ax.set_title(t, fontsize=12)
    ax.set(xlabel="", ylabel="")

    # Tilt the group names.
    tick_labels = ax.get_xticklabels()
    ax.set_xticklabels(tick_labels, rotation=20, ha='right')

sns.despine()
plt.tight_layout()
plt.show()