Resources
This page collects useful resources for writing manuscripts and creating academic figures.
Scatter Plots
Source: https://www.nature.com/articles/s41398-023-02358-w/figures/1
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
# Joint scatter plot: per-group scatter + regression line in the main panel,
# with per-group density curves in the top and right margins.
# The data are synthetic (three shifted unit-variance normal clouds).
np.random.seed(42)
N = 200  # samples per group

df = pd.DataFrame({
    'Group': np.repeat(['Control', 'Duplication', 'Deletion'], N),
    'X': np.concatenate([np.random.normal(0, 1, N),
                         np.random.normal(1, 1, N),
                         np.random.normal(-1, 1, N)]),
    'Y': np.concatenate([np.random.normal(0, 1, N),
                         np.random.normal(1, 1, N),
                         np.random.normal(-1, 1, N)]),
})
colors = {'Control': 'gray', 'Duplication': 'blue', 'Deletion': 'red'}

# The style must be set BEFORE the axes are created: it works through
# rcParams, which axes read when they are constructed.  Setting it afterwards
# (as this snippet used to) leaves the current figure unstyled.
sns.set_style('whitegrid')

fig = plt.figure(figsize=(7, 7))
gs = fig.add_gridspec(2, 2, width_ratios=[4, 1], height_ratios=[1, 4],
                      wspace=0.05, hspace=0.05)
ax_main = fig.add_subplot(gs[1, 0])
ax_top = fig.add_subplot(gs[0, 0], sharex=ax_main)    # marginal of X
ax_side = fig.add_subplot(gs[1, 1], sharey=ax_main)   # marginal of Y

for g in df.Group.unique():
    sub = df[df.Group == g]
    sns.regplot(x='X', y='Y', data=sub,
                scatter_kws={'alpha': 0.5, 'color': colors[g]},
                line_kws={'color': colors[g]}, ci=None, ax=ax_main)
    sns.kdeplot(x=sub.X, color=colors[g], fill=True, alpha=0.2, ax=ax_top)
    sns.kdeplot(y=sub.Y, color=colors[g], fill=True, alpha=0.2, ax=ax_side)

ax_main.set_xlabel('Measure X')
ax_main.set_ylabel('Measure Y')

# Tick locations are shared between ax_main and the marginal axes, so calling
# set_xticks([]) / set_yticks([]) on a marginal would also wipe the ticks of
# the main panel.  Hide the marginal's ticks and labels on the shared axis
# with tick_params instead, and only clear ticks on the unshared (density)
# axis.
ax_top.tick_params(axis='x', bottom=False, labelbottom=False)
ax_top.set_yticks([])
ax_side.tick_params(axis='y', left=False, labelleft=False)
ax_side.set_xticks([])

# The marginals are decorations: no grid and no axis labels ("X", "Y" and
# "Density" would otherwise be written into the narrow gaps between panels).
for ax_marg in (ax_top, ax_side):
    ax_marg.grid(False)
    ax_marg.set_xlabel('')
    ax_marg.set_ylabel('')

sns.despine(ax=ax_top, left=True, bottom=True)
sns.despine(ax=ax_side, left=True, bottom=True)
plt.show()
Correlation Table
Source:
Fig 2.
Abdellaoui, Abdel, and Karin JH Verweij. "Dissecting polygenic signals from genome-wide association studies on human behaviour." Nature Human Behaviour 5.6 (2021): 686-694.
https://t.co/37RHhqch1r
import numpy as np, matplotlib.pyplot as plt
import matplotlib.colors as mcolors, matplotlib.cm as cm
from matplotlib.patches import Circle, Wedge
# Upper-triangle "correlation table": each cell is a disc coloured by the
# correlation value with a gold pie slice showing a secondary ratio, next to
# a dumbbell chart comparing two heritability estimates per trait.
# All numbers are random placeholders.
np.random.seed(42)
n = 8
traits = ["Education\n(Years)", "Income", "Neuroticism", "Subjective Wellbeing",
          "Age at First\nIntercourse", "Number of\nSexual Partners",
          "Alcohol Dependence", "Schizophrenia"]
C = np.random.uniform(-1, 1, (n, n))  # correlation matrix
secondary = np.random.rand(n, n)      # pie/wedge ratio
H1, H2 = np.random.rand(n) * 0.4 + 0.3, np.random.rand(n) * 0.4 + 0.3  # heritabilities

fig = plt.figure(figsize=(10, 6))
gs = fig.add_gridspec(1, 2, width_ratios=[3, 1], wspace=0.4)
ax_mat, ax_line = fig.add_subplot(gs[0, 0]), fig.add_subplot(gs[0, 1])
cmap = cm.RdBu_r
norm = mcolors.Normalize(vmin=-1, vmax=1)
r = 0.45  # disc radius in cell units (cells are 1 x 1)

# Only the upper triangle (i < j) is drawn.
for i in range(n):
    for j in range(i + 1, n):
        x, y = j, i
        val = C[i, j]
        col = cmap(norm(val))
        ax_mat.add_patch(Circle((x, y), r, facecolor=col,
                                edgecolor='white', lw=0.5))
        ratio = secondary[i, j]
        # Wedge always sweeps counter-clockwise from theta1 to theta2, and a
        # theta2 smaller than theta1 is wrapped upward by 360 degrees.  The
        # previous call, Wedge(..., 90, 90 - 360*ratio), therefore filled
        # (1 - ratio) of the disc.  Because the y axis is inverted below, the
        # patch is mirrored vertically: data angle -90 is 12 o'clock on screen
        # and increasing angles run clockwise, so this draws a slice of size
        # `ratio` starting at the top and growing clockwise.
        ax_mat.add_patch(Wedge((x, y), r, -90, -90 + 360 * ratio,
                               facecolor='gold', edgecolor='white', lw=0.5))

ax_mat.set_xlim(-0.5, n - 0.5)
ax_mat.set_ylim(-0.5, n - 0.5)
ax_mat.invert_yaxis()          # row 0 at the top, like a table
ax_mat.set_aspect('equal')     # keep the discs circular
ax_mat.set_xticks(range(n))
ax_mat.set_yticks(range(n))
ax_mat.set_xticklabels(traits, rotation=90)
ax_mat.set_yticklabels(traits)
ax_mat.set_title("Genetic Correlations", fontsize=12)

# Cell borders, drawn underneath the discs.
for k in range(n + 1):
    ax_mat.axhline(k - 0.5, color='lightgray', lw=1, zorder=0)
    ax_mat.axvline(k - 0.5, color='lightgray', lw=1, zorder=0)

# Colour bar for the disc colours; the mappable carries no data of its own.
sm = cm.ScalarMappable(norm=norm, cmap=cmap)
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax_mat, fraction=0.046, pad=0.04)
cbar.set_label("Correlation", fontsize=10)

# Dumbbell chart: one row per trait, a grey connector between both estimates.
yvals = np.arange(n)
ax_line.invert_yaxis()
ax_line.set_yticks(yvals)
ax_line.set_yticklabels(traits)
ax_line.set_xlabel("Heritability Estimate", fontsize=10)
ax_line.set_title("Twin/Family vs GWAS", fontsize=12)
for i in range(n):
    ax_line.plot([H1[i], H2[i]], [i, i], color='gray', lw=2)
# One plot call per series gives exactly one legend entry each, without
# relying on empty-string labels being skipped by the legend.
ax_line.plot(H1, yvals, 'o', color='orange', label='Twin/family')
ax_line.plot(H2, yvals, 'o', color='red', label='GWAS')
ax_line.grid(True, axis='x', ls='--', alpha=0.5)
ax_line.yaxis.tick_right()
ax_line.legend(loc='upper right')

plt.tight_layout()
plt.show()
Table with Hierarchical Clustering
https://www.nature.com/articles/s41593-025-01891-9/figures/5
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
# Heatmap of row-wise z-scored values with hierarchical clustering of the
# rows (genes) and a colour strip marking the sample groups.
# The expression values are random placeholders.
np.random.seed(42)
genes = ["Icam1", "Gm47079", "Jpt1", "Gls", "Arl5b", "Slc22a13b", "Tacr1",
         "Gadd45b", "Rhou", "Ifi207", "Cxcl10", "Shisa-6", "Igkv3-2", "Rgs9bp",
         "Cldn2", "Cfap65", "Otx2os1", "Cxcl13", "Kctd1", "D630024D03Rik"]
samples = ["SS1", "SS2", "RES1", "RES2", "RES3"]
df = pd.DataFrame(np.random.randn(len(genes), len(samples)),
                  index=genes, columns=samples)

# One colour per column, aligned on the sample names.
col_colors = pd.Series(["pink", "pink", "gray", "gray", "gray"], index=samples)

g = sns.clustermap(
    df,
    cmap="bwr",
    # Pin the white midpoint of the diverging colormap to z = 0.  Without
    # this the colour scale spans min..max of the data, which is generally
    # not symmetric, so blue/red would not mean below/above the row mean.
    center=0,
    row_cluster=True,
    col_cluster=False,   # keep the samples in their given order
    col_colors=col_colors,
    z_score=0,           # standardise each row
    figsize=(6, 8),
)
g.cax.set_title("Z score")
plt.show()
Table with Violin Plot
https://www.nature.com/articles/s41593-025-01878-6/figures/1
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
# Grid rows are cell types, grid columns are genes
cts = ["Neuroblast", "NSC/progenitor", "Pericyte", "Astrocyte", "Ependymal cell",
       "Microglia", "Oligodendrocyte/OPC", "Neuron", "Endothelial", "NA"]
gs = ["Nfib", "Nr2f1", "Apt13", "Adgrl3", "Aldoc", "Acsl3", "S100b", "Gja1", "Aqp4", "Sox9"]

# Synthetic long-format table: 40 normal draws per (cell type, gene) pair
np.random.seed(42)
records = []
for cell_type in cts:
    for gene in gs:
        for value in np.random.normal(0, 2, 40):
            records.append([cell_type, gene, value])
df = pd.DataFrame(records, columns=["CellType", "Gene", "Expression"])

# Lookup table: gene name -> its colour from a husl palette
clrs = sns.color_palette("husl", len(gs))
gene2clr = {gene: clr for gene, clr in zip(gs, clrs)}


def color_violin(x, color, **kw):
    """Draw one horizontal violin coloured according to the facet's gene.

    The gene is read from the first row of ``kw["data"]``; the incoming
    ``color`` argument is accepted but not used, the colour comes from
    ``gene2clr`` instead.
    """
    facet_gene = kw["data"]["Gene"].iloc[0]
    sns.violinplot(
        x=x,
        color=gene2clr[facet_gene],
        orient="h",
        linewidth=1,
        cut=0,
        inner="box",
        **kw,
    )


sns.set(style="white", font_scale=1.0)
f = sns.FacetGrid(
    df,
    row="CellType",
    col="Gene",
    sharex=True,
    sharey=False,
    height=1.3,
    aspect=0.6,
    margin_titles=True,
)
f.map_dataframe(color_violin, "Expression")
f.set_titles(row_template="{row_name}", col_template="{col_name}")
f.set(xlabel="", ylabel="")

# Only the bottom row of panels carries an x-axis label
for bottom_ax in f.axes[-1]:
    bottom_ax.set_xlabel("Expression Level")

# Hide the y tick labels on every panel
for panel in f.axes.flat:
    panel.tick_params(labelleft=False)

sns.despine(left=True, bottom=True)
plt.tight_layout()
plt.show()
t-SNE plot
https://www.nature.com/articles/s41593-025-01878-6/figures/13
import numpy as np, matplotlib.pyplot as plt, seaborn as sns
from sklearn.manifold import TSNE
# One synthetic cluster per cell type
cts = ["Neuroblast", "NSC/progenitor", "Pericyte", "Astrocyte", "Ependymal cell",
       "Microglia", "Oligodendrocyte/OPC", "Neuron", "Endothelial", "NA"]
n = len(cts)   # number of clusters
s = 200        # points per cluster
d = 30         # dimensionality of the synthetic data
np.random.seed(42)

# Synthetic data: unit-variance Gaussian noise around a random centre per cluster
centers = np.random.randn(n, d) * 5
X = np.vstack([centers[k] + np.random.randn(s, d) for k in range(n)])
L = np.repeat(np.arange(n), s)  # cluster index of every row of X

# 2-D t-SNE embedding
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
emb = tsne.fit_transform(X)

# Plot: small dots per cell, a large "X" plus a boxed name at each cluster mean
sns.set(style="white", context="talk")
fig, ax = plt.subplots(figsize=(7, 7))
pal = sns.color_palette("Set2", n)
for k, (name, colour) in enumerate(zip(cts, pal)):
    members = emb[L == k]
    ax.scatter(members[:, 0], members[:, 1], color=colour, s=10, alpha=0.6)
    cx, cy = members.mean(axis=0)
    ax.scatter(cx, cy, color=colour, s=200, marker="X",
               edgecolor="k", lw=1, zorder=3)
    ax.text(cx, cy, name, fontsize=10, ha="center", va="center",
            bbox=dict(boxstyle="round", fc="white", alpha=0.8))
ax.set_xlabel("t-SNE1")
ax.set_ylabel("t-SNE2")
sns.despine()
plt.tight_layout()
plt.show()
Violin + Scatter plot
https://www.nature.com/articles/s41593-025-01878-6/figures/13
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt
# Three side-by-side QC panels: a violin per group with the individual
# observations overlaid as small dots.  The data are synthetic.
np.random.seed(42)
n_per_group = 200
group_names = ['CER_1', 'CER_2']

# (mean, sd) of the normal distribution used for each metric, listed in the
# same order as group_names.  Draw order is metric by metric, group by group.
metric_params = {
    'num_genes': [(3000, 1000), (5000, 1200)],
    'umi_count': [(5e4, 2e4), (8e4, 2.5e4)],
    'pct_mito': [(5, 2), (12, 3)],
}
table = {'group': np.repeat(group_names, n_per_group)}
for metric, per_group in metric_params.items():
    table[metric] = np.concatenate(
        [np.random.normal(mean, sd, n_per_group) for mean, sd in per_group]
    )
df = pd.DataFrame(table)

sns.set(style="ticks", context="talk")
fig, axes = plt.subplots(1, 3, figsize=(9, 6))
colors = ["#f79489", "#74c69d"]
panels = [
    ('num_genes', 'Number of genes'),
    ('umi_count', 'UMI count'),
    ('pct_mito', '% mitochondrial genes'),
]
for ax, (metric, title) in zip(axes, panels):
    sns.violinplot(x='group', y=metric, data=df, palette=colors, cut=0,
                   scale="width", inner="quartile", linewidth=1, ax=ax)
    sns.stripplot(x='group', y=metric, data=df, color='k', size=2,
                  alpha=0.4, ax=ax)
    ax.set_title(title, fontsize=12)
    ax.set_xlabel("")
    ax.set_ylabel("")
    # Slant the group names so they do not collide between panels
    ax.set_xticklabels(ax.get_xticklabels(), rotation=20, ha='right')

sns.despine()
plt.tight_layout()
plt.show()