#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Plots for pyPhi
@author: Sal Garcia <sgarciam@ic.ac.uk> <salvadorgarciamunoz@gmail.com>
Addition on Jan 20 2025 Added barplot and lineplot
Addition on Apr 29 2024 Made it compatible with Bokeh 3.4.1 replacing "circle" with "scatter"
Addition on Feb 24 2024 Replaced the randon number in the file names with a time string.
Addition on Feb 21 2024 Added ability to make score plots with a gradient color
of nbins based on a numerical value in the classids
Addition on Jan 18 2024 Added flag to score_scatter to include model scores in plot
replaced phi.unique -> np.unique
Updated call to maptplotlib colormap to keep it compatible
Addition on Sep 26 2023 All plots are now viewable offline (e.g. in airplane mode)
Addition on May 1 2023 corrected description of mb_vip
Addition on Apr 25 2023 added markersize to score_scatter
Addition on Apr 23 2023 also added the text_alpha flag to loadings map for PCA models
Addition on Apr 22 2023 added tooltips to contribution plots and VIP
implemented multiple columns in score scatter (yay!)
Addition on Apr 17 2023 added tpls to the supported models in all loadings, vip, r2pv
and score_scatter plots
Addition on Apr 15 2023, made all loadings, vip, r2pv and score_scatter compatible with
lpls and jrpls models
Addition on April 9 2023, added legends and pan tools to r2pv (yay!)
Addition on April 8 2023, fixed predvsobs to take MB data
Release Nov 15 2021
* Added "xgrid" flag to all plots using bar plots (loadings, weighted loadings, contributions) to add the Xgrid lines to the plot
Release Jan 15, 2021
* Added mb_blockweights plot for MBPSL models
Release Date: March 30 2020
* Fixed small syntax change for Bohek to stay compatible
Release Date: Aug 22 2019
What was done:
* This header is now included to track high level changes
"""
import numpy as np
from bokeh.io import show, output_file
from bokeh.plotting import figure
from bokeh.layouts import column
from bokeh.models import ColumnDataSource,LabelSet,Span,Legend
import pyphi as phi
import pandas as pd
from datetime import datetime
#import matplotlib.cm as cm
import matplotlib
[docs]
def timestr():
now=datetime.now()
return now.strftime("%Y%m%d%H%M%S%f")
[docs]
def r2pv(mvm_obj,*,plotwidth=600,plotheight=400,addtitle='',material=False,zspace=False):
"""R2 per variable per component plots
r2pv(mvm_obj,*,plotwidth=600,plotheight=400,addtitle='',material=False,zspace=False)
Args:
mvm_obj: A model created with phi.pca or phi.pls
Other Parameters:
material: To obtain the plot for the properties of a specific material
When doing this for a JRPLS or TPLS mode
zspace: If True will display the r2pv for the process space in a TPLS model
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
mvmobj=mvm_obj.copy()
A= mvmobj['T'].shape[1]
yaxlbl='X'
if (mvmobj['type']=='lpls') or (mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls'):
if ((mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls')) and not(isinstance(material, bool) ):
mvmobj['r2xpv']=mvmobj['r2xpvi'][mvmobj['materials'].index(material)]
mvmobj['varidX']=mvmobj['varidXi'][mvmobj['materials'].index(material) ]
elif (mvmobj['type']=='tpls') and zspace :
mvmobj['r2xpv']=mvmobj['r2zpv']
mvmobj['varidX']=mvmobj['varidZ']
yaxlbl='Z'
else:
num_varX=mvmobj['P'].shape[0]
if 'Q' in mvmobj:
is_pls=True
lv_prefix='LV #'
else:
is_pls=False
lv_prefix='PC #'
lv_labels = []
for a in list(np.arange(A)+1):
lv_labels.append(lv_prefix+str(a))
if 'varidX' in mvmobj:
r2pvX_dict = {'XVar': mvmobj['varidX']}
XVar=mvmobj['varidX']
else:
XVar = []
for n in list(np.arange(num_varX)+1):
XVar.append('XVar #'+str(n))
r2pvX_dict = {'XVar': XVar}
for i in list(np.arange(A)):
r2pvX_dict.update({lv_labels[i] : mvmobj['r2xpv'][:,i].tolist()})
if 'Q' in mvmobj:
num_varY=mvmobj['Q'].shape[0]
if 'varidY' in mvmobj:
r2pvY_dict = {'YVar': mvmobj['varidY']}
YVar=mvmobj['varidY']
else:
YVar = []
for n in list(np.arange(num_varY)+1):
YVar.append('YVar #'+str(n))
r2pvY_dict = {'YVar': YVar}
for i in list(np.arange(A)):
r2pvY_dict.update({lv_labels[i] : mvmobj['r2ypv'][:,i].tolist()})
if is_pls:
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("r2xypv_"+rnd_num+".html",title="R2"+ yaxlbl+ "YPV",mode='inline')
#colormap =cm.get_cmap("rainbow")
colormap = matplotlib.colormaps['rainbow']
different_colors=A
color_mapping=colormap(np.linspace(0,1,different_colors),1,True)
bokeh_palette=["#%02x%02x%02x" % (r, g, b) for r, g, b in color_mapping[:,0:3]]
px = figure(x_range=XVar, title="R2"+ yaxlbl+" Per Variable "+addtitle,
tools="save,box_zoom,xpan,hover,reset", tooltips="$name @XVar: @$name",width=plotwidth,height=plotheight)
v=px.vbar_stack(lv_labels, x='XVar', width=0.9,color=bokeh_palette,source=r2pvX_dict)
px.y_range.range_padding = 0.1
px.ygrid.grid_line_color = None
px.xgrid.grid_line_color = None
px.axis.minor_tick_line_color = None
px.outline_line_color = None
px.yaxis.axis_label = 'R2'+ yaxlbl
px.xaxis.major_label_orientation = 45
legend = Legend(items=[(x, [v[i]]) for i, x in enumerate(lv_labels)], location=(0, 0))
px.add_layout(legend, 'right')
py = figure(x_range=YVar, height=plotheight, title="R2Y Per Variable "+addtitle,
tools="save,box_zoom,xpan,hover,reset", tooltips="$name @YVar: @$name",width=plotwidth)
v=py.vbar_stack(lv_labels, x='YVar', width=0.9,color=bokeh_palette,source=r2pvY_dict)
py.y_range.range_padding = 0.1
py.ygrid.grid_line_color = None
py.axis.minor_tick_line_color = None
py.xgrid.grid_line_color = None
py.outline_line_color = None
py.yaxis.axis_label = 'R2Y'
py.xaxis.major_label_orientation = 45
legend = Legend(items=[(x, [v[i]]) for i, x in enumerate(lv_labels)], location=(0, 0))
py.add_layout(legend, 'right')
show(column(px,py))
else:
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("r2xpv_"+rnd_num+".html",title='R2XPV',mode='inline')
#colormap =cm.get_cmap("rainbow")
colormap = matplotlib.colormaps['rainbow']
different_colors=A
color_mapping=colormap(np.linspace(0,1,different_colors),1,True)
bokeh_palette=["#%02x%02x%02x" % (r, g, b) for r, g, b in color_mapping[:,0:3]]
p = figure(x_range=XVar, title="R2X Per Variable "+addtitle,
tools="save,box_zoom,xpan,hover,reset", tooltips="$name @XVar: @$name",width=plotwidth,height=plotheight)
v=p.vbar_stack(lv_labels, x='XVar', width=0.9,color=bokeh_palette,source=r2pvX_dict)
legend = Legend(items=[(x, [v[i]]) for i, x in enumerate(lv_labels)], location=(0, 0))
p.y_range.range_padding = 0.1
p.ygrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None
p.yaxis.axis_label = 'R2X'
p.xaxis.major_label_orientation = 45
p.add_layout(legend, 'right')
show(p)
return
[docs]
def loadings(mvm_obj,*,plotwidth=600,xgrid=False,addtitle='',material=False,zspace=False):
"""Column plots of loadings
loadings(mvm_obj,*,plotwidth=600,xgrid=False,addtitle='',material=False,zspace=False)
Args:
mvm_obj: A PCA,PLS, LPLS, JPLS or TPLS model
Other Parameters:
material: To obtain the plot for the properties of a specific material
When doing this for a JRPLS or TPLS mode
zspace: If True will display the plot for the process space in a TPLS model
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
mvmobj=mvm_obj.copy()
space_lbl='X'
A= mvmobj['T'].shape[1]
if (mvmobj['type']=='lpls') or (mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls'):
loading_lbl='S*'
if (mvmobj['type']=='lpls'):
mvmobj['Ws']=mvmobj['Ss']
if isinstance(material, bool) and not(zspace):
mvmobj['Ws']=mvmobj['Ss']
if ((mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls') ) and not(isinstance(material, bool) ):
mvmobj['Ws']=mvmobj['Ssi'][mvmobj['materials'].index(material)]
mvmobj['varidX']=mvmobj['varidXi'][mvmobj['materials'].index(material) ]
elif (mvmobj['type']=='tpls') and zspace :
mvmobj['varidX']=mvmobj['varidZ']
loading_lbl='Wz*'
space_lbl='Z'
else:
num_varX=mvmobj['P'].shape[0]
loading_lbl='W*'
if 'Q' in mvmobj:
is_pls=True
lv_prefix='LV #'
else:
is_pls=False
lv_prefix='PC #'
lv_labels = []
for a in list(np.arange(A)+1):
lv_labels.append(lv_prefix+str(a))
if 'varidX' in mvmobj:
X_loading_dict = {'XVar': mvmobj['varidX']}
XVar=mvmobj['varidX']
else:
XVar = []
for n in list(np.arange(num_varX)+1):
XVar.append('XVar #'+str(n))
X_loading_dict = {'XVar': XVar}
if 'Q' in mvmobj:
for i in list(np.arange(A)):
X_loading_dict.update({lv_labels[i] : mvmobj['Ws'][:,i].tolist()})
num_varY=mvmobj['Q'].shape[0]
if 'varidY' in mvmobj:
Q_dict = {'YVar': mvmobj['varidY']}
YVar=mvmobj['varidY']
else:
YVar = []
for n in list(np.arange(num_varY)+1):
YVar.append('YVar #'+str(n))
Q_dict = {'YVar': YVar}
for i in list(np.arange(A)):
Q_dict.update({lv_labels[i] : mvmobj['Q'][:,i].tolist()})
else:
for i in list(np.arange(A)):
X_loading_dict.update({lv_labels[i] : mvmobj['P'][:,i].tolist()})
TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
TOOLTIPS = [
("Variable:","@names")
]
if is_pls:
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Loadings "+space_lbl+" Space_"+rnd_num+".html",title=space_lbl+' Loadings PLS',mode='inline')
for i in list(np.arange(A)):
p = figure(x_range=XVar, title=space_lbl+" Space Loadings "+lv_labels[i]+addtitle,
tools=TOOLS,tooltips=TOOLTIPS,width=plotwidth)
source1 = ColumnDataSource(data=dict(x_=XVar, y_=mvmobj['Ws'][:,i].tolist(),names=XVar))
#p.vbar(x=XVar, top=mvmobj['Ws'][:,i].tolist(), width=0.5)
p.vbar(x='x_', top='y_', source=source1,width=0.5)
p.ygrid.grid_line_color = None
if xgrid:
p.xgrid.grid_line_color = 'lightgray'
else:
p.xgrid.grid_line_color = None
p.yaxis.axis_label = loading_lbl+' ['+str(i+1)+']'
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([hline])
p.xaxis.major_label_orientation = 45
if i==0:
p_list=[p]
else:
p_list.append(p)
show(column(p_list))
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Loadings Y Space_"+rnd_num+".html",title='Y Loadings PLS',mode='inline')
for i in list(np.arange(A)):
p = figure(x_range=YVar, title="Y Space Loadings "+lv_labels[i]+addtitle,
tools="save,box_zoom,pan,reset",tooltips=TOOLTIPS,width=plotwidth)
source1 = ColumnDataSource(data=dict(x_=YVar, y_=mvmobj['Q'][:,i].tolist(),names=YVar))
#p.vbar(x=YVar, top=mvmobj['Q'][:,i].tolist(), width=0.5)
p.vbar(x='x_', top='y_', source=source1,width=0.5)
p.ygrid.grid_line_color = None
if xgrid:
p.xgrid.grid_line_color = 'lightgray'
else:
p.xgrid.grid_line_color = None
p.yaxis.axis_label = 'Q ['+str(i+1)+']'
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([hline])
p.xaxis.major_label_orientation = 45
if i==0:
p_list=[p]
else:
p_list.append(p)
show(column(p_list))
else:
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Loadings X Space_"+rnd_num+".html",title='X Loadings PCA',mode='inline')
for i in list(np.arange(A)):
source1 = ColumnDataSource(data=dict(x_=XVar, y_=mvmobj['P'][:,i].tolist(),names=XVar))
p = figure(x_range=XVar, title="X Space Loadings "+lv_labels[i]+addtitle,
tools=TOOLS,tooltips=TOOLTIPS,width=plotwidth)
#p.vbar(x=XVar, top=mvmobj['P'][:,i].tolist(), width=0.5)
p.vbar(x='x_', top='y_', source=source1,width=0.5)
if xgrid:
p.xgrid.grid_line_color = 'lightgray'
else:
p.xgrid.grid_line_color = None
p.yaxis.axis_label = 'P ['+str(i+1)+']'
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([hline])
p.xaxis.major_label_orientation = 45
if i==0:
p_list=[p]
else:
p_list.append(p)
show(column(p_list))
return
[docs]
def loadings_map(mvm_obj,dims,*,plotwidth=600,addtitle='',material=False,zspace=False,textalpha=0.75):
"""Scatter plot overlaying X and Y loadings
loadings_map(mvm_obj,dims,*,plotwidth=600,addtitle='',material=False,zspace=False,textalpha=0.75)
Args:
mvm_obj: A PCA,PLS, LPLS, JPLS or TPLS model
dims: Dimensions to plot in x and y axis (e.g. [1,2])
Other Parameters:
material: To obtain the plot for the properties of a specific material
When doing this for a JRPLS or TPLS mode
zspace: If True will display the plot for the process space in a TPLS model
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
mvmobj=mvm_obj.copy()
A= mvmobj['T'].shape[1]
if (mvmobj['type']=='lpls') or (mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls'):
if (mvmobj['type']=='lpls'):
mvmobj['Ws']=mvmobj['Ss']
if isinstance(material, bool) and not(zspace):
mvmobj['Ws']=mvmobj['Ss']
if ((mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls')) and not(isinstance(material, bool) ):
mvmobj['Ws']=mvmobj['Ssi'][mvmobj['materials'].index(material)]
mvmobj['varidX']=mvmobj['varidXi'][mvmobj['materials'].index(material) ]
elif (mvmobj['type']=='tpls') and zspace :
mvmobj['varidX']=mvmobj['varidZ']
else:
num_varX=mvmobj['P'].shape[0]
if 'Q' in mvmobj:
lv_prefix='LV #'
lv_labels = []
for a in list(np.arange(A)+1):
lv_labels.append(lv_prefix+str(a))
if 'varidX' in mvmobj:
XVar=mvmobj['varidX']
else:
XVar = []
for n in list(np.arange(num_varX)+1):
XVar.append('XVar #'+str(n))
num_varY=mvmobj['Q'].shape[0]
if 'varidY' in mvmobj:
YVar=mvmobj['varidY']
else:
YVar = []
for n in list(np.arange(num_varY)+1):
YVar.append('YVar #'+str(n))
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Loadings Map"+rnd_num+".html",title='Loadings Map',mode='inline')
x_ws = mvmobj['Ws'][:,dims[0]-1]
x_ws = x_ws/np.max(np.abs(x_ws))
y_ws = mvmobj['Ws'][:,dims[1]-1]
y_ws = y_ws/np.max(np.abs(y_ws))
x_q = mvmobj['Q'][:,dims[0]-1]
x_q = x_q/np.max(np.abs(x_q))
y_q = mvmobj['Q'][:,dims[1]-1]
y_q = y_q/np.max(np.abs(y_q))
TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
TOOLTIPS = [
("index", "$index"),
("(x,y)", "($x, $y)"),
("Variable:","@names")
]
source1 = ColumnDataSource(data=dict(x=x_ws, y=y_ws,names=XVar))
source2 = ColumnDataSource(data=dict(x=x_q, y=y_q,names=YVar))
p = figure(tools=TOOLS, tooltips=TOOLTIPS,width=plotwidth, title="Loadings Map LV["+str(dims[0])+"] - LV["+str(dims[1])+"] "+addtitle,
x_range=(-1.5,1.5),y_range=(-1.5,1.5))
#p.circle('x', 'y', source=source1,size=10,color='darkblue')
#p.circle('x', 'y', source=source2,size=10,color='red')
p.scatter(x='x',y= 'y', source=source1,size=10,color='darkblue')
p.scatter('x', 'y', source=source2,size=10,color='red')
p.xaxis.axis_label = lv_labels [dims[0]-1]
p.yaxis.axis_label = lv_labels [dims[1]-1]
labelsX = LabelSet(x='x', y='y', text='names',
level='glyph',x_offset=5, y_offset=5,
source=source1,text_color='darkgray',
text_alpha=textalpha )
labelsY = LabelSet(x='x', y='y', text='names',
level='glyph',x_offset=5, y_offset=5,
source=source2,text_color='darkgray',
text_alpha=textalpha )
p.add_layout(labelsX)
p.add_layout(labelsY)
vline = Span(location=0, dimension='height', line_color='black', line_width=2)
# Horizontal line
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([vline, hline])
show(p)
else:
lv_prefix='PC #'
lv_labels = []
for a in list(np.arange(A)+1):
lv_labels.append(lv_prefix+str(a))
if 'varidX' in mvmobj:
XVar=mvmobj['varidX']
else:
XVar = []
for n in list(np.arange(num_varX)+1):
XVar.append('XVar #'+str(n))
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Loadings Map"+rnd_num+".html",title='Loadings Map',mode='inline')
x_p = mvmobj['P'][:,dims[0]-1]
y_p = mvmobj['P'][:,dims[1]-1]
TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
TOOLTIPS = [
("index", "$index"),
("(x,y)", "($x, $y)"),
("Variable:","@names")
]
source1 = ColumnDataSource(data=dict(x=x_p, y=y_p,names=XVar))
p = figure(tools=TOOLS, tooltips=TOOLTIPS,width=plotwidth, title="Loadings Map PC["+str(dims[0])+"] - PC["+str(dims[1])+"] "+addtitle, x_range=(-1.5,1.5),y_range=(-1.5,1.5))
#p.circle('x', 'y', source=source1,size=10,color='darkblue')
p.scatter(x='x',y='y', source=source1,size=10,color='darkblue')
p.xaxis.axis_label = lv_labels [dims[0]-1]
p.yaxis.axis_label = lv_labels [dims[1]-1]
labelsX = LabelSet(x='x', y='y', text='names', level='glyph',x_offset=5, y_offset=5, source=source1,
text_color='darkgray',text_alpha=textalpha)
p.add_layout(labelsX)
vline = Span(location=0, dimension='height', line_color='black', line_width=2)
# Horizontal line
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([vline, hline])
show(p)
return
[docs]
def weighted_loadings(mvm_obj,*,plotwidth=600,xgrid=False,addtitle='',material=False,zspace=False):
"""Column plots of loadings weighted by r2x/r2y correspondingly
weighted_loadings(mvm_obj,*,plotwidth=600,xgrid=False,addtitle='',material=False,zspace=False):
Args:
mvm_obj: A PCA,PLS, LPLS, JPLS or TPLS model
Other Parameters:
material: To obtain the plot for the properties of a specific material
When doing this for a JRPLS or TPLS mode
zspace: If True will display the plot for the process space in a TPLS model
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
mvmobj=mvm_obj.copy()
A= mvmobj['T'].shape[1]
space_lbl='X'
if (mvmobj['type']=='lpls') or (mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls'):
loading_lbl='S*'
if (mvmobj['type']=='lpls'):
mvmobj['Ws']=mvmobj['Ss']
if isinstance(material, bool) and not(zspace):
mvmobj['Ws']=mvmobj['Ss']
if ((mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls')) and not(isinstance(material, bool) ):
mvmobj['Ws']=mvmobj['Ssi'][mvmobj['materials'].index(material)]
mvmobj['varidX']=mvmobj['varidXi'][mvmobj['materials'].index(material) ]
mvmobj['r2xpv']=mvmobj['r2xpvi'][mvmobj['materials'].index(material) ]
elif (mvmobj['type']=='tpls') and zspace:
mvmobj['varidX']=mvmobj['varidZ']
mvmobj['r2xpv']=mvmobj['r2zpv']
loading_lbl='Wz*'
space_lbl='Z'
else:
num_varX=mvmobj['P'].shape[0]
loading_lbl='W*'
if 'Q' in mvmobj:
is_pls=True
lv_prefix='LV #'
else:
is_pls=False
lv_prefix='PC #'
lv_labels = []
for a in list(np.arange(A)+1):
lv_labels.append(lv_prefix+str(a))
if 'varidX' in mvmobj:
X_loading_dict = {'XVar': mvmobj['varidX']}
XVar=mvmobj['varidX']
else:
XVar = []
for n in list(np.arange(num_varX)+1):
XVar.append('XVar #'+str(n))
X_loading_dict = {'XVar': XVar}
if 'Q' in mvmobj:
for i in list(np.arange(A)):
X_loading_dict.update({lv_labels[i] : mvmobj['Ws'][:,i].tolist()})
num_varY=mvmobj['Q'].shape[0]
if 'varidY' in mvmobj:
Q_dict = {'YVar': mvmobj['varidY']}
YVar=mvmobj['varidY']
else:
YVar = []
for n in list(np.arange(num_varY)+1):
YVar.append('YVar #'+str(n))
Q_dict = {'YVar': YVar}
for i in list(np.arange(A)):
Q_dict.update({lv_labels[i] : mvmobj['Q'][:,i].tolist()})
else:
for i in list(np.arange(A)):
X_loading_dict.update({lv_labels[i] : mvmobj['P'][:,i].tolist()})
TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
TOOLTIPS = [
("Variable:","@names")
]
if is_pls:
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Loadings "+space_lbl+" Space_"+rnd_num+".html",title=space_lbl+' Weighted Loadings PLS',mode='inline')
for i in list(np.arange(A)):
p = figure(x_range=XVar, title=space_lbl+" Space Weighted Loadings "+lv_labels[i]+addtitle,
tools=TOOLS,tooltips=TOOLTIPS,width=plotwidth)
source1 = ColumnDataSource(data=dict(x_=XVar, y_=(mvmobj['r2xpv'][:,i] * mvmobj['Ws'][:,i]).tolist(),names=XVar))
#p.vbar(x=XVar, top=(mvmobj['r2xpv'][:,i] * mvmobj['Ws'][:,i]).tolist(), width=0.5)
p.vbar(x='x_', top='y_', source=source1,width=0.5)
p.ygrid.grid_line_color = None
if xgrid:
p.xgrid.grid_line_color = 'lightgray'
else:
p.xgrid.grid_line_color = None
p.yaxis.axis_label = loading_lbl+' x R2'+space_lbl+' ['+str(i+1)+']'
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([hline])
p.xaxis.major_label_orientation = 45
if i==0:
p_list=[p]
else:
p_list.append(p)
show(column(p_list))
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Loadings Y Space_"+rnd_num+".html",title='Y Weighted Loadings PLS',mode='inline')
for i in list(np.arange(A)):
p = figure(x_range=YVar, title="Y Space Weighted Loadings "+lv_labels[i]+addtitle,
tools=TOOLS,tooltips=TOOLTIPS,width=plotwidth)
source1 = ColumnDataSource(data=dict(x_=YVar, y_=(mvmobj['r2ypv'][:,i] * mvmobj['Q'][:,i]).tolist(),names=YVar))
#p.vbar(x=YVar, top=(mvmobj['r2ypv'][:,i] * mvmobj['Q'][:,i]).tolist(), width=0.5)
p.vbar(x='x_', top='y_', source=source1,width=0.5)
p.ygrid.grid_line_color = None
if xgrid:
p.xgrid.grid_line_color = 'lightgray'
else:
p.xgrid.grid_line_color = None
p.yaxis.axis_label = 'Q x R2Y ['+str(i+1)+']'
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([hline])
p.xaxis.major_label_orientation = 45
if i==0:
p_list=[p]
else:
p_list.append(p)
show(column(p_list))
else:
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Loadings X Space_"+rnd_num+".html",title='X Weighted Loadings PCA',mode='inline')
for i in list(np.arange(A)):
p = figure(x_range=XVar, title="X Space Weighted Loadings "+lv_labels[i]+addtitle,
tools=TOOLS,tooltips=TOOLTIPS,width=plotwidth)
source1 = ColumnDataSource(data=dict(x_=XVar, y_=(mvmobj['r2xpv'][:,i] * mvmobj['P'][:,i]).tolist(),names=XVar))
#p.vbar(x=XVar, top=(mvmobj['r2xpv'][:,i] * mvmobj['P'][:,i]).tolist(), width=0.5)
p.vbar(x='x_', top='y_', source=source1,width=0.5)
p.ygrid.grid_line_color = None
if xgrid:
p.xgrid.grid_line_color = 'lightgray'
else:
p.xgrid.grid_line_color = None
p.yaxis.axis_label = 'P x R2X['+str(i+1)+']'
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([hline])
p.xaxis.major_label_orientation = 45
if i==0:
p_list=[p]
else:
p_list.append(p)
show(column(p_list))
return
[docs]
def vip(mvm_obj,*,plotwidth=600,material=False,zspace=False,addtitle=''):
"""Very Important to the Projection (VIP) plot
vip(mvm_obj,*,plotwidth=600,material=False,zspace=False,addtitle='')
Args:
mvm_obj: A PLS, LPLS, JPLS or TPLS model
Other Parameters:
material: To obtain the plot for the properties of a specific material
When doing this for a JRPLS or TPLS mode
zspace: If True will display the vip for the process space in a TPLS model
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
mvm_obj: A model created with phi.pls
"""
mvmobj=mvm_obj.copy()
if 'Q' in mvmobj:
if (mvmobj['type']=='lpls') or (mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls'):
if (mvmobj['type']=='lpls'):
mvmobj['Ws']=mvmobj['Ss']
if isinstance(material, bool) and not(zspace):
mvmobj['Ws']=mvmobj['Ss']
if ((mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls')) and not(isinstance(material, bool) ):
mvmobj['Ws']=mvmobj['Ssi'][mvmobj['materials'].index(material)]
mvmobj['varidX']=mvmobj['varidXi'][mvmobj['materials'].index(material) ]
elif (mvmobj['type']=='tpls') and zspace:
mvmobj['varidX']=mvmobj['varidZ']
else:
num_varX=mvmobj['P'].shape[0]
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("VIP_"+rnd_num+".html",title='VIP Coefficient',mode='inline')
if 'varidX' in mvmobj:
XVar=mvmobj['varidX']
else:
XVar = []
for n in list(np.arange(num_varX)+1):
XVar.append('XVar #'+str(n))
vip=np.sum(np.abs(mvmobj['Ws'] * np.tile(mvmobj['r2y'],(mvmobj['Ws'].shape[0],1)) ),axis=1)
vip=np.reshape(vip,(len(vip),-1))
sort_indx=np.argsort(-vip,axis=0)
vip=vip[sort_indx]
sorted_XVar=[]
for i in sort_indx[:,0]:
sorted_XVar.append(XVar[i])
TOOLTIPS = [
("Variable","@names")
]
p = figure(x_range=sorted_XVar, title="VIP "+addtitle,
tools="save,box_zoom,pan,reset",tooltips=TOOLTIPS,width=plotwidth)
source1 = ColumnDataSource(data=dict(x_=sorted_XVar, y_=vip.tolist(),names=sorted_XVar))
#p.vbar(x=sorted_XVar, top=vip.tolist(), width=0.5)
p.vbar(x='x_', top='y_', source=source1,width=0.5)
p.xgrid.grid_line_color = None
p.yaxis.axis_label = 'Very Important to the Projection'
p.xaxis.major_label_orientation = 45
show(p)
return
def _create_classid_(df,column,*,nbins=5):
'''
Internal routine to create a CLASSID dataframe from values in a column
'''
hist,bin_edges=np.histogram(df[column].values[np.logical_not(np.isnan(df[column].values))],bins=nbins )
range_list=[]
for i,e in enumerate(bin_edges[:-1]):
range_list.append(str(np.round(bin_edges[i],3))+' to '+ str(np.round(bin_edges[i+1],3)))
#range_list.append('NaN')
bin_edges_=bin_edges.copy()
bin_edges_[-1]=bin_edges_[-1]+0.1
membership_=np.digitize(df[column].values,bin_edges_)
membership=[]
for m in membership_:
membership.append(range_list[m-1])
classid_df=df[df.columns[0]].to_frame()
classid_df.insert(1,column,membership)
return classid_df
[docs]
def score_scatter(mvm_obj,xydim,*,CLASSID=False,colorby=False,Xnew=False,
add_ci=False,add_labels=False,add_legend=True,legend_cols=1,
addtitle='',plotwidth=600,plotheight=600,
rscores=False,material=False,marker_size=7,nbins=False,include_model=False):
'''Score scatter plot
score_scatter(mvm_obj,xydim,*,CLASSID=False,colorby=False,nbins=False,Xnew=False,
add_ci=False,add_labels=False,add_legend=True,legend_cols=1,
addtitle='',plotwidth=600,plotheight=600,
rscores=False,material=False,marker_size=7,include_model=False):
Args:
mvm_obj : PLS or PCA object from phyphi
xydim : LV to plot on x and y axes. eg [1,2] will plot t1 vs t2
CLASSID : Pandas DataFrame with CLASSIDS
colorby : Category (one of the CLASSIDS) to color by
Xnew : New data for which to make the score plot this routine evaluates and plots
nbins : Number of groups to use when color coding by a numeric value
add_ci : when = True will add confidence intervals
add_labels : When = True labels each point with Obs ID
add_legend : When = True will add a legend with classid labels
legend_cols: Number of columns for legend
addtitle : Additional text to be added to title
plotwidth : If omitted, width is 600
plotheight : If omitted, height is 600
rscores : Plot scores for all material space in lpls|jrpls|tpls
material : Label for specific material to plot scores for in lpls|jrpls|tpls
include_model: Will plot model scores in gray as backgrpound
by Salvador Garcia Munoz (sgarciam@imperial.ac.uk salvadorgarciamunoz@gmail.com)
'''
if not(isinstance(nbins, bool)):
if colorby in CLASSID.columns.to_list():
classid_by_var = _create_classid_(CLASSID,colorby,nbins=nbins)
CLASSID = classid_by_var.copy()
mvmobj=mvm_obj.copy()
if ((mvmobj['type']=='lpls') or (mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls')) and (not(isinstance(Xnew,bool))):
Xnew=False
print('score scatter does not take Xnew for jrpls or lpls for now')
if isinstance(Xnew,bool):
if 'obsidX' in mvmobj:
ObsID_=mvmobj['obsidX']
else:
ObsID_ = []
for n in list(np.arange(mvmobj['T'].shape[0])+1):
ObsID_.append('Obs #'+str(n))
T_matrix=mvmobj['T']
if not(rscores):
if (mvmobj['type']=='lpls'):
ObsID_=mvmobj['obsidR']
if (mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls') :
ObsID_=mvmobj['obsidRi'][0]
else:
if (mvmobj['type']=='lpls'):
ObsID_=mvmobj['obsidX']
T_matrix=mvmobj['Rscores']
if (mvmobj['type']=='jrpls') or (mvmobj['type']=='tpls') :
if isinstance(material,bool):
allobsids=[y for x in mvmobj['obsidXi'] for y in x]
ObsID_=allobsids
clssid_obs=[]
clssid_class=[]
for i,R_ in enumerate(mvmobj['Rscores']):
clssid_obs.extend(mvmobj['obsidXi'][i])
clssid_class.extend([mvmobj['materials'][i]]*len( mvmobj['obsidXi'][i]))
if i==0:
allrscores=R_
else:
allrscores=np.vstack((allrscores,R_))
classid=pd.DataFrame(clssid_class,columns=['material'])
classid.insert(0,'obs',clssid_obs)
CLASSID=classid
colorby='material'
T_matrix=allrscores
else:
ObsID_ = mvmobj['obsidXi'][mvmobj['materials'].index(material) ]
T_matrix = mvmobj['Rscores'][mvmobj['materials'].index(material) ]
else:
if isinstance(Xnew,np.ndarray):
X_=Xnew.copy()
ObsID_ = []
for n in list(np.arange(Xnew.shape[0])+1):
ObsID_.append('Obs #'+str(n))
elif isinstance(Xnew,pd.DataFrame):
X_=np.array(Xnew.values[:,1:]).astype(float)
ObsID_ = Xnew.values[:,0].astype(str)
ObsID_ = ObsID_.tolist()
if 'Q' in mvmobj:
xpred=phi.pls_pred(X_,mvmobj)
else:
xpred=phi.pca_pred(X_,mvmobj)
T_matrix=xpred['Tnew']
if include_model:
if 'obsidX' in mvmobj:
ObsID__=mvmobj['obsidX'].copy()
else:
ObsID__ = []
for n in list(np.arange(mvmobj['T'].shape[0])+1):
ObsID__.append('Model Obs #'+str(n))
T_matrix_=mvmobj['T'].copy()
if isinstance(CLASSID,bool): #If there are no classids I need to create one
source=(['Model']*T_matrix_.shape[0])
source.extend(['New']*T_matrix.shape[0])
ObsID__.extend(ObsID_)
CLASSID=pd.DataFrame.from_dict( {'ObsID':ObsID__,'_Source_':source })
colorby='_Source_'
else: #IF there are I need to augment it
source=['Model']*T_matrix_.shape[0]
CLASSID_=pd.DataFrame.from_dict( {CLASSID.columns[0]:ObsID__,colorby:source })
ObsID__.extend(ObsID_)
CLASSID = pd.concat([CLASSID_,CLASSID])
ObsID_=ObsID__.copy()
T_matrix=np.vstack((T_matrix_,T_matrix ))
ObsNum_=[]
for n in list(range(1,len(ObsID_)+1)):
ObsNum_.append(str(n))
if isinstance(CLASSID,bool): # No CLASSIDS
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Score_Scatter_"+rnd_num+".html",title='Score Scatter t['+str(xydim[0])+'] - t['+str(xydim[1])+ ']',mode='inline')
x_=T_matrix[:,[xydim[0]-1]]
y_=T_matrix[:,[xydim[1]-1]]
source = ColumnDataSource(data=dict(x=x_, y=y_,ObsID=ObsID_,ObsNum=ObsNum_))
TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
TOOLTIPS = [
("Obs #", "@ObsNum"),
("(x,y)", "($x, $y)"),
("Obs: ","@ObsID")
]
p = figure(tools=TOOLS, tooltips=TOOLTIPS,width=plotwidth,height=plotheight, title='Score Scatter t['+str(xydim[0])+'] - t['+str(xydim[1])+ '] '+addtitle)
#p.circle('x', 'y', source=source,size=marker_size)
p.scatter('x', 'y', source=source,size=marker_size)
if add_ci:
T_aux1=mvmobj['T'][:,[xydim[0]-1]]
T_aux2=mvmobj['T'][:,[xydim[1]-1]]
T_aux = np.hstack((T_aux1,T_aux2))
st=(T_aux.T @ T_aux)/T_aux.shape[0]
[xd95,xd99,yd95p,yd95n,yd99p,yd99n]=phi.scores_conf_int_calc(st,mvmobj['T'].shape[0])
p.line(xd95,yd95p,line_color="gold",line_dash='dashed')
p.line(xd95,yd95n,line_color="gold",line_dash='dashed')
p.line(xd99,yd99p,line_color="red",line_dash='dashed')
p.line(xd99,yd99n,line_color="red",line_dash='dashed')
if add_labels:
labelsX = LabelSet(x='x', y='y', text='ObsID', level='glyph',x_offset=5, y_offset=5, source=source)
p.add_layout(labelsX)
if not(rscores):
p.xaxis.axis_label = 't ['+str(xydim[0])+']'
p.yaxis.axis_label = 't ['+str(xydim[1])+']'
else:
p.xaxis.axis_label = 'r ['+str(xydim[0])+']'
p.yaxis.axis_label = 'r ['+str(xydim[1])+']'
# Vertical line
vline = Span(location=0, dimension='height', line_color='black', line_width=2)
# Horizontal line
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([vline, hline])
show(p)
else: # YES CLASSIDS
Classes_=phi.unique(CLASSID,colorby)
A=len(Classes_)
colormap = matplotlib.colormaps['rainbow']
different_colors=A
color_mapping=colormap(np.linspace(0,1,different_colors),1,True)
#Test code to overwrite "Model" Category with light Cyan
if Classes_[0]=='Model':
color_mapping=colormap(np.linspace(0,1,different_colors-1),1,True)
color_mapping=np.vstack((np.array([225,225,225,255]),color_mapping))
if not(isinstance(nbins, bool)):
if colorby in CLASSID.columns.to_list():
#Classes_=phi.unique(CLASSID,colorby)
Classes_=np.unique(CLASSID[colorby]).tolist()
item_o=[]
for item in Classes_:
item_o.append(float(item[:item.find(' ')]))
idx=idx=np.argsort(item_o)
Classes_o=[]
for i in idx:
Classes_o.append(Classes_[i])
Classes_=Classes_o.copy()
#print(Classes_)
A=len(Classes_)
#colormap =cm.get_cmap("rainbow")
colormap = matplotlib.colormaps['viridis']
different_colors=A
color_mapping=colormap(np.linspace(0,1,different_colors),1,True)
if Classes_[0]=='Model':
color_mapping=colormap(np.linspace(0,1,different_colors-1),1,True)
color_mapping=np.vstack((np.array([225,225,225,255]),color_mapping))
bokeh_palette=["#%02x%02x%02x" % (r, g, b) for r, g, b in color_mapping[:,0:3]]
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Score_Scatter_"+rnd_num+".html",title='Score Scatter t['+str(xydim[0])+'] - t['+str(xydim[1])+ ']',mode='inline')
x_=T_matrix[:,[xydim[0]-1]]
y_=T_matrix[:,[xydim[1]-1]]
TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
TOOLTIPS = [
("Obs #", "@ObsNum"),
("(x,y)", "($x, $y)"),
("Obs: ","@ObsID"),
("Class:","@Class")
]
# if not(isinstance(nbins, bool)):
# if colorby in CLASSID.columns.to_list():
# classid_=list(classid_by_var[colorby])
# else:
classid_=list(CLASSID[colorby])
legend_it = []
p = figure(tools=TOOLS, tooltips=TOOLTIPS,toolbar_location="above",width=plotwidth,height=plotheight,title='Score Scatter t['+str(xydim[0])+'] - t['+str(xydim[1])+ '] '+addtitle)
for classid_in_turn in Classes_:
x_aux = []
y_aux = []
obsid_aux = []
obsnum_aux = []
classid_aux = []
for i in list(range(len(ObsID_))):
if classid_[i]==classid_in_turn:
x_aux.append(x_[i][0])
y_aux.append(y_[i][0])
obsid_aux.append(ObsID_[i])
obsnum_aux.append(ObsNum_[i])
classid_aux.append(classid_in_turn)
source = ColumnDataSource(data=dict(x=x_aux, y=y_aux,ObsID=obsid_aux,ObsNum=obsnum_aux, Class=classid_aux))
color_=bokeh_palette[Classes_.index(classid_in_turn)]
if add_legend:
#c = p.circle('x','y',source=source,color=color_,size=marker_size)
c = p.scatter('x','y',source=source,color=color_,size=marker_size)
aux_=classid_in_turn
if isinstance(aux_,(float,int)):
aux_=str(aux_)
#legend_it.append((classid_in_turn, [c]))
legend_it.append((aux_, [c]))
else:
#p.circle('x','y',source=source,color=color_,size=marker_size)
p.scatter('x','y',source=source,color=color_,size=marker_size)
if add_labels:
labelsX = LabelSet(x='x', y='y', text='ObsID', level='glyph',x_offset=5, y_offset=5, source=source)
p.add_layout(labelsX)
if add_ci:
T_aux1=mvmobj['T'][:,[xydim[0]-1]]
T_aux2=mvmobj['T'][:,[xydim[1]-1]]
T_aux = np.hstack((T_aux1,T_aux2))
st=(T_aux.T @ T_aux)/T_aux.shape[0]
[xd95,xd99,yd95p,yd95n,yd99p,yd99n]=phi.scores_conf_int_calc(st,mvmobj['T'].shape[0])
p.line(xd95,yd95p,line_color="gold",line_dash='dashed')
p.line(xd95,yd95n,line_color="gold",line_dash='dashed')
p.line(xd99,yd99p,line_color="red",line_dash='dashed')
p.line(xd99,yd99n,line_color="red",line_dash='dashed')
if not(rscores):
p.xaxis.axis_label = 't ['+str(xydim[0])+']'
p.yaxis.axis_label = 't ['+str(xydim[1])+']'
else:
p.xaxis.axis_label = 'r ['+str(xydim[0])+']'
p.yaxis.axis_label = 'r ['+str(xydim[1])+']'
# Vertical line
vline = Span(location=0, dimension='height', line_color='black', line_width=2)
# Horizontal line
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([vline, hline])
if add_legend:
#legend_cols=1
ipc=[np.round(len(legend_it)/legend_cols)]*legend_cols
ipc[-1]=len(legend_it)-sum(ipc[:-1])
pastit=0
for it in ipc:
leg_ = Legend(
items=legend_it[int(0+pastit):int(pastit+it)])
#location=(0,15+pastit*5))
pastit+=it
p.add_layout(leg_, 'right')
leg_.click_policy="hide"
#legend = Legend(items=legend_it, location='top_right')
#p.add_layout(legend, 'right')
show(p)
return
[docs]
def score_line(mvmobj,dim,*,CLASSID=False,colorby=False,Xnew=False,add_ci=False,add_labels=False,add_legend=True,plotline=True,plotwidth=600,plotheight=600):
'''Score line plot
score_line(mvmobj,dim,*,CLASSID=False,colorby=False,Xnew=False,add_ci=False,add_labels=False,add_legend=True,plotline=True,plotwidth=600,plotheight=600):
Args:
mvmobj : PLS or PCA object from phyphi
dim : LV to plot eg "1" will plot t1 vs observation #
CLASSID : Pandas DataFrame with CLASSIDS
colorby : Category (one of the CLASSIDS) to color by
Xnew : New data for which to make the score plot this routine evaluates and plots
add_ci : When = True will add confidence intervals
add_labels : When =True will display Obs ID per point
plotwidth : When Omitted is = 600
plotline : Adds a conecting line between dots [True by default]
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
'''
if not(isinstance(dim,list)):
if isinstance(dim, int):
dim=[dim]
if isinstance(Xnew,bool):
if 'obsidX' in mvmobj:
ObsID_=mvmobj['obsidX']
else:
ObsID_ = []
for n in list(np.arange(mvmobj['T'].shape[0])+1):
ObsID_.append('Obs #'+str(n))
T_matrix=mvmobj['T']
else:
if isinstance(Xnew,np.ndarray):
X_=Xnew.copy()
ObsID_ = []
for n in list(np.arange(Xnew.shape[0])+1):
ObsID_.append('Obs #'+str(n))
elif isinstance(Xnew,pd.DataFrame):
X_=np.array(Xnew.values[:,1:]).astype(float)
ObsID_ = Xnew.values[:,0].astype(str)
ObsID_ = ObsID_.tolist()
if 'Q' in mvmobj:
xpred=phi.pls_pred(X_,mvmobj)
else:
xpred=phi.pca_pred(X_,mvmobj)
T_matrix=xpred['Tnew']
ObsNum_=[]
for n in list(range(1,len(ObsID_)+1)):
ObsNum_.append('Obs #'+str(n))
if isinstance(CLASSID,bool): # No CLASSIDS
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Score_Line_"+rnd_num+".html",title='Score Line t['+str(dim[0])+ ']',mode='inline')
y_=T_matrix[:,[dim[0]-1]]
x_=list(range(1,y_.shape[0]+1))
source = ColumnDataSource(data=dict(x=x_, y=y_,ObsID=ObsID_,ObsNum=ObsNum_))
TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
TOOLTIPS = [
("Obs#", "@ObsNum"),
("(x,y)", "($x, $y)"),
("Obs: ","@ObsID")
]
p = figure(tools=TOOLS, tooltips=TOOLTIPS,width=plotwidth,height=plotheight, title='Score Line t['+str(dim[0])+']' )
#p.circle('x', 'y', source=source,size=7)
p.scatter('x', 'y', source=source,size=10)
if plotline:
p.line('x', 'y', source=source)
if add_ci:
lim95,lim99=phi.single_score_conf_int(mvmobj['T'][:,[dim[0]-1]])
p.line(x_, lim95,line_color="gold",line_dash='dashed')
p.line(x_,-lim95,line_color="gold",line_dash='dashed')
p.line(x_, lim99,line_color="red",line_dash='dashed')
p.line(x_,-lim99,line_color="red",line_dash='dashed')
if add_labels:
labelsX = LabelSet(x='x', y='y', text='ObsID', level='glyph',x_offset=5, y_offset=5, source=source)
p.add_layout(labelsX)
p.xaxis.axis_label = 'Observation'
p.yaxis.axis_label = 't ['+str(dim[0])+']'
show(p)
else: # YES CLASSIDS
#Classes_=np.unique(CLASSID[colorby]).tolist()
Classes_=phi.unique(CLASSID,colorby)
A=len(Classes_)
#colormap =cm.get_cmap("rainbow")
colormap = matplotlib.colormaps['rainbow']
different_colors=A
color_mapping=colormap(np.linspace(0,1,different_colors),1,True)
bokeh_palette=["#%02x%02x%02x" % (r, g, b) for r, g, b in color_mapping[:,0:3]]
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Score_Line_"+rnd_num+".html",title='Score Line t['+str(dim[0])+ ']',mode='inline')
y_=T_matrix[:,[dim[0]-1]]
x_=list(range(1,y_.shape[0]+1))
TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
TOOLTIPS = [
("Obs#", "@ObsNum"),
("(x,y)", "($x, $y)"),
("Obs: ","@ObsID"),
("Class:","@Class")
]
classid_=list(CLASSID[colorby])
legend_it = []
p = figure(tools=TOOLS, tooltips=TOOLTIPS,toolbar_location="above",width=plotwidth,height=plotheight, title='Score Line t['+str(dim[0])+ ']')
for classid_in_turn in Classes_:
x_aux=[]
y_aux=[]
obsid_aux=[]
classid_aux=[]
obsnum_aux=[]
for i in list(range(len(ObsID_))):
if classid_[i]==classid_in_turn:
x_aux.append(x_[i])
y_aux.append(y_[i][0])
obsid_aux.append(ObsID_[i])
obsnum_aux.append(ObsNum_[i])
classid_aux.append(classid_in_turn)
source = ColumnDataSource(data=dict(x=x_aux, y=y_aux,ObsID=obsid_aux,ObsNum=obsnum_aux,Class=classid_aux))
color_=bokeh_palette[Classes_.index(classid_in_turn)]
#c=p.circle('x','y',source=source,color=color_)
c=p.scatter('x','y',source=source,color=color_)
if plotline:
c1=p.line('x','y',source=source,color=color_)
#added to allow numbers in classids
aux_=classid_in_turn
if isinstance(aux_,(float,int)):
aux_=str(aux_)
#
if add_legend and plotline:
# legend_it.append((classid_in_turn, [c,c1]))
legend_it.append((aux_, [c,c1]))
if add_legend and not(plotline):
# legend_it.append((classid_in_turn, [c]))
legend_it.append((aux_, [c]))
if add_labels:
labelsX = LabelSet(x='x', y='y', text='ObsID', level='glyph',x_offset=5, y_offset=5, source=source)
p.add_layout(labelsX)
if add_ci:
lim95,lim99=phi.single_score_conf_int(mvmobj['T'][:,[dim[0]-1]])
p.line(x_, lim95,line_color="gold",line_dash='dashed')
p.line(x_,-lim95,line_color="gold",line_dash='dashed')
p.line(x_, lim99,line_color="red",line_dash='dashed')
p.line(x_,-lim99,line_color="red",line_dash='dashed')
p.xaxis.axis_label = 'Observation'
p.yaxis.axis_label = 't ['+str(dim[0])+']'
if add_legend:
legend = Legend(items=legend_it, location='top_right')
p.add_layout(legend, 'right')
legend.click_policy="hide"
show(p)
return
[docs]
def diagnostics(mvmobj,*,Xnew=False,Ynew=False,score_plot_xydim=False,plotwidth=600,ht2_logscale=False,spe_logscale=False):
"""Hotelling's T2 and SPE
diagnostics(mvmobj,*,Xnew=False,Ynew=False,score_plot_xydim=False,plotwidth=600,ht2_logscale=False,spe_logscale=False):
Args:
mvmobj: A model created with phi.pca or phi.pls
Optional Parameters:
Xnew/Ynew: Data used to calculate diagnostics[numpy arrays or pandas dataframes]
score_plot_xydim: will add a score scatter plot at the bottom
if sent with a list of [dimx, dimy] where dimx/dimy
are integers and refer to the latent space to plot
in the x and y axes of the scatter plot. e.g. [1,2] will
add a t1-t2 plot
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
if isinstance(score_plot_xydim,bool):
add_score_plot = False
else:
add_score_plot = True
if isinstance(Xnew,bool): #No Xnew was given need to plot all from model
if 'obsidX' in mvmobj:
ObsID_=mvmobj['obsidX']
else:
ObsID_ = []
for n in list(np.arange(mvmobj['T'].shape[0])+1):
ObsID_.append('Obs #'+str(n))
Obs_num = np.arange(mvmobj['T'].shape[0])+1
if add_score_plot and not(isinstance(score_plot_xydim,bool)):
t_x = mvmobj['T'][:,[score_plot_xydim[0]-1]]
t_y = mvmobj['T'][:,[score_plot_xydim[1]-1]]
else:
add_score_plot = False
t2_ = mvmobj['T2']
spex_ = mvmobj['speX']
if ht2_logscale:
t2_=np.log10(t2_)
if spe_logscale:
spex_= np.log10(spex_)
if not(add_score_plot):
if 'Q' in mvmobj:
spey_=1
source = ColumnDataSource(data=dict(x=Obs_num, ObsID=ObsID_,t2=t2_,spex=spex_,spey=mvmobj['speY']))
else:
source = ColumnDataSource(data=dict(x=Obs_num, ObsID=ObsID_,t2=t2_,spex=spex_))
else:
if 'Q' in mvmobj:
spey_=1
source = ColumnDataSource(data=dict(x=Obs_num, ObsID=ObsID_,t2=t2_,spex=spex_,spey=mvmobj['speY'],tx=t_x,ty=t_y))
else:
source = ColumnDataSource(data=dict(x=Obs_num, ObsID=ObsID_,t2=t2_,spex=spex_,tx=t_x,ty=t_y))
else: #Xnew was given
if isinstance(Xnew,np.ndarray):
ObsID_ = []
for n in list(np.arange(Xnew.shape[0])+1):
ObsID_.append('Obs #'+str(n))
elif isinstance(Xnew,pd.DataFrame):
X_=np.array(Xnew.values[:,1:]).astype(float)
ObsID_ = Xnew.values[:,0].astype(str)
ObsID_ = ObsID_.tolist()
if add_score_plot and not(isinstance(score_plot_xydim,bool)):
if 'Q' in mvmobj:
xpred=phi.pls_pred(X_,mvmobj)
else:
xpred=phi.pca_pred(X_,mvmobj)
T_matrix=xpred['Tnew']
t_x = T_matrix[:,[score_plot_xydim[0]-1]]
t_y = T_matrix[:,[score_plot_xydim[1]-1]]
else:
add_score_plot = False
t2_ = phi.hott2(mvmobj,Xnew=Xnew)
Obs_num = np.arange(t2_.shape[0])+1
if 'Q' in mvmobj and not(isinstance(Ynew,bool)):
spex_,spey_ = phi.spe(mvmobj,Xnew,Ynew=Ynew)
else:
spex_ = phi.spe(mvmobj,Xnew)
spey_ = False
if ht2_logscale:
t2_=np.log10(t2_)
if spe_logscale:
spex_= np.log10(spex_)
ObsNum_=[]
for n in list(range(1,len(ObsID_)+1)):
ObsNum_.append('Obs #'+str(n))
if not(add_score_plot):
if 'Q' in mvmobj and not(isinstance(Ynew,bool)):
source = ColumnDataSource(data=dict(x=Obs_num, ObsID=ObsID_,ObsNum=ObsNum_,t2=t2_,spex=spex_,spey=spey_))
else:
source = ColumnDataSource(data=dict(x=Obs_num, ObsID=ObsID_,ObsNum=ObsNum_,t2=t2_,spex=spex_))
else:
if 'Q' in mvmobj and not(isinstance(Ynew,bool)):
source = ColumnDataSource(data=dict(x=Obs_num, ObsID=ObsID_,ObsNum=ObsNum_,t2=t2_,spex=spex_,spey=spey_,tx=t_x,ty=t_y))
else:
source = ColumnDataSource(data=dict(x=Obs_num, ObsID=ObsID_,ObsNum=ObsNum_,t2=t2_,spex=spex_,tx=t_x,ty=t_y))
TOOLS = "save,wheel_zoom,box_zoom,reset,lasso_select"
TOOLTIPS = [
("Obs #", "@x"),
("(x,y)", "($x, $y)"),
("Obs: ","@ObsID")
]
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Diagnostics"+rnd_num+".html",title='Diagnostics',mode='inline')
p = figure(tools=TOOLS, tooltips=TOOLTIPS, width=plotwidth, title="Hotelling's T2")
#p.circle('x','t2',source=source)
p.scatter('x','t2',source=source)
if ht2_logscale:
p.line([0,Obs_num[-1]],[np.log10(mvmobj['T2_lim95']),np.log10(mvmobj['T2_lim95'])],line_color='gold')
p.line([0,Obs_num[-1]],[np.log10(mvmobj['T2_lim99']),np.log10(mvmobj['T2_lim99'])],line_color='red')
else:
p.line([0,Obs_num[-1]],[mvmobj['T2_lim95'],mvmobj['T2_lim95']],line_color='gold')
p.line([0,Obs_num[-1]],[mvmobj['T2_lim99'],mvmobj['T2_lim99']],line_color='red')
p.xaxis.axis_label = 'Observation sequence'
p.yaxis.axis_label = "HT2"
p_list=[p]
p = figure(tools=TOOLS, tooltips=TOOLTIPS, width=plotwidth, title='SPE X')
#p.circle('x','spex',source=source)
p.scatter('x','spex',source=source)
if spe_logscale:
p.line([0,Obs_num[-1]],[np.log10(mvmobj['speX_lim95']),np.log10(mvmobj['speX_lim95'])],line_color='gold')
p.line([0,Obs_num[-1]],[np.log10(mvmobj['speX_lim99']),np.log10(mvmobj['speX_lim99'])],line_color='red')
else:
p.line([0,Obs_num[-1]],[mvmobj['speX_lim95'],mvmobj['speX_lim95']],line_color='gold')
p.line([0,Obs_num[-1]],[mvmobj['speX_lim99'],mvmobj['speX_lim99']],line_color='red')
p.xaxis.axis_label = 'Observation sequence'
p.yaxis.axis_label = 'SPE X-Space'
p_list.append(p)
p = figure(tools=TOOLS, tooltips=TOOLTIPS, width=plotwidth, title='Outlier Map')
#p.circle('t2','spex',source=source)
p.scatter('t2','spex',source=source)
if ht2_logscale:
vline = Span(location=np.log10(mvmobj['T2_lim99']), dimension='height', line_color='red', line_width=1)
else:
vline = Span(location=mvmobj['T2_lim99'], dimension='height', line_color='red', line_width=1)
if spe_logscale:
hline = Span(location=np.log10(mvmobj['speX_lim99']), dimension='width', line_color='red', line_width=1)
else:
hline = Span(location=mvmobj['speX_lim99'], dimension='width', line_color='red', line_width=1)
p.renderers.extend([vline, hline])
p.xaxis.axis_label = "Hotelling's T2"
p.yaxis.axis_label = 'SPE X-Space'
p_list.append(p)
if 'Q' in mvmobj and not(isinstance(spey_,bool)):
p = figure(tools=TOOLS, tooltips=TOOLTIPS, height=400, title='SPE Y')
#p.circle('x','spey',source=source)
p.scatter('x','spey',source=source,size=10)
p.line([0,Obs_num[-1]],[mvmobj['speY_lim95'],mvmobj['speY_lim95']],line_color='gold')
p.line([0,Obs_num[-1]],[mvmobj['speY_lim99'],mvmobj['speY_lim99']],line_color='red')
p.xaxis.axis_label = 'Observation sequence'
p.yaxis.axis_label = 'SPE Y-Space'
p_list.append(p)
if add_score_plot:
p = figure(tools=TOOLS, tooltips=TOOLTIPS, width=plotwidth, title='Score Scatter')
#p.circle('tx', 'ty', source=source,size=7)
p.scatter('tx', 'ty', source=source,size=10)
T_aux1=mvmobj['T'][:,[score_plot_xydim[0]-1]]
T_aux2=mvmobj['T'][:,[score_plot_xydim[1]-1]]
T_aux = np.hstack((T_aux1,T_aux2))
st=(T_aux.T @ T_aux)/T_aux.shape[0]
[xd95,xd99,yd95p,yd95n,yd99p,yd99n]=phi.scores_conf_int_calc(st,mvmobj['T'].shape[0])
p.line(xd95,yd95p,line_color="gold",line_dash='dashed')
p.line(xd95,yd95n,line_color="gold",line_dash='dashed')
p.line(xd99,yd99p,line_color="red",line_dash='dashed')
p.line(xd99,yd99n,line_color="red",line_dash='dashed')
p.xaxis.axis_label = 't ['+str(score_plot_xydim[0])+']'
p.yaxis.axis_label = 't ['+str(score_plot_xydim[1])+']'
# Vertical line
vline = Span(location=0, dimension='height', line_color='black', line_width=2)
# Horizontal line
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([vline, hline])
#Do another p.figure
p_list.append(p)
show(column(p_list))
return
[docs]
def predvsobs(mvmobj,X,Y,*,CLASSID=False,colorby=False,x_space=False):
""" Plot observed vs predicted values
predvsobs(mvmobj,X,Y,*,CLASSID=False,colorby=False,x_space=False)
Args:
mvmobj: A model created with phi.pca or phi.pls
X/Y: Data [numpy arrays or pandas dataframes]
Optional Parameters:
CLASSID: Pandas Data Frame with classifiers per observation, each column is a class
colorby: one of the classes in CLASSID to color by
x_space: = 'False' will skip plotting the obs. vs pred for X *default*
'True' will also plot obs vs pred for X
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
num_varX=mvmobj['P'].shape[0]
if isinstance(X,np.ndarray):
X_=X.copy()
ObsID_ = []
for n in list(np.arange(X.shape[0])+1):
ObsID_.append('Obs #'+str(n))
XVarID_ = []
for n in list(np.arange(X.shape[1])+1):
XVarID_.append('Var #'+str(n))
elif isinstance(X,pd.DataFrame):
X_=np.array(X.values[:,1:]).astype(float)
ObsID_ = X.values[:,0].astype(str)
ObsID_ = ObsID_.tolist()
elif isinstance(X,dict):
X_=X.copy()
k=list(X.keys())
ObsID_=X[k[0]].values[:,0].astype(str)
ObsID_ = ObsID_.tolist()
if 'varidX' in mvmobj:
XVar=mvmobj['varidX']
else:
XVar = []
for n in list(np.arange(num_varX)+1):
XVar.append('XVar #'+str(n))
if 'Q' in mvmobj:
num_varY=mvmobj['Q'].shape[0]
if 'varidY' in mvmobj:
YVar=mvmobj['varidY']
else:
YVar = []
for n in list(np.arange(num_varY)+1):
YVar.append('YVar #'+str(n))
if isinstance(Y,np.ndarray):
Y_=Y.copy()
elif isinstance(Y,pd.DataFrame):
Y_=np.array(Y.values[:,1:]).astype(float)
if 'Q' in mvmobj:
pred=phi.pls_pred(X_,mvmobj)
yhat=pred['Yhat']
if x_space:
xhat=pred['Xhat']
else:
xhat=False
else:
x_space=True
pred=phi.pca_pred(X_,mvmobj)
xhat=pred['Xhat']
yhat=False
TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
TOOLTIPS = [
("index", "$index"),
("(x,y)", "($x, $y)"),
("Obs: ","@ObsID")
]
if isinstance(CLASSID,bool): # No CLASSIDS
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("ObsvsPred_"+rnd_num+".html",title='ObsvsPred',mode='inline')
plot_counter=0
if not(isinstance(yhat,bool)): #skip if PCA model sent
for i in list(range(Y_.shape[1])):
x_ = Y_[:,i]
y_ = yhat[:,i]
min_value = np.nanmin([np.nanmin(x_),np.nanmin(y_)])
max_value = np.nanmax([np.nanmax(x_),np.nanmax(y_)])
source = ColumnDataSource(data=dict(x=x_, y=y_,ObsID=ObsID_))
#p = figure(tools=TOOLS, tooltips=TOOLTIPS,width=600, height=600, title=YVar[i])
p = figure(tools=TOOLS, tooltips=TOOLTIPS,width=600, height=600, title=YVar[i],x_range=(min_value, max_value),y_range=(min_value, max_value))
#p.circle('x', 'y', source=source,size=7,color='darkblue')
p.scatter('x', 'y', source=source,size=7,color='darkblue')
p.line([min_value,max_value],[min_value,max_value],line_color='cyan',line_dash='dashed')
p.xaxis.axis_label ='Observed'
p.yaxis.axis_label ='Predicted'
if plot_counter==0:
p_list=[p]
else:
p_list.append(p)
plot_counter = plot_counter+1
if x_space: #
for i in list(range(X_.shape[1])):
x_ = X_[:,i]
y_ = xhat[:,i]
min_value = np.nanmin([np.nanmin(x_),np.nanmin(y_)])
max_value = np.nanmax([np.nanmax(x_),np.nanmax(y_)])
source = ColumnDataSource(data=dict(x=x_, y=y_,ObsID=ObsID_))
p = figure(tools=TOOLS, tooltips=TOOLTIPS,width=600, height=600, title=XVar[i],x_range=(min_value, max_value),y_range=(min_value, max_value))
#p.circle('x', 'y', source=source,size=7,color='darkblue')
p.scatter('x', 'y', source=source,size=10,color='darkblue')
p.line([min_value,max_value],[min_value,max_value],line_color='cyan',line_dash='dashed')
p.xaxis.axis_label ='Observed'
p.yaxis.axis_label ='Predicted'
if plot_counter==0:
p_list=[p]
else:
p_list.append(p)
plot_counter = plot_counter+1
show(column(p_list))
else: # YES CLASSIDS
#Classes_=np.unique(CLASSID[colorby]).tolist()
Classes_=phi.unique(CLASSID,colorby)
different_colors=len(Classes_)
#colormap =cm.get_cmap("rainbow")
colormap = matplotlib.colormaps['rainbow']
color_mapping=colormap(np.linspace(0,1,different_colors),1,True)
bokeh_palette=["#%02x%02x%02x" % (r, g, b) for r, g, b in color_mapping[:,0:3]]
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("ObsvsPred_"+rnd_num+".html",title='ObsvsPred',mode='inline')
classid_=list(CLASSID[colorby])
plot_counter=0
if not(isinstance(yhat,bool)): #skip if PCA model sent
for i in list(range(Y_.shape[1])):
x_ = Y_[:,i]
y_ = yhat[:,i]
min_value = np.nanmin([np.nanmin(x_),np.nanmin(y_)])
max_value = np.nanmax([np.nanmax(x_),np.nanmax(y_)])
p = figure(tools=TOOLS, tooltips=TOOLTIPS,width=600, height=600, title=YVar[i],x_range=(min_value, max_value),y_range=(min_value, max_value))
for classid_in_turn in Classes_:
x_aux=[]
y_aux=[]
obsid_aux=[]
classid_aux=[]
for i in list(range(len(ObsID_))):
if classid_[i]==classid_in_turn and not(np.isnan(x_[i])):
x_aux.append(x_[i])
y_aux.append(y_[i])
obsid_aux.append(ObsID_[i])
classid_aux.append(classid_in_turn)
source = ColumnDataSource(data=dict(x=x_aux, y=y_aux,ObsID=obsid_aux,Class=classid_aux))
color_=bokeh_palette[Classes_.index(classid_in_turn)]
#p.circle('x','y',source=source,color=color_,legend_label=classid_in_turn)
p.scatter('x','y',source=source,color=color_,legend_label=classid_in_turn)
p.line([min_value,max_value],[min_value,max_value],line_color='cyan',line_dash='dashed')
p.xaxis.axis_label ='Observed'
p.yaxis.axis_label ='Predicted'
p.legend.click_policy="hide"
p.legend.location = "top_left"
if plot_counter==0:
p_list=[p]
plot_counter = plot_counter+1
else:
p_list.append(p)
if x_space: #
for i in list(range(X_.shape[1])):
x_ = X_[:,i]
y_ = xhat[:,i]
min_value = np.nanmin([np.nanmin(x_),np.nanmin(y_)])
max_value = np.nanmax([np.nanmax(x_),np.nanmax(y_)])
p = figure(tools=TOOLS, tooltips=TOOLTIPS,width=600, height=600, title=XVar[i],x_range=(min_value, max_value),y_range=(min_value, max_value))
for classid_in_turn in Classes_:
x_aux=[]
y_aux=[]
obsid_aux=[]
classid_aux=[]
for i in list(range(len(ObsID_))):
if classid_[i]==classid_in_turn and not(np.isnan(x_[i])):
x_aux.append(x_[i])
y_aux.append(y_[i])
obsid_aux.append(ObsID_[i])
classid_aux.append(classid_in_turn)
source = ColumnDataSource(data=dict(x=x_aux, y=y_aux,ObsID=obsid_aux,Class=classid_aux))
color_=bokeh_palette[Classes_.index(classid_in_turn)]
#p.circle('x','y',source=source,color=color_,legend_label=classid_in_turn)
p.scatter('x','y',source=source,color=color_,legend_label=classid_in_turn)
p.line([min_value,max_value],[min_value,max_value],line_color='cyan',line_dash='dashed')
p.xaxis.axis_label ='Observed'
p.yaxis.axis_label ='Predicted'
p.legend.click_policy="hide"
p.legend.location = "top_left"
if plot_counter==0:
p_list=[p]
plot_counter = plot_counter+1
else:
p_list.append(p)
show(column(p_list))
return
[docs]
def contributions_plot(mvmobj,X,cont_type,*,Y=False,from_obs=False,to_obs=False,lv_space=False,plotwidth=800,plotheight=600,xgrid=False):
"""Plot contributions to diagnostics
contributions_plot(mvmobj,X,cont_type,*,Y=False,from_obs=False,to_obs=False,lv_space=False,plotwidth=800,plotheight=600,xgrid=False):
Args:
mvmobj : A dictionary created by phi.pls or phi.pca
X/Y: Data [numpy arrays or pandas dataframes] - Y space is optional
cont_type: 'ht2'
'spe'
'scores'
to_obs: Scalar or list of scalars with observation(s) number(s)| first element is #0
- OR -
Strings or list of strings with observation(s) name(s) [if X/Y are pandas data frames]
Optional Parameters:
from_obs: Scalar or list of scalars with observation(s) number(s) | first element is #0
- OR -
Strings or list of strings with observation(s) name(s) [if X/Y are pandas data frames]
Used to off set calculations for scores or ht2
"False' will calculate with respect to origin *default if not sent*
*Note: from_obs is ignored when cont_type='spe'*
lv_space: Latent spaces over which to do the calculations [applicable to 'ht2' and 'scores']
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
good_to_go=True
if isinstance(X,pd.DataFrame):
ObsID=X.values[:,0].tolist()
if isinstance(to_obs,str):
to_obs_=ObsID.index(to_obs)
elif isinstance(to_obs,int):
to_obs_=to_obs
elif isinstance(to_obs,list):
if isinstance(to_obs[0],str):
to_obs_=[]
for o in to_obs:
to_obs_.append(ObsID.index(o))
elif isinstance(to_obs[0],int):
to_obs_=to_obs.copy()
elif isinstance(to_obs,bool):
good_to_go=False
if not(isinstance(from_obs,bool)):
if isinstance(from_obs,str):
from_obs_=ObsID.index(from_obs)
elif isinstance(from_obs,int):
from_obs_=from_obs
elif isinstance(from_obs,list):
if isinstance(from_obs[0],str):
from_obs_=[]
for o in from_obs:
from_obs_.append(ObsID.index(o))
elif isinstance(from_obs[0],int):
from_obs_=from_obs.copy()
else:
from_obs_=False
else:
if isinstance(to_obs,int) or isinstance(to_obs,list):
to_obs_=to_obs.copy()
else:
good_to_go=False
if cont_type=='scores' and not(isinstance(Y,bool)):
Y=False
if isinstance(Y,bool) and good_to_go:
Xconts=phi.contributions(mvmobj,X,cont_type,Y=False,from_obs=from_obs_,to_obs=to_obs_,lv_space=lv_space)
Yconts=False
elif not(isinstance(Y,bool)) and good_to_go and ('Q' in mvmobj) and cont_type=='spe':
Xconts,Yconts=phi.contributions(mvmobj,X,cont_type,Y=Y,from_obs=from_obs_,to_obs=to_obs_,lv_space=lv_space)
if 'varidX' in mvmobj:
XVar=mvmobj['varidX']
else:
XVar = []
for n in list(np.arange(mvmobj['P'].shape[0])+1):
XVar.append('XVar #'+str(n))
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Contributions"+rnd_num+".html",title='Contributions',mode='inline')
if isinstance(from_obs,list):
from_txt=", ".join(map(str, from_obs))
from_txt=" from obs: "+from_txt
elif isinstance(from_obs,int):
from_txt=" from obs: "+str(from_obs)
elif isinstance(from_obs,str):
from_txt=" from obs: " + from_obs
else:
from_txt=""
if isinstance(to_obs,list):
to_txt=", ".join(map(str, to_obs))
to_txt=", to obs: "+to_txt
elif isinstance(to_obs,str):
to_txt=", to obs: " + to_obs
elif isinstance(to_obs,int):
to_txt =", to obs: "+ str(to_obs)
else:
to_txt=""
TOOLTIPS = [
("Variable","@names")
]
p = figure(x_range=XVar, height=plotheight,width=plotwidth, title="Contributions Plot"+from_txt+to_txt,
tools="save,box_zoom,pan,reset",tooltips=TOOLTIPS)
source1 = ColumnDataSource(data=dict(x_=XVar, y_=Xconts[0].tolist(),names=XVar))
#p.vbar(x=XVar, top=Xconts[0].tolist(), width=0.5)
p.vbar(x='x_', top='y_', source=source1,width=0.5)
p.ygrid.grid_line_color = None
if xgrid:
p.xgrid.grid_line_color = 'lightgray'
else:
p.xgrid.grid_line_color = None
p.yaxis.axis_label = 'Contributions to '+cont_type
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([hline])
p.xaxis.major_label_orientation = 45
p_list=[p]
if not(isinstance(Yconts,bool)):
if 'varidY' in mvmobj:
YVar=mvmobj['varidY']
else:
YVar = []
for n in list(np.arange(mvmobj['Q'].shape[0])+1):
YVar.append('YVar #'+str(n))
p = figure(x_range=YVar, height=plotheight,width=plotwidth, title="Contributions Plot",
tools="save,box_zoom,pan,reset")
#p.vbar(x=YVar, top=Yconts[0].tolist(), width=0.5)
source1 = ColumnDataSource(data=dict(x_=YVar, y_=Yconts[0].tolist(),names=YVar))
p.vbar(x='x_', top='y_', source=source1,width=0.5)
p.ygrid.grid_line_color = None
if xgrid:
p.xgrid.grid_line_color = 'lightgray'
else:
p.xgrid.grid_line_color = None
p.yaxis.axis_label = 'Contributions to '+cont_type
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([hline])
p.xaxis.major_label_orientation = 45
p_list.append(p)
show(column(p_list))
return
[docs]
def mb_weights(mvmobj,*,plotwidth=600,plotheight=400):
"""Super weights for Multi-block models
mb_weights(mvmobj,*,plotwidth=600,plotheight=400)
Args:
mvmobj: A multi-block PLS model created with phi.mbpls
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
A= mvmobj['T'].shape[1]
lv_prefix='LV #'
lv_labels = []
for a in list(np.arange(A)+1):
lv_labels.append(lv_prefix+str(a))
XVar=mvmobj['Xblocknames']
for i in list(np.arange(A)):
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("blockweights_"+rnd_num+".html",title="Block Weights",mode='inline')
px = figure(x_range=XVar, title="Block weights for MBPLS"+lv_labels[i],
tools="save,box_zoom,hover,reset", tooltips=[("Var:","@x_")],width=plotwidth,height=plotheight)
source1 = ColumnDataSource(data=dict(x_=XVar, y_=mvmobj['Wt'][:,i].tolist(),names=XVar))
px.vbar(x='x_', top='y_', source=source1,width=0.5)
px.y_range.range_padding = 0.1
px.ygrid.grid_line_color = None
px.axis.minor_tick_line_color = None
px.outline_line_color = None
px.yaxis.axis_label = 'Wt'+str(i+1)+']'
px.xaxis.major_label_orientation = 45
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
px.renderers.extend([hline])
if i==0:
p_list=[px]
else:
p_list.append(px)
show(column(p_list))
return
[docs]
def mb_r2pb(mvmobj,*,plotwidth=600,plotheight=400):
"""R2 for each block for Multi-block models
mb_r2pb(mvmobj,*,plotwidth=600,plotheight=400)
Args:
mvmobj: A multi-block PLS model created with phi.mbpls
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
A= mvmobj['T'].shape[1]
lv_prefix='LV #'
lv_labels = []
for a in list(np.arange(A)+1):
lv_labels.append(lv_prefix+str(a))
r2pbX_dict = {'XVar': mvmobj['Xblocknames']}
XVar=mvmobj['Xblocknames']
for i in list(np.arange(A)):
r2pbX_dict.update({lv_labels[i] : mvmobj['r2pbX'][:,i].tolist()})
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("r2perblock"+rnd_num+".html",title="R2 per Block",mode='inline')
#colormap =cm.get_cmap("rainbow")
colormap = matplotlib.colormaps['rainbow']
different_colors=A
color_mapping=colormap(np.linspace(0,1,different_colors),1,True)
bokeh_palette=["#%02x%02x%02x" % (r, g, b) for r, g, b in color_mapping[:,0:3]]
px = figure(x_range=XVar, title="r2 per Block for MBPLS",
tools="save,box_zoom,hover,reset", tooltips="$name @XVar: @$name",width=plotwidth,height=plotheight)
px.vbar_stack(lv_labels, x='XVar', width=0.9,color=bokeh_palette,source=r2pbX_dict)
px.y_range.range_padding = 0.1
px.ygrid.grid_line_color = None
px.axis.minor_tick_line_color = None
px.outline_line_color = None
px.yaxis.axis_label = 'R2 per Block per LV'
px.xaxis.major_label_orientation = 45
show(px)
return
[docs]
def mb_vip(mvmobj,*,plotwidth=600,plotheight=400):
"""VIP per block for Multi-block models
mb_vip(mvmobj,*,plotwidth=600,plotheight=400)
Args:
mvmobj: A multi-block PLS model created with phi.mbpls
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
A= mvmobj['T'].shape[1]
XVar=mvmobj['Xblocknames']
Wt=mvmobj['Wt']
r2y=mvmobj['r2y']
vip=np.zeros((Wt.shape[0],1))
if A>1:
for a in list(range(A)):
vip=vip+Wt[:,[a]]*r2y[a]
else:
vip=Wt[:,[0]]*r2y
vip=np.reshape(vip,-1)
index=np.argsort(vip)
index=index[::-1]
XVar_=[XVar[i] for i in index]
XVar = XVar_
vip=vip[index]
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("blockvip"+rnd_num+".html",title="Block VIP",mode='inline')
source1 = ColumnDataSource(data=dict(x_=XVar, y_=vip.tolist(),names=XVar))
px = figure(x_range=XVar, title="Block VIP for MBPLS",
tools="save,box_zoom,hover,reset",tooltips=[("Block:","@x_")],width=plotwidth,height=plotheight)
px.vbar(x='x_', top='y_', source=source1,width=0.5)
px.y_range.range_padding = 0.1
px.ygrid.grid_line_color = None
px.axis.minor_tick_line_color = None
px.outline_line_color = None
px.yaxis.axis_label = 'Block VIP'
px.xaxis.major_label_orientation = 45
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
px.renderers.extend([hline])
show(px)
return
[docs]
def barplot(yheights,*,plotwidth=600,plotheight=600,
addtitle='',xlabel='',ylabel='',xtick_labels=False,tabtitle='Bar Plot'):
""" Generic Bar plot with Bokeh
barplot(yheights,*,plotwidth=600,plotheight=600,
addtitle='',xlabel='',ylabel='',xtick_labels=False,tabtitle='Bar Plot'):
Args:
yheights: Values of bars
xtick_labels: Variable identifiers for x axis
by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
rnd_num=timestr()
output_file("BarPlot_"+rnd_num+".html",title=tabtitle,mode='inline')
TOOLTIPS = [
("Variable","@names")
]
p = figure(x_range=xtick_labels, title=addtitle,
tools="save,box_zoom,pan,reset",tooltips=TOOLTIPS,width=plotwidth)
source1 = ColumnDataSource(data=dict(x_=xtick_labels, y_=yheights,names=xtick_labels))
p.vbar(x='x_', top='y_', source=source1,width=0.5)
p.xgrid.grid_line_color = None
p.yaxis.axis_label = ylabel
p.xaxis.axis_label = xlabel
p.xaxis.major_label_orientation = 45
show(p)
return
[docs]
def lineplot(X,col_name,*,plot_title='Main Title',tab_title='Tab Title',
xaxis_label='X- axis',plotheight=400,plotwidth=600,
linecolor='blue',linewidth=2,marker=False):
""" Simple way to plot a column of a Pandas DataFrame with Bokeh.
lineplot(X,col_name,*,plot_title='Main Title',tab_title='Tab Title',
xaxis_label='X- axis',plotheight=400,plotwidth=600,
linecolor='blue',linewidth=2,marker=False)
Args:
X: A a pandas object with Data to be plotted,first column is obs id
col_name: The list with names of the column to plot
Optional Parameters:
plot_title
tab_title
xaxis_label
yaxis_label
plotheight
plotwidth
Programmed by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
if isinstance(col_name,str):
col_name=[col_name]
first_plot=True
TOOLS = "save,wheel_zoom,box_zoom,pan,reset,box_select,lasso_select"
TOOLTIPS = [
("Obs #", "@ObsNum"),
("(x,y)", "($x, $y)"),
("Obs: ","@ObsID")
]
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("LinePlot"+rnd_num+".html",title=tab_title,mode='inline')
if isinstance(X,pd.DataFrame):
for this_col_name in col_name:
ObsID_=X.values[:,0]
ObsID_=ObsID_.tolist()
aux=X.loc[:,this_col_name]
y_=aux.values
x_=list(range(1,len(ObsID_)+1))
ObsNum_=[]
for n in list(range(1,len(ObsID_)+1)):
ObsNum_.append('Obs #'+str(n))
if not(first_plot):
plot_title=''
p = figure(tools=TOOLS, tooltips=TOOLTIPS,width=plotwidth,height=plotheight,title=plot_title)
source = ColumnDataSource(data=dict(x=x_, y=y_,ObsID=ObsID_,ObsNum=ObsNum_))
p.xaxis.axis_label = xaxis_label
p.yaxis.axis_label = this_col_name
p.line('x', 'y', source=source,line_color=linecolor,line_width=linewidth)
#p.circle('x', 'y', source=source)
hline = Span(location=0, dimension='width', line_color='black', line_width=2)
p.renderers.extend([hline])
if marker:
p.scatter('x', 'y', source=source)
if first_plot:
p_list=[p]
first_plot=False
else:
p_list.append(p)
show(column(p_list))
return
[docs]
def plot_spectra(X,*,xaxis=False,plot_title='Main Title',tab_title='Tab Title',
xaxis_label='X- axis',yaxis_label='Y- axis',
linecolor='blue',linewidth=2):
"""Simple way to plot Spectra with Bokeh.
plot_spectra(X,*,xaxis=False,plot_title='Main Title',tab_title='Tab Title',
xaxis_label='X- axis',yaxis_label='Y- axis',
linecolor='blue',linewidth=2)
Args:
X: A numpy array or a pandas object with Spectra to be plotted
xaxis: wavenumbers or wavelengths to index the x axis of the plot
* ignored if X is a pandas dataframe *
Optional Parameters:
plot_title
tab_title
xaxis_label
yaxis_label
Programmed by Salvador Garcia-Munoz
(sgarciam@ic.ac.uk ,salvadorgarciamunoz@gmail.com)
"""
if isinstance(X,pd.DataFrame):
x=X.columns[1:].tolist()
x=np.array(x)
x=np.reshape(x,(1,-1))
y=X.values[:,1:].astype(float)
elif isinstance(X,np.ndarray):
y=X.copy()
if isinstance(xaxis,np.ndarray):
x=xaxis
x=np.reshape(x,(1,-1))
elif isinstance(xaxis,list):
x=np.array(xaxis)
x=np.reshape(x,(1,-1))
elif isinstance(xaxis,bool):
x=np.array(list(range(X.shape[1])))
x=np.reshape(x,(1,-1))
#rnd_num=str(int(np.round(1000*np.random.random_sample())))
rnd_num=timestr()
output_file("Spectra"+rnd_num+".html",title=tab_title,mode='inline')
p = figure(title=plot_title)
p.xaxis.axis_label = xaxis_label
p.yaxis.axis_label = yaxis_label
p.multi_line(x.tolist()*y.shape[0],y.tolist(),line_color=linecolor,line_width=linewidth)
show(p)
return