#!/usr/bin/python
# -*- mode: python; coding: utf-8 -*
# Copyright (c) 2019 Tristan Salles
# Licensed under the GNU LGPL Version 3
import gc
import sys
import time
import numpy as np
import pandas as pd
# For readthedoc...
try:
from mpi4py import MPI
except ImportError:
print('mpi4py is required and needs to be installed via pip')
pass
# For readthedoc...
try:
from skimage import graph
except ImportError:
print('scikit-image is required and needs to be installed via pip')
pass
from pyevtk.hl import gridToVTK
from pylab import rcParams
import matplotlib.pyplot as plt
from scipy.interpolate import RegularGridInterpolator
from mpl_toolkits.axes_grid1 import make_axes_locatable
[docs]class landscapeConnectivity(object):
"""
*Landscape elevational connectivity* (**LEC**) quantifies the closeness of a site to all others with
similar elevation. Such closeness is computed over a *graph* whose edges represent connections
among sites and whose weights are proportional to the cost of spreading through patches at
different elevation.
Method:
E. Bertuzzo et al., 2016: Geomorphic controls on elevational gradients of species richness - PNAS
doi_
.. _doi: http://www.pnas.org/cgi/doi/10.1073/pnas.1518922113
Args:
filename (str): CSV file name containing regularly spaced elevation grid [default: None]
XYZ (3D Numpy Array): 3D coordinates array of shape (nn,3) where nn is the number of points [default: None]
Z (2D Numpy Array): Elevation array of shape (nx,ny) where nx and ny are the number of points along the X and Y axis [default: None]
dx (float): grid spacing in metre when the Z argument defined above is used [default: None]
periodic (bool): applied periodic boundary to the elevation grid see explanation_ [default: False]
symmetric (bool): applied symmetric boundary to the elevation grid see explanation_ [default: False]
sigmap (float): species niche width percentage based on elevation extent [default: 0.1]
sigmav (float): species niche fixed width values [default: None]
diagonals (bool): computes the path based on the diagonal moves as well as the axial ones eg. D4/D8 connectivity [default: True]
delimiter (str): elevation grid csv delimiter [default: ' ']
sl (float): sea level position used to remove marine points from the LEC calculation [default: -1.e6]
.. _explanation: https://biolec.readthedocs.io/en/latest/_images/boundcond.jpg
caution:
There are 3 ways to import the elevation dataset in bioLEC:
* either as a CSV file (argument: filename) containing 3 columns for X, Y and Z respectively with no header and ordered along the X axis first
* or as a 3D numpy array (argument: XYZ) containing the X, Y and Z coordinates here again ordered along the X axis first
* or as a 2D numpy array (argument: Z) containing the elevation matrix, in this case the dx argument is also required
Note:
It is worth noting that the boundary conditions are either symmetric or periodic.
Although LEC simply depends on the elevation field and on the niche width, *LEC predicts well the
alpha-diversity simulated by full metacommunity models*.
"""
def __init__(self, filename=None, XYZ=None, Z=None, dx=None, periodic=False, symmetric=False,
sigmap=0.1, sigmav=None, diagonals=True, delimiter=' ', sl=-1.e6, test=False):
# Set MPI communications
self.comm = MPI.COMM_WORLD
self.size = self.comm.Get_size()
self.rank = self.comm.Get_rank()
self.test = test
if self.rank == 0 and not self.test:
print('bioLEC - LANDSCAPE ELEVATIONAL CONNECTIVITY')
print('-------------------------------------------\n')
# Read DEM file
if filename is not None:
self.demfile = filename
df = pd.read_csv(self.demfile, sep=delimiter, engine='c',
header=None, na_filter=False, dtype=np.float,
low_memory=False)
self.X = df.values[:,0]
self.Y = df.values[:,1]
self.Z = df.values[:,2]
elif XYZ is not None:
if XYZ.shape[1] != 3 :
if self.rank == 0:
print('Problem XYZ variable has to be defined with the following shape (nn,3) with nn the number of points!')
return
self.X = XYZ[:,0]
self.Y = XYZ[:,1]
self.Z = XYZ[:,2]
if self.X[1]==self.X[0]:
if self.rank == 0:
print('Problem XYZ variable have to be ordered along the X axis first!')
return
elif Z is not None:
if dx is None:
if self.rank == 0:
print('When using the Z argument then grid spacing [dx] has to be defined!')
return
nx = Z.shape[0]
ny = Z.shape[1]
xgrid = np.arange(0,nx*dx,dx)
ygrid = np.arange(0,ny*dx,dx)
xi, yi = np.meshgrid(xgrid,ygrid)
self.X = xi.flatten()
self.Y = yi.flatten()
self.Z = Z.flatten()
else:
if self.rank == 0:
print('Problem either filename or XYZ or Z variables have to be declared for the function to run properly!')
return
dx = ( self.X[1] - self.X[0] )
if dx == 0:
if self.rank == 0:
print('Problem dx is set to 0!')
return
self.nx = int((self.X.max() - self.X.min())/dx+1)
self.ny = int((self.Y.max() - self.Y.min())/dx+1)
Z = self.Z.reshape(self.ny,self.nx)
if periodic and symmetric:
print('Warning: periodic boundary conditions have been set for this experiment!')
print('In this current version, boundaries are either symmetric or periodic and cannot be both set to True.')
print('See http://biolec.readthedocs.io/en/latest/usage.html#running-examples for more information')
self.diagonals = diagonals
if periodic:
periodicZ = np.zeros((Z.shape[0]*3,Z.shape[1]*3))
kr0 = 0
for r in range(3):
kr1 = kr0 + Z.shape[0]
kc0 = 0
for c in range(3):
kc1 = kc0 + Z.shape[1]
periodicZ[kr0:kr1,kc0:kc1] = Z
kc0 = kc1
kr0 = kr1
self.data = periodicZ
del periodicZ
gc.collect()
self.nr = self.data.shape[0]
self.nc = self.data.shape[1]
self.nr0 = int(self.nr/3.)
self.nr1 = self.nr0 + int(self.nr/3.)
self.nc0 = int(self.nc/3.)
self.nc1 = self.nc0 + int(self.nc/3.)
elif symmetric:
nZ = []
kr0 = 0
symmetricZ = np.zeros((Z.shape[0]*3,Z.shape[1]*3))
nZ.append(Z[::-1,::-1])
nZ.append(np.flipud(Z))
nZ.append(Z[::-1,::-1])
nZ.append(np.fliplr(Z))
nZ.append(Z)
nZ.append(np.fliplr(Z))
nZ.append(Z[::-1,::-1])
nZ.append(np.flipud(Z))
nZ.append(Z[::-1,::-1])
k=0
for r in range(3):
kr1 = kr0 + Z.shape[0]
kc0 = 0
for c in range(3):
kc1 = kc0 + Z.shape[1]
symmetricZ[kr0:kr1,kc0:kc1] = nZ[k]
kc0 = kc1
k+=1
kr0 = kr1
self.data = symmetricZ
del symmetricZ
gc.collect()
self.nr = self.data.shape[0]
self.nc = self.data.shape[1]
self.nr0 = int(self.nr/3.)
self.nr1 = self.nr0 + int(self.nr/3.)
self.nc0 = int(self.nc/3.)
self.nc1 = self.nc0 + int(self.nc/3.)
else:
self.data = Z
self.nr = self.data.shape[0]
self.nc = self.data.shape[1]
self.nr0 = 0
self.nr1 = self.nr
self.nc0 = 0
self.nc1 = self.nc
del Z
gc.collect()
self.sealevel = sl
minz = max(self.data.min(),self.sealevel)
self.nz = self.data.copy()
self.nz[self.nz<self.sealevel] = -1.e6
if sigmap is not None:
sigma = (self.data.max() - minz) * sigmap
elif sigmav is not None:
sigma = sigmav
else:
sigma = (self.data.max() - minz) * sigmap
if self.rank == 0:
print('WARNING: Species niche width is not specified!')
print('A default width is defined based on elevational range: {:.3f}'.format(sigma))
self.sigma2 = 2. * np.square(sigma)
self.nNodes = self.nc * self.nr
self.LEC = None
return
[docs] def computeMinPath(self, r, c):
"""
**Internal function** to compute the minimum path between a specific node and all other ones.
Args:
c (int): row indice of the consider point
r (int): column indice of a consider point
Returns:
mincost: minimum-cost path for the specified node.
Note:
This function relies on scikit-image_ (image processing in python) and finds
distance-weighted minimum cost paths through an n-d costs array.
.. _scikit-image: https://scikit-image.org/docs/dev/api/skimage.graph.html
"""
# Create the cost surface based on the square of the difference in elevation between the considered
# node and all the others vertices
weight = np.square( self.nz - self.nz[r, c] )
# From the weight-surface we create a 'landscape graph' object which can then be
# analysed using distance-weighted minimum cost path
cost = graph.MCP_Geometric( weight, fully_connected=self.diagonals )
# Calculate the least-cost distance from the start cell to all other cells
return cost.find_costs(starts=[(r, c)])[0]
[docs] def splitRowWise(self):
"""
**Rowwise block partitioning** used to perform LEC computation in parallel.
Returns:
disps (1D array int): number of nodes to consider on each partition
startID (1D array int): starting index for each partition
endID (1D array int): ending index for each partition
Note:
From the number of processors (np) available split the array into equal number row wise.
"""
tmpA = np.zeros((self.nr, self.nc))
split = np.array_split(tmpA, self.size, axis = 0)
split_sizes = []
for i in range(0,len(split),1):
split_sizes = np.append(split_sizes, len(split[i]))
split_sizes_input = split_sizes*self.nr
endID = np.cumsum(split_sizes)
startID = np.zeros(self.size,int)
startID[1:] = np.asarray(endID[0:-1])
disps = np.insert(np.cumsum(split_sizes_input),0,0)[0:-1]
del split_sizes_input
gc.collect()
del split_sizes
gc.collect()
del split
gc.collect()
del tmpA
gc.collect()
return disps, startID, endID.astype(int)
def _test_progress(self, job_title, progress):
length = 20
block = int(round(length*progress))
msg = "\r{0}: [{1}] {2}%".format(job_title, "#"*block + "-"*(length-block), round(progress*100, 2))
if progress >= 1: msg += " DONE\r\n"
sys.stdout.write(msg)
sys.stdout.flush()
[docs] def computeLEC(self, fout=500):
"""
This function computes the **minimum path for all nodes** in a given surface and
**measure of the closeness** of each node to other at similar elevation range.
It then provide the *landscape elevational connectivity* array from computed
measure of closeness calculation.
Args:
fout (int): output frequency [default: 500]
"""
time0 = time.clock()
startID = None
endID = None
disps = None
if self.rank == 0:
disps, startID, endID = self.splitRowWise()
rsID = self.comm.bcast(startID, root=0)
reID = self.comm.bcast(endID, root=0)
if self.rank == 0 and self.size > 1:
print("\n Define grid partition based on row number for parallel processing... " )
self.comm.Barrier()
displacements = self.comm.bcast(disps, root = 0)
del disps
gc.collect()
t1 = time.clock()
if self.rank < self.size-1:
steps = int(displacements[self.rank+1]) - int(displacements[self.rank])
else:
steps = self.nNodes - int(displacements[self.rank])
if self.size > 1 and not self.test:
print(' + Domain for processor {:3d} starts at row {:4d} and end at row {:4d}'.format(self.rank,rsID[self.rank],reID[self.rank]))
del displacements
gc.collect()
self.comm.Barrier()
if self.rank == 0 and not self.test:
print("\n Starting LEC computation... \n " )
lc = reID[self.rank]-rsID[self.rank]
localLEC = np.zeros((self.nr,self.nc))
k = 0
for r in range(rsID[self.rank],reID[self.rank]):
for c in range(0,self.nc):
if k%fout==0 and k>0:
if self.rank == 0 and not self.test:
print(' + Compute closeness between sites in {:.2f} s - completion: {:.2f} %'.format(time.clock()-t1,k*100./steps))
if self.test:
self._test_progress("Test bioLEC installation:", k/steps)
t1 = time.clock()
if self.nz[r,c]>=self.sealevel:
localLEC += np.exp(-self.computeMinPath(r, c)/self.sigma2)
k += 1
del startID
gc.collect()
del endID
gc.collect()
flatLEC = np.reshape(localLEC, (np.product(localLEC.shape),))
del localLEC
gc.collect()
if self.rank==0:
valLEC = np.zeros_like(flatLEC)
else:
valLEC = None
self.comm.Reduce(flatLEC, valLEC, op=MPI.SUM, root=0)
del flatLEC
gc.collect()
if self.rank == 0:
self.LEC = np.reshape(valLEC,(self.nr, self.nc))
if self.test:
self._test_progress("Test bioLEC installation:", k/steps)
if abs(int(self.LEC.sum())-4283765.)<=1.:
if abs(int(self.LEC.max())-760.)<=1.:
if abs(int(self.LEC.min())-26.)<=1.:
print('All tests were successful...')
else:
print('Error when testing installation...',self.LEC.min())
else:
print('Error when testing installation...',self.LEC.max())
else:
print('Error when testing installation...',self.LEC.sum())
del valLEC
gc.collect()
if self.rank == 0 and not self.test:
print('\n Landscape Elevation Connectivity calculation took: {:.2f} s'.format(time.clock()-time0))
return
[docs] def viewResult(self, imName=None, size=(9,5), fsize=11, cmap1=plt.cm.summer_r, cmap2=plt.cm.coolwarm, dpi=200):
"""
This function **plots** and **saves** in a figure the result of the *LEC computation*.
Args:
imName (str): (string) image name with extension
size : size of the image [default: (9,5)]
fsize (int): title font size [default: 11]
cmap1 : color map for elevation grid [default: plt.cm.summer_r]
cmap2 : color map for LEC grid [default: cmap2=plt.cm.coolwarm]
dpi (int): resolution of the saved image [default: 200]
"""
rcParams['figure.figsize'] = size
fig, (ax1, ax2) = plt.subplots(1,2)
fig.suptitle('Elevation grid and associated landscape elevational connectivity', fontsize=fsize)
# SUBPLOT 1
ax1.set_title('Elevation', fontsize=fsize-1)
im1 = ax1.imshow(np.flipud(self.data), vmin=self.data.min(), vmax=self.data.max(),
cmap=cmap1, aspect='auto')
divider1 = make_axes_locatable(ax1)
cax1 = divider1.append_axes("right", size="5%", pad=0.05)
cbar1 = plt.colorbar(im1, cax=cax1)
cbar1.ax.tick_params(labelsize=fsize-3)
# Remove ticks from ax1
ax1.xaxis.set_visible(False)
ax1.yaxis.set_visible(False)
# SUBPLOT 2
ax2.set_title('LEC', fontsize=fsize-1)
im2 = ax2.imshow(np.flipud(self.LEC), vmin=self.LEC.min(), vmax=self.LEC.max(),
cmap=cmap2, aspect='auto')
divider2 = make_axes_locatable(ax2)
cax2 = divider2.append_axes("right", size="5%", pad=0.05)
cbar2 = plt.colorbar(im2, cax=cax2)
cbar2.ax.tick_params(labelsize=fsize-3)
# Remove xticks from ax2
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
# Make space for title
plt.tight_layout()
plt.subplots_adjust(top=0.85)
if self.size == 1:
plt.show()
if imName is not None:
fig.savefig(imName, dpi=dpi, bbox_inches='tight')
plt.close(fig)
return
[docs] def viewElevFrequency(self, input=None, imName=None, nbins=80, size=(6,6), fsize=11, dpi=200):
"""
This function **plots** and **saves** in a figure the *distribution of elevation*.
Args:
input (str): CSV file name containing the dataset
imName (str): (string) image name with extension
nbins (int): number of bins for the histogram (default: 80)
size : size of the image [default: (6,6)]
fsize (int): title font size [default: 11]
dpi (int): resolution of the saved image [default: 200]
Warning:
The filename needs to be provided without extension.
"""
data = pd.read_csv(input+'.csv')
minZ = max(self.sealevel,data.Z.min())
ax = data.Z.plot(kind='hist', color='Blue', alpha=0.2, bins=nbins, xlim=(minZ,data.Z.max()))
ax.set_title('Elevation frequency as function of site elevation', fontsize=fsize)
plt.xlabel('Elevation', fontsize=fsize-1)
fig = data.Z.plot(kind='density', figsize=size, ax=ax, xlim=(minZ,data.Z.max()),
linewidth=4, fontsize = fsize-3, secondary_y=True, y='Density').get_figure()
ax.set_ylabel('Frequency count', fontsize=fsize-1)
ax.tick_params(axis='x', labelsize=fsize-3)
ax.tick_params(axis='y', labelsize=fsize-3)
plt.ylabel('Density', fontsize=fsize-1)
if self.size == 1:
plt.show()
if imName is not None:
fig.savefig(imName, dpi=dpi, bbox_inches='tight')
plt.close(fig)
plt.close()
del data
gc.collect()
return
[docs] def viewLECFrequency(self, input=None, imName=None, size=(6,6), fsize=11, dpi=200):
"""
This function **plots** and **saves** in a figure the *distribution of LEC with elevation*.
Args:
input (str): CSV file name containing the dataset
imName (str): (string) image name with extension
size : size of the image [default: (6,6)]
fsize (int): title font size [default: 11]
dpi (int): resolution of the saved image [default: 200]
Warning:
The filename needs to be provided without extension.
"""
data = pd.read_csv(input+'.csv')
minZ = max(self.sealevel,data['Z'].min())
ff = data.plot(kind='scatter', x='Z', y='LEC', c='white', edgecolors='lightgray', figsize=size,
xlim=(minZ,data['Z'].max()), s=5)
ff.set_title('Landscape elevational connectivity as function of site elevation', fontsize=fsize)
plt.xlabel('Elevation', fontsize=fsize-1)
ff.set_ylabel('LEC', fontsize=fsize-1)
ff.tick_params(axis='x', labelsize=fsize-3)
ff.tick_params(axis='y', labelsize=fsize-3)
if self.size == 1:
plt.show()
if imName is not None:
fig = ff.get_figure()
fig.savefig(imName, dpi=dpi, bbox_inches='tight')
plt.close()
del data
gc.collect()
return
[docs] def viewLECZbar(self, input=None, imName=None, nbins=40, size=(6,6), fsize=11, dpi=200):
"""
This function **plots** and **saves** in a figure the *distribution of LEC and elevation with elevation*
as well as the *standard deviation for LEC values* for each bins.
Args:
input (str): CSV file name containing the dataset
imName (str): (string) image name with extension
nbins (int): number of bins for the histogram (default: 40)
size : size of the image [default: (6,6)]
fsize (int): title font size [default: 11]
dpi (int): resolution of the saved image [default: 200]
Warning:
The filename needs to be provided without extension.
"""
data = pd.read_csv(input+'.csv')
n, _ = np.histogram(data.Z, bins=nbins)
sy, _ = np.histogram(data.Z, bins=nbins, weights=data.LEC)
sy2, _ = np.histogram(data.Z, bins=nbins, weights=data.LEC*data.LEC)
mean = sy / n
std = np.sqrt(sy2/n - mean*mean)
fig, ax = plt.subplots(figsize=size)
ax.set_title('Landscape elevational connectivity as function of site elevation with error bars', fontsize=fsize)
plt.plot((_[1:] + _[:-1])/2, mean,color='steelblue',zorder=2,linewidth=3)
plt.scatter(data.Z, data.LEC, c='w',edgecolors='lightgray', zorder=0,alpha=1.,s=5)
minZ = max(data.Z.min(),self.sealevel)
ax.set_xlim(minZ,data.Z.max())
ax.tick_params(axis='x', labelsize=fsize-3)
ax.tick_params(axis='y', labelsize=fsize-3)
plt.xlabel('Elevation', fontsize=fsize-1)
plt.ylabel('LEC', fontsize=fsize-1)
(_, caps, _) = plt.errorbar(
(_[1:] + _[:-1])/2, mean, yerr=std, fmt='-o', c='steelblue',markersize=4, capsize=3,zorder=1,linewidth=1.25)
for cap in caps:
cap.set_markeredgewidth(1)
ax2=ax.twinx()
data.Z.plot(kind='density',secondary_y=True, ax=ax2, xlim=(minZ,data.Z.max()),
color='green', linewidth=3, zorder=0, fontsize = fsize-3)
plt.ylabel('Density', color='green', fontsize=fsize-1)
plt.tick_params(axis='x', labelsize=fsize-3)
plt.tick_params(axis='y', labelsize=fsize-3)
if self.size == 1:
plt.show()
if imName is not None:
fig.savefig(imName, dpi=dpi, bbox_inches='tight')
plt.close()
del data, _, n, sy, sy2, mean, std, caps
gc.collect()
return
[docs] def viewLECZFrequency(self, input=None, imName=None, size=(6,6), fsize=11, dpi=200):
"""
This function **plots** and **saves** in a figure the *distribution of LEC and elevation with elevation*.
Args:
input (str): CSV file name containing the dataset
imName (str): (string) image name with extension
size : size of the image [default: (6,6)]
fsize (int): title font size [default: 11]
dpi (int): resolution of the saved image [default: 200]
Warning:
The filename needs to be provided without extension.
"""
data = pd.read_csv(input+'.csv')
minZ = max(self.sealevel,data['Z'].min())
ax = data.plot(kind='scatter', x='Z', y='LEC', c='white', edgecolors='lightgray', figsize=size,
xlim=(minZ,data['Z'].max()), s=5)
ax.set_title('Landscape elevational connectivity as function of site elevation', fontsize=fsize)
plt.xlabel('Elevation', fontsize=fsize-1)
ax2=ax.twinx()
fig = data.Z.plot(kind='density',secondary_y=True, ax=ax2, xlim=(minZ,data['Z'].max()),
fontsize = fsize-3, linewidth=4).get_figure()
ax.set_ylabel('LEC', fontsize=fsize-1)
ax.tick_params(axis='x', labelsize=fsize-3)
ax.tick_params(axis='y', labelsize=fsize-3)
plt.ylabel('Density', fontsize=fsize-1)
if self.size == 1:
plt.show()
if imName is not None:
fig.savefig(imName, dpi=dpi, bbox_inches='tight')
plt.close(fig)
plt.close()
del data
gc.collect()
return
[docs] def writeLEC(self, filename='LECout'):
"""
This function writes the computed landscape elevational connectivity array in a **CSV file**
and create a **VTK visualisation file** (.vts).
Args:
filename (str): output file name without format extension.
Warning:
The filename needs to be provided without extension.
"""
time0 = time.clock()
if self.rank == 0:
XX = self.X.flatten()
YY = self.Y.flatten()
ZZ = self.Z.flatten()
LL = self.LEC[self.nr0:self.nr1,self.nc0:self.nc1].flatten()
df = pd.DataFrame({'X':XX,'Y':YY,'Z':ZZ,'LEC':LL})
df.to_csv(filename+'.csv', columns=['X', 'Y', 'Z', 'LEC'], sep=',', index=False , header=1)
vtkfile = filename
XX = XX.reshape(self.ny,self.nx)
YY = YY.reshape(self.ny,self.nx)
ZZ = ZZ.reshape(self.ny,self.nx)
LL = LL.reshape(self.ny,self.nx)
Xv = np.zeros((self.ny,self.nx,2))
Yv = np.zeros((self.ny,self.nx,2))
Zv = np.zeros((self.ny,self.nx,2))
LECv = np.zeros((self.ny,self.nx,2))
normLEC = np.zeros((self.ny,self.nx,2))
Xv[:,:,0] = XX
Yv[:,:,0] = YY
Zv[:,:,0] = ZZ
LECv[:,:,0] = LL
normLEC[:,:,0] = LL/LL.max()
Xv[:,:,1] = XX
Yv[:,:,1] = YY
Zv[:,:,1] = ZZ
LECv[:,:,1] = LL
normLEC[:,:,1] = LL/LL.max()
gridToVTK(vtkfile, Xv, Yv, Zv, pointData = {"elevation" : Zv, "LEC" :LECv, "nLEC" :normLEC})
del XX,YY, ZZ, LL, Xv, Yv, Zv, LECv, normLEC
self.comm.Barrier()
if self.rank == 0:
print('\n Writing results on disk took: {:.2f} s'.format(time.clock()-time0))
return