# -*- coding: utf-8 -*-
"""
Created on Fri Sep 25 14:09:12 2020

@author: jbobowsk
"""

# This tutorial demonstrates shows how to create histograms in MATLAB.

import math
import numpy as np
import matplotlib.pyplot as plt

# First, we create a list of 5,000 random numbers drawn from a Poisson
# distribution.
mu = 6;
n = np.random.poisson(mu, 5000)

# A histogram can be created simply using 'hist(n)'.
plt.hist(n)
plt.xlabel('bin centres')
plt.ylabel('counts')

# Here's some formatting options.  I've also changed the plot from an absolute
# distribution to a probability density.
plt.figure()
plt.hist(n, color='red', edgecolor='k', density = True)
plt.xlabel('bin centres')
plt.ylabel('counts')

# Rather than having hist decide for you, you can specify the number of
# bins used to generate the histogram.
plt.figure()
nbins = 15
plt.hist(n, nbins, color='red', edgecolor='k', density = True)
plt.xlabel('bin centres')
plt.ylabel('counts')

# Instead of specifying the number of bins, you can provide a list of the
# edge positions of the bins
plt.figure()
xbins = np.arange(0, 21, 1) # First bin is from 0..1, second is from 1..2, ...
plt.hist(n, xbins, color='red', edgecolor='k', density = True)
plt.xlabel('bin centres')
plt.ylabel('counts')
plt.axis((-2, 20, 0, 0.2,))

# It's worth noting that you can get vectors of the counts in each bin and
# the positions of the bin edges of a histogram using:
counts, edges = np.histogram(n, xbins)
print('counts:', counts)
print('edge positions:', edges)

# We can calculate the centres of the bins from the bin edge poisitions.  Note
# that is only works for uniformly spaced bins.
spacing = (edges[1] - edges[0])/2
centres = edges[1:] - spacing
print(centres)

# Now that we have the counts and the bin centres we can, if we want, add a
# scatter plot to our histogram.  In fact, I chose a Poisson distribution
# at the beginning of this script because it arises in counting experiments
# and we know that for Poisson distributions the error in the number of
# counts in a bin is just the square root of the number of counts.
plt.errorbar(centres, np.array(counts)/5000, np.sqrt(np.array(counts))/5000,\
             fmt = 'k.', markersize = 12, linewidth = 1.5, capsize = 5)

# Here's a way to display multiple histograms in a single plot.  The 'plt.hist'
# option 'fc' specifies the (R, G, B, transparency) colour where the last 
# element is the transparency of the bars.
plt.figure()
xbins = np.arange(0, 21, 1)
for j in range(3):
    n = np.random.poisson(mu, 2000)
    plt.hist(n, xbins, fc=(0, 0, 1, 0.2), edgecolor = 'k')
plt.xlabel('bin centres')
plt.ylabel('counts')
plt.axis((-2, 20, 0, 400))

# We can now create a historgram and then add to it the expected Poisson pdf 
# using the known mean.  We will calculate the Poission pdf at the bin centres.  
# Note that the Poisson distribution is a discrete distribution.  That is, it 
# is only defined for integer values of x. However, we can use the 
# 'scipy.special' module to allow use to calculate factorials of non-integer
# numbers and create a continuous plot.

# Here's the factorial
from scipy.special import factorial
print('A non-integer factorial 3.4! =', factorial(3.4))

# Here's a histogram of 50e3 number randomly drawn from a Poisson distribution
# with mean 5.  I also shiftered the bin edges so that the centres lie at
# integer values.
mu = 5;
n = np.random.poisson(mu, 50000)
plt.figure()
xbins = np.arange(-0.5, 21, 1)
plt.hist(n, xbins, color='red', edgecolor='k', density = True)
plt.xlabel('bin centres')
plt.ylabel('counts')

# Here we add a continuous representation of the Poisson distribution.
xx = np.arange(0, 20, 0.01)
pois = np.exp(-mu)*mu**xx/factorial(xx)
plt.plot(xx, pois, 'k-', linewidth = 2)

# Because the Poisson distribtion is strictly speaking only defined for 
# integer values, we can add points at the integer values.
xx = np.arange(0, 20, 1)
pois = np.exp(-mu)*mu**xx/factorial(xx)
plt.plot(xx, pois, 'k.', markersize = 12)