Source code for pysampler.generate

'''
This module samples from any probability distribution.
'''

__version__ = '0.1'

__author__ = 'Lucia F. de la Bella'
__contributor__='Jesus Rubio Jimenez'
__email__ = 'lucia.fonseca-de-la-bella@port.ac.uk'
__license__ = 'MIT'
__copyright__ = '2020, Lucia Fonseca de la Bella'

__all__ = [
    'sampler',
    'statistics',
]

import numpy as np
from scipy.integrate import cumtrapz


[docs]def sampler(distribution, x_min, x_max, resolution=100, size=None, scale=1.): """Distribution sampler. This function sample from any given probability distribution [1]. Parameters ---------- distribution : float or int Probability distribution function to sample from. x_min, x_max : array_like Lower and upper bounds for the random variable x. resolution : int Resolution of the inverse transform sampling spline. Default is 100. size: int, optional Output shape of samples. If size is None and scale is a scalar, a single sample is returned. If size is None and scale is an array, an array of samples is returned with the same shape as scale. scale: array-like, optional Scale factor for the returned samples. Default is 1. Returns ------- x_sample : array_like Samples drawn from the Black Body spectrum. References ---------- .. [1] Statistics notes. """ if size is None: size = np.broadcast(x_min, x_max, scale).shape or None x = np.linspace(np.min(x_min), np.max(x_max), resolution) pdf = distribution(x) CDF = cumtrapz(pdf, x, initial=0) CDF = CDF / CDF[-1] n_uniform = np.random.uniform(size=size) return np.interp(n_uniform, CDF, x)
[docs]def statistics(realisations, nbins): """Data from the Black Body distribution. This function returns the average number counts and standard deviation from different realisations sampled from the same distribution. Parameters ---------- realisations: list List of samples. nbins: int Number of bins. Returns ------- average: (nbins,) array_like Average sample. bin_center: (nbins,) array_like Centers of the bins. average_counts: (nbins,) array_like Average number counts at the bin centers. std: (nbins,) array_like Standard deviation at the bin centers. References ---------- .. [1] Averaging over realisations. """ # Read the data of length n over nmocks mocks data = realisations nmocks = len(data) counts = [] for d in data: counts.append(np.histogram(d, bins=nbins)[0]) # Downsample to n / nmocks length n = [len(d) for d in data] fn = [np.int(np.round(np.divide(N, nmocks))) for N in n] subsample = [] for i, d in enumerate(data): rows_numbers_to_keep = np.random.choice(n[i], fn[i], replace=False) subsample.append(d[rows_numbers_to_keep]) # Merge subsamples into a single average catalog of length # n = nmocks * n / nmocks average = np.hstack(subsample) # Center values: counts, bins and standard deviation average_counts, bin_edges = np.histogram(average, bins=nbins) bin_center = 0.5 * (bin_edges[:-1] + bin_edges[1:]) counts_center = [] for c in counts: counts_center.append(0.5 * (c[:-1] + c[1:])) std = np.std(counts, axis=0) return average, bin_center, average_counts, std