diff --git a/= b/= new file mode 100644 index 0000000..117af58 Binary files /dev/null and b/= differ diff --git a/USAGE.md b/USAGE.md index eba2473..948ff89 100644 --- a/USAGE.md +++ b/USAGE.md @@ -113,13 +113,13 @@ # to calculate mean >>> print(exponential.calculate_mean(1)) -2.0 +2.0 -# to calculate standard deviation +# to calculate standard deviation >>> print(exponential.calculate_stdev(1)) -2.0 +2.0 -# to calculate pdf +# to calculate pdf >>> print(exponential.calculate_pdf(5, 5)) 0.04104 @@ -129,11 +129,12 @@ >>> print(exponential.calculate_cdf(-2)) 0.0 -# plot pdf of exponential distribution ->>> exponential.plot_bar_pdf() +# plot pdf of exponential distribution +>>> exponential.plot_bar_pdf() ``` ## For Gamma Distribution + ``` >>> from probdists import Gamma @@ -185,33 +186,34 @@ # The resulting gamma three will have k=3, theta=2. # This add magic method fails if thetas are not equal since they wouldn't be summable ``` -## For Uniform Distribution + +## For Uniform Distribution ``` ->>> from probdists import Uniform +>>> from probdists import Uniform # default value of the interval is (1,10) >>> uniform = Uniform() >>> uniform2 = Uniform(1, 5) -# interval of uniform2 is (1,5) +# interval of uniform2 is (1,5) >>> uniform.read_data_file('demo_uniform_data') # pass in your filename to read data from filename -# to access data +# to access data >>> print(uniform.data) -[4,5,2,3,3,2,2,5,4,3,1,3,5,3,4] +[4,5,2,3,3,2,2,5,4,3,1,3,5,3,4] # to calculate mean >>> print(uniform.calculate_mean()) -5 +5 -# to calculate standard deviation +# to calculate standard deviation >>> print(uniform.calculate_stdev()) -2.89 +2.89 -# to calculate pdf +# to calculate pdf >>> print(uniform.calculate_pdf(5)) 0.1 @@ -225,10 +227,10 @@ # plot histogram of data >>> uniform.plot_histogram() -# plot pdf of uniform distribution ->>> uniform.plot_bar_pdf() +# plot pdf of uniform distribution +>>> uniform.plot_bar_pdf() -# to calculate cdf +# to calculate cdf >>> 
uniform.replace_stats_with_data() >>> print(uniform.calculate_cdf(0)) 0 @@ -237,6 +239,7 @@ >>> print(uniform.calculate_cdf(4)) 0.75 ``` + ## For Bernoulli Distribution ``` @@ -271,9 +274,9 @@ >>> print(bernoulli.calculate_pdf(1, 1)) 0.3 -# to calculate cdf +# to calculate cdf >>> print(bernoulli.calculate_cdf(0.7)) -0.7 +0.7 >>> print(bernoulli.calculate_cdf(2)) 1 @@ -297,6 +300,7 @@ ``` ## For Triangular Distribution + ``` >>> from probdists import Triangular @@ -324,17 +328,29 @@ >>> print(triangle.calculate_pdf(0.5)) 2 -# to calculate cdf +# to calculate cdf >>> print(triangle.calculate_cdf(0.5)) 0.5 >>> print(triangle.calculate_cdf(1)) 1 -# to access data +# to access data >>> print(triangle.data) [1, 2, 3, 4, 5, 5, 6, 8, 9, 13] -# plot pdf of triangular distribution ->>> triangle.plot_bar_pdf() +# plot pdf of triangular distribution +>>> triangle.plot_bar_pdf() + +``` +## For Chi-Square Distribution + +``` +# importing Chi_squareDistribution +>>> from probdists import Chi_squareDistribution +>>> chi = Chi_squareDistribution() +# to read observed and expected values from a csv file +>>> chi.read_data_file("filename.csv") +# to calculate (and print) the chi-square value +>>> chi.chi_square() ``` diff --git a/probdists/Bernoullidistribution.py b/probdists/Bernoullidistribution.py index 92a6d4e..14fe8ee 100644 --- a/probdists/Bernoullidistribution.py +++ b/probdists/Bernoullidistribution.py @@ -2,10 +2,11 @@ import matplotlib.pyplot as plt from .Generaldistribution import Distribution from .Binomialdistribution import Binomial +import seaborn as sns class Bernoulli(Distribution): - """ Bernoulli distribution class for calculating and + """Bernoulli distribution class for calculating and visualizing a Bernoulli distribution. Attributes: @@ -23,7 +24,7 @@ def __init__(self, prob=0.5): self.calculate_stdev()) def calculate_mean(self, round_to=2): - """ Method to calculate the mean of a Bernoulli distribution + """Method to calculate the mean of a Bernoulli distribution Args: round_to (int): Round the mean value. 
Defaults to 2. @@ -51,7 +52,7 @@ def calculate_stdev(self, round_to=2): return round(self.stdev, round_to) def replace_stats_with_data(self): - """ Method to calculate p from the data set + """Method to calculate p from the data set Args: None @@ -67,8 +68,8 @@ def replace_stats_with_data(self): return self.p def plot_bar(self): - """ Method to plot a histogram of the instance variable data using - matplotlib pyplot library. + """Method to plot a histogram of the instance variable data using + seaborn pyplot library. Args: None @@ -77,13 +78,12 @@ def plot_bar(self): None """ - plt.bar(x=['0', '1'], height=[(1 - self.p), self.p]) - plt.title('Bar Chart of Data') - plt.xlabel('outcome') - plt.ylabel('count') + sns.barplot(x=["0", "1"], height=[(1 - self.p), self.p]).set( + title="Bar Chart of Data", xlabel="outcome", ylabel="count" + ) def calculate_pdf(self, k, round_to=2): - """ Method to calculate pdf for the bernoulli distribution. + """Method to calculate pdf for the bernoulli distribution. Args: k (float): point for calculating the probability density function. Range of k: {0,1} @@ -102,7 +102,7 @@ def calculate_pdf(self, k, round_to=2): return round(self.pdf, round_to) def calculate_cdf(self, k, round_to=2): - """ Method to calculate cdf for the bernoulli distribution. + """Method to calculate cdf for the bernoulli distribution. 
Args: k (float): point for calculating the cumulative distribution function @@ -112,7 +112,7 @@ def calculate_cdf(self, k, round_to=2): float: cumulative distribution function output """ - val = 0 # default value of cdf for k < 0 + val = 0 # default value of cdf for k < 0 if 0 <= k < 1: val = 1 - self.p elif k > 1: @@ -121,7 +121,7 @@ def calculate_cdf(self, k, round_to=2): return round(self.cdf, round_to) def plot_bar_pdf(self): - """ Method to plot the pdf of the bernoulli distribution + """Method to plot the pdf of the bernoulli distribution Args: None @@ -137,17 +137,15 @@ def plot_bar_pdf(self): y.append(self.pdf) # draw the plots - plt.bar(x, y) - plt.title("Distribution of Outcomes") - plt.ylabel("Probability") - plt.xlabel("Outcome") - + plt.barplot(x, y).set( + title="Distribution of Outcomes", ylabel="Probability", xlabel="Outcome" + ) plt.show() return x, y def __add__(self, other): - """ Method to add together two Bernoulli distributions with equal p + """Method to add together two Bernoulli distributions with equal p Args: other (Bernoulli): Bernoulli instance @@ -157,7 +155,7 @@ def __add__(self, other): """ try: - assert self.p == other.p, 'p values are not equal' + assert self.p == other.p, "p values are not equal" except AssertionError: raise @@ -170,7 +168,7 @@ def __add__(self, other): return result def __repr__(self): - """ Method to output the characteristics of this Bernoulli instance + """Method to output the characteristics of this Bernoulli instance Args: None @@ -178,5 +176,7 @@ def __repr__(self): string: characteristics of this Bernoulli instance """ - return 'mean {0}, standard deviation {1}, \ - p {2}, q {3}'.format(self.mean, self.stdev, self.p, 1.0 - self.p) + return "mean {0}, standard deviation {1}, \ + p {2}, q {3}".format( + self.mean, self.stdev, self.p, 1.0 - self.p + ) diff --git a/probdists/Binomialdistribution.py b/probdists/Binomialdistribution.py index 70d9941..08b09fb 100644 --- a/probdists/Binomialdistribution.py +++ 
b/probdists/Binomialdistribution.py @@ -1,10 +1,11 @@ import math import matplotlib.pyplot as plt from .Generaldistribution import Distribution +import seaborn as sns class Binomial(Distribution): - """ Binomial distribution class for calculating and + """Binomial distribution class for calculating and visualizing a Binomial distribution. Attributes: @@ -15,13 +16,12 @@ class Binomial(Distribution): n (int) number of trials """ - def __init__(self, prob=.5, size=20): + def __init__(self, prob=0.5, size=20): self.n = size self.p = prob - Distribution.__init__(self, self.calculate_mean(), - self.calculate_stdev()) + Distribution.__init__(self, self.calculate_mean(), self.calculate_stdev()) def calculate_mean(self, round_to=2): """Function to calculate the mean from p and n @@ -71,7 +71,7 @@ def replace_stats_with_data(self): def plot_bar(self): """Function to output a histogram of the instance variable data using - matplotlib pyplot library. + seaborn library. Args: None @@ -79,11 +79,10 @@ def plot_bar(self): Returns: None """ - - plt.bar(x=['0', '1'], height=[(1 - self.p) * self.n, self.p * self.n]) - plt.title('Bar Chart of Data') - plt.xlabel('outcome') - plt.ylabel('count') + sns.barplot(x=[0, 1], y=[(1 - self.p) * self.n, self.p * self.n]).set( + title="Bar Chart of Data", xlabel="outcome", ylabel="count" + ) + plt.show() def calculate_pdf(self, k, round_to=2): """Probability density function calculator for the binomial distribution. 
@@ -115,7 +114,7 @@ def calculate_cdf(self, k, round_to=2): """ total_p = 0 - for i in range(k+1): + for i in range(k + 1): self.calculate_pdf(i) total_p += self.pdf self.cdf = total_p @@ -142,10 +141,10 @@ def plot_bar_pdf(self): y.append(self.pdf) # make the plots - plt.bar(x, y) - plt.title('Distribution of Outcomes') - plt.ylabel('Probability') - plt.xlabel('Outcome') + + sns.barplot(x=x, y=y).set( + title="Distribution of Outcomes", ylabel="Probability", xlabel="Outcome" + ) plt.show() @@ -162,7 +161,7 @@ def __add__(self, other): """ try: - assert self.p == other.p, 'p values are not equal' + assert self.p == other.p, "p values are not equal" except AssertionError as error: raise @@ -184,5 +183,5 @@ def __repr__(self): string: characteristics of the Binomial """ - return f'mean {self.mean}, standard deviation {self.stdev}, \ - p {self.p}, n {self.n}' + return f"mean {self.mean}, standard deviation {self.stdev}, \ + p {self.p}, n {self.n}" diff --git a/probdists/Chi_squareDistribution.py b/probdists/Chi_squareDistribution.py new file mode 100644 index 0000000..f2f1d5e --- /dev/null +++ b/probdists/Chi_squareDistribution.py @@ -0,0 +1,34 @@ +import pandas as pd +# from .Generaldistribution import Distribution + + +class Chi_squareDistribution(): + """Chi_squareDistribution Chi-square distribution class + is for calculating the chi-square value. + """ + + def __init__(self, filename=None, data=None): + self.filename = filename + self.data = data + + def read_data_file(self, filename): + """Read data file will read data from a csv file + in which there will be observed and expected values. + + """ + self.filename = filename + self.data = pd.read_csv(filename) + + def chi_square(self): + """ Chi_square method will extract the observed and expected values columns from the data file(.csv) and return + the value of chi-square. 
+ """ + + observed = self.data["observed"].values.tolist() + expected = self.data["expected"].values.tolist() + + chi = 0 + for i, el in enumerate(observed): + chi = chi+(((el-expected[i])**2)/expected[i]) + print(chi) + return chi diff --git a/probdists/Exponentialdistribution.py b/probdists/Exponentialdistribution.py index 8eda38f..b3a9c49 100644 --- a/probdists/Exponentialdistribution.py +++ b/probdists/Exponentialdistribution.py @@ -1,10 +1,11 @@ import math import matplotlib.pyplot as plt from .Generaldistribution import Distribution +import seaborn as sns class Exponential(Distribution): - """ Exponential distribution class for calculating and + """Exponential distribution class for calculating and visualizing a Exponential distribution. Attributes: @@ -18,15 +19,14 @@ class Exponential(Distribution): """ - def __init__(self, lmbda=.5): + def __init__(self, lmbda=0.5): self.lmbda = lmbda - Distribution.__init__(self, self.calculate_mean(), - self.calculate_stdev()) + Distribution.__init__(self, self.calculate_mean(), self.calculate_stdev()) def calculate_mean(self, round_to=2): - """ Method to calculate the mean from lambda + """Method to calculate the mean from lambda Args: round_to (int): Round the mean value. [Default value: 2 floating point] @@ -35,12 +35,12 @@ def calculate_mean(self, round_to=2): float: mean of the distribution """ - self.mean = (1.0 / self.lmbda) + self.mean = 1.0 / self.lmbda return round(self.mean, round_to) def calculate_stdev(self, round_to=2): - """ Method to calculate the standard deviation from lmbda + """Method to calculate the standard deviation from lmbda Args: round_to (int): Round the mean value. 
[Default value: 2 floating point] @@ -49,12 +49,12 @@ def calculate_stdev(self, round_to=2): float: standard deviation of the distribution """ - self.stdev = (1.0 / self.lmbda) + self.stdev = 1.0 / self.lmbda return round(self.stdev, round_to) def calculate_pdf(self, x, round_to=2): - """ Probability density function calculator for the exponential distribution. + """Probability density function calculator for the exponential distribution. Args: x (float): point for caluclating the probability density function @@ -86,7 +86,7 @@ def calculate_cdf(self, x, round_to=2): return round(self.cdf, round_to) def plot_bar_pdf(self, points=100): - """ Method to plot the pdf of the exponential distribution. + """Method to plot the pdf of the exponential distribution. Args: points (int): number of discrete data points @@ -107,10 +107,12 @@ def plot_bar_pdf(self, points=100): y.append(self.pdf) # make the plots - plt.bar(x, y) - plt.title('Probability Density Plot for Exponential Distribution') - plt.ylabel('Probability') - plt.xlabel('x') + sns.barplot(x, y).set( + title="Probability Density Plot for Exponential Distribution", + ylabel="Probability", + xlabel="x", + ) + print(y) plt.show() @@ -124,11 +126,13 @@ def plot_bar_pdf(self, points=100): # def __repr__(self): - """ Method to outputthe characteristics of the Exponential instace. + """Method to outputthe characteristics of the Exponential instace. 
Args: None Returns: string: characteristics of the Exponential """ - return "mean {0}, standard deviation {1}, lambda{2}".format(self.mean, self.stdev, self.lmbda) + return "mean {0}, standard deviation {1}, lambda{2}".format( + self.mean, self.stdev, self.lmbda + ) diff --git a/probdists/Gammadistribution.py b/probdists/Gammadistribution.py index 1b630c2..e27f173 100644 --- a/probdists/Gammadistribution.py +++ b/probdists/Gammadistribution.py @@ -1,19 +1,20 @@ import math import matplotlib.pyplot as plt from .Generaldistribution import Distribution +import seaborn as sns class Gamma(Distribution): - """ Gamma distribution class for calculating and visualizing a Gamma distribution. - Attributes: - mean (float) representing the mean value of the distribution - stdev (float) representing the standard deviation of the distribution - data_list (list of floats) extracted from the data file - k (float) shape parameter representing shape of distribution (k > 0) - theta (float) scale parameter that stretches/shrinks distribution (theta > 0) + """Gamma distribution class for calculating and visualizing a Gamma distribution. 
+ Attributes: + mean (float) representing the mean value of the distribution + stdev (float) representing the standard deviation of the distribution + data_list (list of floats) extracted from the data file + k (float) shape parameter representing shape of distribution (k > 0) + theta (float) scale parameter that stretches/shrinks distribution (theta > 0) """ - def __init__(self, k=2, theta=2, fit=False, data_file='demo_gamma_data'): + def __init__(self, k=2, theta=2, fit=False, data_file="demo_gamma_data"): """ Init function to instantiate Gamma distribution Args: @@ -35,7 +36,7 @@ def __init__(self, k=2, theta=2, fit=False, data_file='demo_gamma_data'): sample_mean = total / float(len(self.data)) running = 0 for each in self.data: - running += math.pow((each-sample_mean), 2) + running += math.pow((each - sample_mean), 2) sample_var = running / float(len(self.data)) self.k = round(math.pow(sample_mean, 2) / sample_var) self.theta = sample_var / sample_mean @@ -75,8 +76,11 @@ def calculate_pdf(self, x, round_to=2): Returns: float: probability density function output """ - self.pdf = (1 / (math.factorial(self.k - 1) * math.pow(self.theta, self.k))) * (math.pow(x, self.k - 1)) * ( - math.exp((-1 * x / self.theta))) + self.pdf = ( + (1 / (math.factorial(self.k - 1) * math.pow(self.theta, self.k))) + * (math.pow(x, self.k - 1)) + * (math.exp((-1 * x / self.theta))) + ) return round(self.pdf, round_to) def plot_bar_pdf(self, points=25): @@ -98,10 +102,11 @@ def plot_bar_pdf(self, points=25): y.append(self.pdf) # make the plots - plt.bar(x, y) - plt.title('Probability Density Plot for Gamma Distribution') - plt.ylabel('Probability') - plt.xlabel('x') + sns.barplot(x, y).set( + title="Probability Density Plot for Gamma Distribution", + ylabel="Probability", + xlabel="x", + ) plt.show() return x, y @@ -116,19 +121,21 @@ def calculate_cdf(self, x, is_upper=True, round_to=2): Returns: float: CDF output based on 'is_upper' argument rounded to 'round_to' """ - #initialize and 
declare the return variable self.cdf + # initialize and declare the return variable self.cdf self.cdf = 0 if x >= 0: - #initiate cdfvalue variable + # initiate cdfvalue variable cdfvalue = 0 - for i in range (self.k): - cdfvalue += (math.pow((x / self.theta), i) * math.exp(-1 * x / self.theta)) / math.factorial(i) + for i in range(self.k): + cdfvalue += ( + math.pow((x / self.theta), i) * math.exp(-1 * x / self.theta) + ) / math.factorial(i) if is_upper == True: self.cdf = cdfvalue elif is_upper == False: self.cdf = 1 - cdfvalue else: - raise Exception ('x has to be a positive real number') + raise Exception("x has to be a positive real number") return round(self.cdf, round_to) def __add__(self, other): diff --git a/probdists/Gaussiandistribution.py b/probdists/Gaussiandistribution.py index fe3b3cc..f3958f7 100644 --- a/probdists/Gaussiandistribution.py +++ b/probdists/Gaussiandistribution.py @@ -1,10 +1,11 @@ import math import matplotlib.pyplot as plt from .Generaldistribution import Distribution +import seaborn as sns class Gaussian(Distribution): - """ Gaussian distribution class for calculating and + """Gaussian distribution class for calculating and visualizing a Gaussian distribution. Attributes: @@ -66,58 +67,59 @@ def calculate_stdev(self, sample=True, round_to=2): def calculate_cdf(self, x, round_to=2): """Cumulative distribution function calculator for the gaussian distribution. - Args: - x (float): point for calculating the - cumulative distribution function - round_to (int): Round the mean value. [Default value: 2 floating point] + Args: + x (float): point for calculating the + cumulative distribution function + round_to (int): Round the mean value. 
[Default value: 2 floating point] - Returns: - float: cumulative distribution function output - """ - self.cdf = (0.5 * (1 + math.erf((x - self.mean) / (self.stdev * math.sqrt(2))))) + Returns: + float: cumulative distribution function output + """ + self.cdf = 0.5 * (1 + math.erf((x - self.mean) / (self.stdev * math.sqrt(2)))) return round(self.cdf, round_to) def plot_histogram(self): """Function to output a histogram of the instance variable data using - matplotlib pyplot library. + seaborn library. - Args: - None + Args: + None - Returns: - None - """ - plt.hist(self.data) - plt.title("Histogram of Data") - plt.xlabel("data") - plt.ylabel("count") + Returns: + None + """ + ax = sns.histplot(data=self.data) + ax.set(xlabel="data", ylabel="count", title="Histogram") + plt.show() def calculate_pdf(self, x, round_to=2): """Probability density function calculator for the gaussian distribution. - Args: - x (float): point for calculating the - probability density function - round_to (int): Round the mean value. [Default value: 2 floating point] + Args: + x (float): point for calculating the + probability density function + round_to (int): Round the mean value. 
[Default value: 2 floating point] - Returns: - float: probability density function output + Returns: + float: probability density function output """ - self.pdf = (1.0 / (self.stdev * math.sqrt(2 * math.pi))) * math.exp(-0.5 * ((x - self.mean) / self.stdev) ** 2) + self.pdf = (1.0 / (self.stdev * math.sqrt(2 * math.pi))) * math.exp( + -0.5 * ((x - self.mean) / self.stdev) ** 2 + ) return round(self.pdf, round_to) def plot_histogram_pdf(self, n_spaces=50): """Function to plot the normalized histogram of the data and a plot of the - probability density function along the same range + probability density function along the same range - Args: - n_spaces (int): number of data points + Args: + n_spaces (int): number of data points - Returns: - list: x values for the pdf plot - list: y values for the pdf plot + Returns: + list: x values for the pdf plot + list: y values for the pdf plot - """ + """ min_range = min(self.data) max_range = max(self.data) @@ -138,16 +140,13 @@ def plot_histogram_pdf(self, n_spaces=50): # make the plots fig, axes = plt.subplots(2, sharex=True) fig.subplots_adjust(hspace=0.5) - axes[0].hist(self.data, density=True) - axes[0].set_title("Normed Histogram of Data") - axes[0].set_ylabel("Density") - - axes[1].plot(x, y) - axes[1].set_title( - "Normal Distribution for \n \ - Sample Mean and Sample Standard Deviation" + sns.histplot(ax=axes[0], data=self.data).set( + title="Normed Histogram of Data", ylabel="Density" + ) + sns.lineplot(ax=axes[1], x=x, y=y).set( + title="Normal Distribution for \n Sample Mean and Sample Standard Deviation", + ylabel="Density", ) - axes[0].set_ylabel("Density") plt.show() return x, y @@ -155,13 +154,13 @@ def plot_histogram_pdf(self, n_spaces=50): def __add__(self, other): """Function to add together two Gaussian distributions - Args: - other (Gaussian): Gaussian instance + Args: + other (Gaussian): Gaussian instance - Returns: - Gaussian: Gaussian distribution + Returns: + Gaussian: Gaussian distribution - """ + """ 
result = Gaussian() result.mean = self.mean + other.mean @@ -172,12 +171,12 @@ def __add__(self, other): def __repr__(self): """Function to output the characteristics of the Gaussian instance - Args: - None + Args: + None - Returns: - string: characteristics of the Gaussian + Returns: + string: characteristics of the Gaussian - """ + """ return "mean {}, standard deviation {}".format(self.mean, self.stdev) diff --git a/probdists/Generaldistribution.py b/probdists/Generaldistribution.py index 4beac8b..836c07a 100644 --- a/probdists/Generaldistribution.py +++ b/probdists/Generaldistribution.py @@ -6,7 +6,6 @@ class Distribution: """ Generic distribution class for calculating and visualizing a probability distribution. - Attributes: mean (float) representing the mean value of the distribution stdev (float) representing the standard deviation of the distribution @@ -24,24 +23,18 @@ def __init__(self, mu=0, sigma=1): self.cdf = None def read_data_file(self, file_name, separator='\\n', header=None): - """Function to read in data from a txt file, csv file and excel formats (xls, xlsx, xlsm, xlsb, odf, ods and odt) - The txt file should have one number (float) per line or numbers should be separator seperated. - No need for separator argument with csv file, it will by default be ',' so csv files should have , seperated numbers - For excel file formats. There should only be one column containing numbers, and if 0th row is header then header argument should be 0. The numbers are taken from next row mentioned in header parameter. - The numbers are stored in the data attribute. 
- Args: file_name (string): name of a file to read from separator (character): custom separator to use if required @@ -62,7 +55,8 @@ def read_data_file(self, file_name, separator='\\n', header=None): } if file_name in file_name_map: dirname = Path(__file__).parent.parent.absolute() - file_name = str(Path(dirname, 'probdists/' + file_name_map[file_name])) + file_name = str( + Path(dirname, 'probdists/' + file_name_map[file_name])) # Finding the file extension and selecting separator for csv file extension = file_name.split('.')[-1] diff --git a/probdists/Triangulardistribution.py b/probdists/Triangulardistribution.py index 6bcb778..f7a7a62 100644 --- a/probdists/Triangulardistribution.py +++ b/probdists/Triangulardistribution.py @@ -2,6 +2,7 @@ import matplotlib.pyplot as plt from .Generaldistribution import Distribution from collections import Counter +import seaborn as sns class Triangular(Distribution): @@ -33,8 +34,7 @@ def __init__(self, a=0, b=1, mode=0.5): self.b = b self.mode = mode - Distribution.__init__(self, self.calculate_mean(), - self.calculate_stdev()) + Distribution.__init__(self, self.calculate_mean(), self.calculate_stdev()) def calculate_mean(self, round_to=2): """ @@ -47,7 +47,7 @@ def calculate_mean(self, round_to=2): float: mean of the data set """ - self.mean = 1/3 * (self.a + self.b + self.mode) + self.mean = 1 / 3 * (self.a + self.b + self.mode) return round(self.mean, round_to) @@ -62,16 +62,21 @@ def calculate_stdev(self, round_to=2): float: standard deviation of the data set """ - summation = (self.a ** 2) + (self.b ** 2) + (self.mode ** 2) - \ - (self.a * self.b) - (self.a * self.mode) - \ - (self.b * self.mode) + summation = ( + (self.a ** 2) + + (self.b ** 2) + + (self.mode ** 2) + - (self.a * self.b) + - (self.a * self.mode) + - (self.b * self.mode) + ) variance = summation / 18 self.stdev = math.sqrt(variance) return round(self.stdev, round_to) def replace_stats_with_data(self): - """ Method to calculate a, b, mode from the data set 
+ """Method to calculate a, b, mode from the data set Args: None @@ -139,13 +144,11 @@ def calculate_pdf(self, x, round_to=2): value = 0 # default value for when x < min or x > max if self.a <= x < self.mode: - value = (2 * (x - self.a)) / ( - (self.b - self.a) * (self.mode - self.a)) + value = (2 * (x - self.a)) / ((self.b - self.a) * (self.mode - self.a)) elif self.mode == x: value = 2 / (self.b - self.a) elif self.mode < x <= self.b: - value = (2 * (self.b - x)) / ( - (self.b - self.a) * (self.b - self.mode)) + value = (2 * (self.b - x)) / ((self.b - self.a) * (self.b - self.mode)) self.pdf = value return round(self.pdf, round_to) @@ -168,11 +171,11 @@ def calculate_cdf(self, x, round_to=2): if x < self.a: value = 0 elif self.a <= x <= self.mode: - num = ((x - self.a) ** 2) + num = (x - self.a) ** 2 den = (self.b - self.a) * (self.mode - self.a) value = num / den elif self.mode < x <= self.b: - num = ((self.b - x) ** 2) + num = (self.b - x) ** 2 den = (self.b - self.a) * (self.b - self.mode) value = 1 - (num / den) else: @@ -195,10 +198,11 @@ def plot_bar_pdf(self): peak = 2 / (self.b - self.a) y = [0, peak, 0] - plt.plot(x, y) - plt.title('Probability Density Plot for Triangular Distribution') - plt.xlabel('Probability') - plt.ylabel('x') + sns.lineplot(x, y).set( + title="Probability Density Plot for Triangular Distribution", + xlabel="Probability", + ylabel="x", + ) plt.show() @@ -214,17 +218,19 @@ def __repr__(self): string: characteristics of the Triangle """ - return f"minimum: {self.a}, maximum: {self.b}, mode: {self.mode}, " \ - f"mean: {self.mean}, standard deviation: {self.stdev}" + return ( + f"minimum: {self.a}, maximum: {self.b}, mode: {self.mode}, " + f"mean: {self.mean}, standard deviation: {self.stdev}" + ) class TriangularValueException(Exception): """ - Defines Exception raised when minimum, maximum or mode values are equal - and TriangularDistribution instance cannot be created + Defines Exception raised when minimum, maximum or mode values 
are equal + and TriangularDistribution instance cannot be created - Attributes: - message (str): Error message to return + Attributes: + message (str): Error message to return """ def __init__(self, msg=None): diff --git a/probdists/Uniformdistribution.py b/probdists/Uniformdistribution.py index 2fe2ef8..f9c0237 100644 --- a/probdists/Uniformdistribution.py +++ b/probdists/Uniformdistribution.py @@ -1,10 +1,11 @@ import math import matplotlib.pyplot as plt from .Generaldistribution import Distribution +import seaborn as sns class Uniform(Distribution): - """ Uniform distribution class for calculating and + """Uniform distribution class for calculating and visualizing a Uniform distribution. Attributes: @@ -17,26 +18,29 @@ class Uniform(Distribution): def __init__(self, low=0, high=10): if low == high: - raise Exception('Invalid interval - start and end of interval cannot be the same') + raise Exception( + "Invalid interval - start and end of interval cannot be the same" + ) self.low = low self.high = high - Distribution.__init__(self, self.calculate_mean(), - self.calculate_stdev()) + Distribution.__init__(self, self.calculate_mean(), self.calculate_stdev()) def replace_stats_with_data(self): """Function to calculate low and high from the data set - # Args: - None + # Args: + None - Returns: - float: the low value - float: the high value - """ + Returns: + float: the low value + float: the high value + """ self.low = min(self.data) self.high = max(self.data) if self.low == self.high: - raise Exception('Invalid interval - start and end of interval cannot be the same') + raise Exception( + "Invalid interval - start and end of interval cannot be the same" + ) self.calculate_mean() self.calculate_stdev() return self.low, self.high @@ -75,17 +79,17 @@ def calculate_stdev(self, sample=True, round_to=2): def calculate_cdf(self, x, round_to=2): """Cumulative distribution function calculator for the uniform distribution. 
- Args: - x (float): point for calculating the - cumulative distribution function + Args: + x (float): point for calculating the + cumulative distribution function - Returns: - float: cumulative distribution function output - """ + Returns: + float: cumulative distribution function output + """ if x < self.low: self.cdf = 0 - elif self.low<=x<=self.high: - self.cdf = (x - self.low)/(self.high-self.low) + elif self.low <= x <= self.high: + self.cdf = (x - self.low) / (self.high - self.low) else: self.cdf = 1 @@ -93,32 +97,32 @@ def calculate_cdf(self, x, round_to=2): def plot_histogram(self): """Function to output a histogram of the instance variable data using - matplotlib pyplot library. - - Args: - None - - Returns: - None - """ - plt.hist(self.data) - plt.title("Histogram of Data") - plt.xlabel("data") - plt.ylabel("count") + seaborn library. + + Args: + None + + Returns: + None + """ + + sns.histplot(data=self.data).set( + xlabel="data", ylabel="count", title="Histogram of data" + ) plt.show() def calculate_pdf(self, x, round_to=2): """Probability density function calculator for the uniform distribution. - Args: - x (float): point for calculating the - probability density function - round_to (int): Round the mean value. [Default value: 2 floating point] + Args: + x (float): point for calculating the + probability density function + round_to (int): Round the mean value. 
[Default value: 2 floating point] - Returns: - float: probability density function output + Returns: + float: probability density function output """ - self.pdf = 1/(self.high-self.low) if self.high >= x >= self.low else 0 + self.pdf = 1 / (self.high - self.low) if self.high >= x >= self.low else 0 return round(self.pdf, round_to) def plot_bar_pdf(self): @@ -136,16 +140,17 @@ def plot_bar_pdf(self): y = [] # calculate the x values to visualize - for i in range(int(self.low)-5, int(self.high)+5): + for i in range(int(self.low) - 5, int(self.high) + 5): x.append(i) self.calculate_pdf(i) y.append(self.pdf) # make the plots - plt.bar(x, y) - plt.title('Probability Density for Uniform Distribution') - plt.ylabel('Probability') - plt.xlabel('x') + sns.barplot(x=x, y=y).set( + title="Probability Density for Uniform Distribution", + ylabel="Probability", + xlabel="x", + ) plt.show() @@ -154,12 +159,12 @@ def plot_bar_pdf(self): def __repr__(self): """Function to output the characteristics of the Uniform instance - Args: - None + Args: + None - Returns: - string: characteristics of the Uniform distribution + Returns: + string: characteristics of the Uniform distribution - """ + """ return "mean {}, standard deviation {}".format(self.mean, self.stdev) diff --git a/probdists/__init__.py b/probdists/__init__.py index 664a26b..fa237e0 100644 --- a/probdists/__init__.py +++ b/probdists/__init__.py @@ -6,3 +6,4 @@ from .Bernoullidistribution import Bernoulli from .Uniformdistribution import Uniform from .Triangulardistribution import Triangular, TriangularValueException +from .Chi_squareDistribution import Chi_squareDistribution diff --git a/probdists/numbers.xls b/probdists/numbers.xls index 467dd88..c6b6f4c 100644 Binary files a/probdists/numbers.xls and b/probdists/numbers.xls differ diff --git a/probdists/numbers_chiSquare.csv b/probdists/numbers_chiSquare.csv new file mode 100644 index 0000000..798bca3 --- /dev/null +++ b/probdists/numbers_chiSquare.csv @@ -0,0 +1,13 @@ 
+category,observed,expected +1,29,21.33 +2,24,21.33 +3,22,21.33 +4,19,21.33 +5,21,21.33 +6,18,21.33 +7,19,21.33 +8,20,21.33 +9,23,21.33 +10,18,21.33 +11,20,21.33 +12,23,21.33 diff --git a/requirements.txt b/requirements.txt index bbec779..c74652b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,22 @@ -probdists==1.9 -certifi==2020.6.20 -cycler==0.10.0 -kiwisolver==1.3.0 -matplotlib==3.3.2 -numpy==1.19.3 -pandas==1.1.3 -Pillow==8.0.1 -pyparsing==2.4.7 -python-dateutil==2.8.1 -pytz==2020.1 -six==1.15.0 -xlrd==1.2.0 +certifi==2020.6.20 +click==8.0.1 +colorama==0.4.4 +cycler==0.10.0 +kiwisolver==1.3.0 +matplotlib==3.3.2 +mypy-extensions==0.4.3 +numpy==1.19.3 +pandas==1.1.3 +pathspec==0.9.0 +Pillow==8.0.1 +platformdirs==2.4.0 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2020.1 +regex==2021.9.30 +scipy==1.5.4 +seaborn==0.11.2 +six==1.15.0 +tomli==1.2.1 +typing-extensions==3.10.0.2 +xlrd==1.2.0 diff --git a/setup.py b/setup.py index 42734ad..9a54d19 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,8 @@ ], python_requires='>=3.6', install_requires=[ - 'matplotlib' + 'matplotlib', + 'seaborn' ], zip_safe=False )