Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Additions to the python interface #33

Merged
merged 7 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 109 additions & 14 deletions CLUEstering/CLUEstering.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,32 +71,38 @@ def test_blobs(n_samples: int, n_dim: int , n_blobs: int = 4, mean: float = 0,
centers = []
if n_dim == 2:
data = {'x0': np.array([]), 'x1': np.array([]), 'weight': np.array([])}
centers = [[x_max * rnd.random(), y_max * rnd.random()] for _ in range(n_blobs)]
blob_data = make_blobs(n_samples=n_samples, centers=np.array(centers))[0]
centers = [[x_max * rnd.random(),
y_max * rnd.random()] for _ in range(n_blobs)]
blob_data = make_blobs(n_samples=n_samples,
centers=np.array(centers))[0]

data['x0'] = blob_data.T[0]
data['x1'] = blob_data.T[1]
data['weight'] = np.full(shape=len(blob_data.T[0]), fill_value=1)


return pd.DataFrame(data)
if n_dim == 3:
data = {'x0': [], 'x1': [], 'x2': [], 'weight': []}
sqrt_samples = int(sqrt(n_samples))
z_values = np.random.normal(mean, sigma,sqrt_samples)
centers = [[x_max * rnd.random(), y_max * rnd.random()] for _ in range(n_blobs)]
z_values = np.random.normal(mean, sigma, sqrt_samples)
centers = [[x_max * rnd.random(),
y_max * rnd.random()] for _ in range(n_blobs)]

for value in z_values: # for every z value, a layer is generated.
blob_data = make_blobs(n_samples=sqrt_samples, centers=np.array(centers))[0]
for value in z_values: # for every z value, a layer is generated.
blob_data = make_blobs(n_samples=sqrt_samples,
centers=np.array(centers))[0]
data['x0'] = np.concatenate([data['x0'], blob_data.T[0]])
data['x1'] = np.concatenate([data['x1'], blob_data.T[1]])
data['x2'] = np.concatenate([data['x2'], np.full(shape=sqrt_samples,
fill_value=value)])
data['weight'] = np.concatenate([data['weight'], np.full(shape=sqrt_samples,
fill_value=1)])
data['x2'] = np.concatenate([data['x2'],
np.full(shape=sqrt_samples,
fill_value=value)])
data['weight'] = np.concatenate([data['weight'],
np.full(shape=sqrt_samples,
fill_value=1)])

return pd.DataFrame(data)


@dataclass()
class clustering_data:
"""
Expand Down Expand Up @@ -217,7 +223,14 @@ def __init__(self, dc_: float, rhoc_: float, outlier_: float, ppbin: int = 10):
self.clust_prop = None
self.elapsed_time = 0.

def _read_array(self, input_data: Union[list,np.ndarray]) -> None:
def set_params(self, dc: float, rhoc: float,
               outlier: float, ppbin: int = 10) -> None:
    """
    Overwrites the clustering parameters stored on the instance.

    The arguments correspond to the constructor parameters ``dc_``,
    ``rhoc_``, ``outlier_`` and ``ppbin``.

    Parameters
    ----------
    dc : float
        New value stored in ``self.dc_``.
    rhoc : float
        New value stored in ``self.rhoc_``.
    outlier : float
        New value stored in ``self.outlier_``.
    ppbin : int, optional
        New value stored in ``self.ppbin``. Defaults to 10, the same
        default used by the constructor.

    Returns
    -------
    None
    """
    self.dc_ = dc
    # The trailing underscore matters: the original assigned to `rhoc` and
    # `outlier`, which is inconsistent with `dc_` above and with the
    # constructor's `rhoc_` / `outlier_` naming, so the update was
    # presumably never seen by the code that reads the parameters.
    # NOTE(review): confirm against the attribute names set in __init__.
    self.rhoc_ = rhoc
    self.outlier_ = outlier
    self.ppbin = ppbin

def _read_array(self, input_data: Union[list, np.ndarray]) -> None:
"""
Reads data provided with lists or np.ndarrays

Expand Down Expand Up @@ -281,7 +294,7 @@ def _read_string(self, input_data: str) -> Union[pd.DataFrame,None]:
df_ = pd.read_csv(input_data)
return df_

def _read_dict_df(self, input_data: Union[dict,pd.DataFrame]) -> pd.DataFrame:
def _read_dict_df(self, input_data: Union[dict, pd.DataFrame]) -> pd.DataFrame:
"""
Reads data provided using dictionaries or pandas dataframes

Expand Down Expand Up @@ -504,6 +517,42 @@ def choose_kernel(self,
raise ValueError("Invalid kernel. The allowed choices for the"
+ " kernels are: flat, exp, gaus and custom.")

# getters for the properties of the clustering data
@property
def coords(self) -> np.ndarray:
    """
    Coordinates of the points used for clustering.

    Read-only view onto ``self.clust_data.coords``.
    """
    data = self.clust_data
    return data.coords

@property
def original_coords(self) -> np.ndarray:
    """
    Original, non-normalized coordinates of the points.
    """
    data = self.clust_data
    # NOTE(review): the attribute read here is spelled `originalcoords`
    # (no underscore) while this property is `original_coords` -- confirm
    # it matches the field name declared on the clustering data container.
    return data.originalcoords

@property
def weight(self) -> np.ndarray:
    """
    Weight of the points.

    Read-only view onto ``self.clust_data.weight``.
    """
    data = self.clust_data
    return data.weight

@property
def n_dim(self) -> int:
    """
    Number of dimensions of the points.

    Read-only view onto ``self.clust_data.n_dim``.
    """
    data = self.clust_data
    return data.n_dim

@property
def n_points(self) -> int:
    """
    Number of points in the dataset.

    Read-only view onto ``self.clust_data.n_points``.
    """
    data = self.clust_data
    return data.n_points

def list_devices(self, backend: str = "all") -> None:
"""
Lists the devices available for the chosen backend.
Expand Down Expand Up @@ -667,7 +716,53 @@ def run_clue(self,
print(f'CLUE executed in {self.elapsed_time} ms')
print(f'Number of clusters found: {self.clust_prop.n_clusters}')

def input_plotter(self, plot_title: str='', title_size: float = 16,
# getters for the properties of the clusters
@property
def n_clusters(self) -> int:
    """
    Number of clusters found.

    Read-only view onto ``self.clust_prop.n_clusters``.
    """
    # NOTE(review): `clust_prop` is initialised to None in the constructor,
    # so reading this before the clustering has produced results presumably
    # raises AttributeError -- confirm the intended behaviour.
    props = self.clust_prop
    return props.n_clusters

@property
def cluster_ids(self) -> np.ndarray:
    """
    Index of the cluster to which each point belongs.

    Read-only view onto ``self.clust_prop.cluster_ids``.
    """
    props = self.clust_prop
    return props.cluster_ids

@property
def is_seed(self) -> np.ndarray:
    """
    Seed flag of each point.

    Array of integers holding '1' for points that are seeds and '0'
    for points that are not.
    """
    props = self.clust_prop
    return props.is_seed

@property
def cluster_points(self) -> np.ndarray:
    """
    Points grouped by cluster.

    Array containing, for each cluster, the list of its points.
    """
    props = self.clust_prop
    return props.cluster_points

@property
def points_per_cluster(self) -> np.ndarray:
    """
    Number of points belonging to each cluster.

    Read-only view onto ``self.clust_prop.points_per_cluster``.
    """
    props = self.clust_prop
    return props.points_per_cluster

@property
def output_df(self) -> pd.DataFrame:
    """
    Dataframe containing the cluster_ids and the is_seed values.

    Read-only view onto ``self.clust_prop.output_df``.
    """
    props = self.clust_prop
    return props.output_df


def input_plotter(self, plot_title: str = '', title_size: float = 16,
x_label: str = 'x', y_label: str = 'y', z_label: str = 'z',
label_size: float = 16, pt_size: float = 1, pt_colour: str = 'b',
grid: bool = True, grid_style: str = '--', grid_size: float = 0.2,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
from setuptools import setup

__version__ = "2.1.3"
__version__ = "2.2.0"
this_directory = Path(__file__).parent
long_description = (this_directory/'README.md').read_text()

Expand Down
Loading