Title: | Detecting Class Overlapping Regions in Multidimensional Data |
---|---|
Description: | The issue of overlapping regions in multidimensional data arises when different classes or clusters share similar feature representations, making it challenging to delineate distinct boundaries between them accurately. This package provides methods for detecting and visualizing these overlapping regions using partitional clustering techniques based on nearest neighbor distances. |
Authors: | Priyanga Dilini Talagala [aut, cre, cph] |
Maintainer: | Priyanga Dilini Talagala <[email protected]> |
License: | GPL-3 |
Version: | 0.1.0 |
Built: | 2024-11-03 04:35:39 UTC |
Source: | https://github.com/pridiltal/clap |
The issue of overlapping regions in multidimensional data arises when different classes or clusters share similar feature representations, making it challenging to delineate distinct boundaries between them accurately. This package provides methods for detecting and visualizing these overlapping regions using partitional clustering techniques based on nearest neighbor distances.
Author and maintainer: Priyanga Dilini Talagala
This function computes the cluster composition from an object of class 'clap' returned by the perform_clustering function. It merges the data with the cluster assignments, computes cluster composition statistics (counts, IDs, and percentages), and filters the result based on the specified percentage threshold.
compute_cluster_composition(x)
compute_cluster_composition(x)
x |
An object of class 'clap' returned by the perform_clustering function, containing members (list of clusters), cluster_df (data frame of cluster assignments), and the original dataset. |
A filtered data frame of class 'clap' summarizing the cluster composition.
if (requireNamespace("ggplot2", quietly = TRUE)) { # Generate dummy data class1 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(1, 1), each = 50), ncol = 2) class2 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(-1, -1), each = 50), ncol = 2) datanew <- rbind(class1, class2) training <- data.frame(datanew, class = factor(c(rep(1, 50), rep(2, 50)))) # Plot the dummy data to visualize overlaps p <- ggplot2::ggplot(training, ggplot2::aes(x = X1, y = X2, color = class)) + ggplot2::geom_point() + ggplot2::labs(title = "Dummy Data with Overlapping Classes") print(p) # Perform clustering cluster_result <- perform_clustering(training, class_column = class) # Compute cluster composition composition <- compute_cluster_composition(cluster_result) }
if (requireNamespace("ggplot2", quietly = TRUE)) { # Generate dummy data class1 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(1, 1), each = 50), ncol = 2) class2 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(-1, -1), each = 50), ncol = 2) datanew <- rbind(class1, class2) training <- data.frame(datanew, class = factor(c(rep(1, 50), rep(2, 50)))) # Plot the dummy data to visualize overlaps p <- ggplot2::ggplot(training, ggplot2::aes(x = X1, y = X2, color = class)) + ggplot2::geom_point() + ggplot2::labs(title = "Dummy Data with Overlapping Classes") print(p) # Perform clustering cluster_result <- perform_clustering(training, class_column = class) # Compute cluster composition composition <- compute_cluster_composition(cluster_result) }
This function extracts IDs from a data frame containing filtered composition data and converts them into a numeric vector.
extract_ids_vector(composition)
extract_ids_vector(composition)
composition |
An object of class 'clap' returned by the 'compute_cluster_composition' function, containing cluster composition data including IDs. |
A numeric vector of IDs.
if (requireNamespace("ggplot2", quietly = TRUE)) { # Generate dummy data class1 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(1, 1), each = 50), ncol = 2) class2 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(-1, -1), each = 50), ncol = 2) datanew <- rbind(class1, class2) training <- data.frame(datanew, class = factor(c(rep(1, 50), rep(2, 50)))) # Plot the dummy data to visualize overlaps p <- ggplot2::ggplot(training, ggplot2::aes(x = X1, y = X2, color = class)) + ggplot2::geom_point() + ggplot2::labs(title = "Dummy Data with Overlapping Classes") print(p) # Perform clustering cluster_result <- perform_clustering(training, class_column = class) # Compute cluster composition composition <- compute_cluster_composition(cluster_result) # Extract IDs to numeric vector ids_vector <- extract_ids_vector(composition) # Subset data based on extracted IDs overlapdata <- training[ids_vector, ] # Plot overlapping data points p2 <- p + ggplot2::geom_point(data = overlapdata, ggplot2::aes(X1, X2), colour = "black") print(p2) }
if (requireNamespace("ggplot2", quietly = TRUE)) { # Generate dummy data class1 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(1, 1), each = 50), ncol = 2) class2 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(-1, -1), each = 50), ncol = 2) datanew <- rbind(class1, class2) training <- data.frame(datanew, class = factor(c(rep(1, 50), rep(2, 50)))) # Plot the dummy data to visualize overlaps p <- ggplot2::ggplot(training, ggplot2::aes(x = X1, y = X2, color = class)) + ggplot2::geom_point() + ggplot2::labs(title = "Dummy Data with Overlapping Classes") print(p) # Perform clustering cluster_result <- perform_clustering(training, class_column = class) # Compute cluster composition composition <- compute_cluster_composition(cluster_result) # Extract IDs to numeric vector ids_vector <- extract_ids_vector(composition) # Subset data based on extracted IDs overlapdata <- training[ids_vector, ] # Plot overlapping data points p2 <- p + ggplot2::geom_point(data = overlapdata, ggplot2::aes(X1, X2), colour = "black") print(p2) }
Perform clustering based on nearest neighbor distances
perform_clustering(data, class_column = NULL)
perform_clustering(data, class_column = NULL)
data |
A numeric matrix or data frame of data points. |
class_column |
A character string or unquoted name specifying the name of the column containing class labels. |
This function first removes the specified class column from the data, calculates the nearest neighbor distances, and then performs clustering using a radius based on the maximum nearest neighbor distance.
An object of class 'clap' containing:
members: A list of clusters with their respective data point IDs.
cluster_df: A data frame with cluster assignments for each data point.
The original dataset.
if (requireNamespace("ggplot2", quietly = TRUE)) { # Generate dummy data class1 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(1, 1), each = 50), ncol = 2) class2 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(-1, -1), each = 50), ncol = 2) datanew <- rbind(class1, class2) training <- data.frame(datanew, class = factor(c(rep(1, 50), rep(2, 50)))) # Plot the dummy data to visualize overlaps p <- ggplot2::ggplot(training, ggplot2::aes(x = X1, y = X2, color = class)) + ggplot2::geom_point() + ggplot2::labs(title = "Dummy Data with Overlapping Classes") print(p) # Perform clustering cluster_result <- perform_clustering(training, class_column = class) }
if (requireNamespace("ggplot2", quietly = TRUE)) { # Generate dummy data class1 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(1, 1), each = 50), ncol = 2) class2 <- matrix(rnorm(100, mean = 0, sd = 1), ncol = 2) + matrix(rep(c(-1, -1), each = 50), ncol = 2) datanew <- rbind(class1, class2) training <- data.frame(datanew, class = factor(c(rep(1, 50), rep(2, 50)))) # Plot the dummy data to visualize overlaps p <- ggplot2::ggplot(training, ggplot2::aes(x = X1, y = X2, color = class)) + ggplot2::geom_point() + ggplot2::labs(title = "Dummy Data with Overlapping Classes") print(p) # Perform clustering cluster_result <- perform_clustering(training, class_column = class) }