Chameleon Dataset

import pandas as pd
import time
import os.path

import warnings
# Suppress every warning issued through the `warnings` module for the rest of the session.
warnings.filterwarnings('ignore')
# Install the DenMune clustering algorithm with pip from the official Python
# package index, PyPI: https://pypi.org/project/denmune/
!pip install denmune

# Import DenMune; this must come after the install step above.
from denmune import DenMune
# Clone the datasets repository, but only when no 'datasets' path exists yet
# in the working directory (so re-running this cell does not clone again).
if not os.path.exists('datasets'):
  !git clone https://github.com/egy1st/datasets
Cloning into 'datasets'...
remote: Enumerating objects: 52, done.
remote: Counting objects: 100% (52/52), done.
remote: Compressing objects: 100% (43/43), done.
remote: Total 52 (delta 8), reused 49 (delta 8), pack-reused 0
Unpacking objects: 100% (52/52), done.
# Directory, relative to the working directory, that holds the Chameleon CSV files.
data_path = 'datasets/denmune/chameleon/'

# Colab form fields: the `#@title` / `#@param` annotations are kept verbatim.
#@title  { run: "auto", vertical-output: true, form-width: "50%" }
chameleon_dataset = "t7.10k" #@param ["t4.8k", "t5.8k", "t7.10k", "t8.8k"]
show_noize_checkbox = True #@param {type:"boolean"}
show_analyzer_checkbox = True #@param {type:"boolean"}
k_nearest_slider = 39 #@param {type:"slider", min:1, max:100, step:1}

# Build the path of the selected dataset and load it; header=None makes pandas
# treat the first row as data rather than as column names.
data_file = f'{data_path}{chameleon_dataset}.csv'
X_train = pd.read_csv(data_file, header=None, sep=',')

# Configure DenMune with the number of nearest neighbours chosen on the slider.
# NOTE(review): rgn_tsne=False presumably skips regenerating a t-SNE
# reduction -- confirm against the DenMune documentation.
dm = DenMune(
    train_data=X_train,
    k_nearest=k_nearest_slider,
    rgn_tsne=False,
)

# Cluster the data; the two checkboxes are forwarded as the show_noise and
# show_analyzer options. The call returns a (labels, validity) pair.
labels, validity = dm.fit_predict(
    show_noise=show_noize_checkbox,
    show_analyzer=show_analyzer_checkbox,
)
Plotting train data
../_images/output_4_14.png
Validating train data
├── exec_time
│   ├── DenMune: 9.7
│   ├── NGT: 0.612
│   └── t_SNE: 0
├── n_clusters
│   ├── actual: 0
│   └── detected: 9
└── n_points
    ├── dim: 2
    ├── noise
    │   ├── type-1: 0
    │   └── type-2: 516
    ├── plot_size: 10000
    ├── size: 10000
    ├── strong: 5860
    └── weak
        ├── all: 4140
        ├── failed to merge: 516
        └── succeeded to merge: 3624
# Directory, relative to the working directory, that holds the Chameleon CSV files.
data_path = 'datasets/denmune/chameleon/'

# Colab form fields: the `#@title` / `#@param` annotations are kept verbatim.
#@title  { run: "auto", vertical-output: true, form-width: "50%" }
chameleon_dataset = "clusterable" #@param ["t4.8k", "t5.8k", "t7.10k", "t8.8k", "clusterable"]
show_noize_checkbox = True #@param {type:"boolean"}
show_analyzer_checkbox = True #@param {type:"boolean"}
k_nearest_slider = 24 #@param {type:"slider", min:1, max:100, step:1}

# Build the path of the selected dataset and load it; header=None makes pandas
# treat the first row as data rather than as column names.
data_file = f'{data_path}{chameleon_dataset}.csv'
X_train = pd.read_csv(data_file, header=None, sep=',')

# Configure DenMune with the number of nearest neighbours chosen on the slider.
# NOTE(review): rgn_tsne=False presumably skips regenerating a t-SNE
# reduction -- confirm against the DenMune documentation.
dm = DenMune(
    train_data=X_train,
    k_nearest=k_nearest_slider,
    rgn_tsne=False,
)

# Cluster the data; the two checkboxes are forwarded as the show_noise and
# show_analyzer options. The call returns a (labels, validity) pair.
labels, validity = dm.fit_predict(
    show_noise=show_noize_checkbox,
    show_analyzer=show_analyzer_checkbox,
)
Plotting train data
../_images/output_5_14.png
Validating train data
├── exec_time
│   ├── DenMune: 1.393
│   ├── NGT: 0.121
│   └── t_SNE: 0
├── n_clusters
│   ├── actual: 0
│   └── detected: 8
└── n_points
    ├── dim: 2
    ├── noise
    │   ├── type-1: 0
    │   └── type-2: 141
    ├── plot_size: 2309
    ├── size: 2309
    ├── strong: 1352
    └── weak
        ├── all: 957
        ├── failed to merge: 141
        └── succeeded to merge: 816