Clustering Propagation Snapshots
import pandas as pd
import time
import os.path
import glob
import warnings
warnings.filterwarnings('ignore')
# Install the DenMune clustering algorithm with pip from the official Python
# package index, PyPI: https://pypi.org/project/denmune/
!pip install denmune
# Then import the clustering class used in the rest of this notebook.
from denmune import DenMune
# clone datasets from our repository datasets
if not os.path.exists('datasets'):
!git clone https://github.com/egy1st/datasets
Cloning into 'datasets'...
remote: Enumerating objects: 52, done.
remote: Counting objects: 100% (52/52), done.
remote: Compressing objects: 100% (43/43), done.
remote: Total 52 (delta 8), reused 49 (delta 8), pack-reused 0
Unpacking objects: 100% (52/52), done.
#@title { run: "auto", vertical-output: true, form-width: "50%" }
dataset = "t7.10k" #@param ["t4.8k", "t5.8k", "t7.10k", "t8.8k"]
show_noize_checkbox = True #@param {type:"boolean"}
# NOTE(review): show_noize_checkbox is not read by any code visible in this
# notebook section - confirm whether it was meant to drive `show_noise`.

# Folder (inside the cloned repository) that holds the selectable datasets.
data_path = 'datasets/denmune/chameleon/'

# Training file: the CSV named after the selected dataset.
data_file = f"{data_path}{dataset}.csv"

# header=None: the first line of the file is read as data, not column names.
X_train = pd.read_csv(data_file, sep=',', header=None)
from itertools import chain

from IPython.display import clear_output

# DenMune's parameters
knn = 39  # number of k-nearest neighbors, the only parameter required by the algorithm

# Propagation steps at which a snapshot is taken: every step at the very
# beginning, then progressively coarser strides as the step count grows.
snapshots = chain(
    [0],
    range(2, 5),
    range(5, 50, 5),
    range(50, 100, 10),
    range(100, 500, 50),
    range(500, 1000, 100),
    range(1000, 3000, 250),
    range(3000, 5500, 500),
)

# Run the clustering once per snapshot value, passing it as `prop_step`.
for snapshot in snapshots:
    print ("itration", snapshot )
    # Optional: uncomment to clear the previous cell output before each run.
    #clear_output(wait=True)
    dm = DenMune(train_data=X_train, k_nearest=knn, rgn_tsne=False, prop_step=snapshot)
    # The returned labels/validity are not used further in this cell; the run
    # is performed for the snapshot images it produces (presumably written by
    # DenMune itself - verify against the library documentation).
    labels, validity = dm.fit_predict(show_analyzer=False, show_noise=False)
itration 0
itration 2
itration 3
itration 4
itration 5
itration 10
itration 15
itration 20
itration 25
itration 30
itration 35
itration 40
itration 45
itration 50
itration 60
itration 70
itration 80
itration 90
itration 100
itration 150
itration 200
itration 250
itration 300
itration 350
itration 400
itration 450
itration 500
itration 600
itration 700
itration 800
itration 900
itration 1000
itration 1250
itration 1500
itration 1750
itration 2000
itration 2250
itration 2500
itration 2750
itration 3000
itration 3500
itration 4000
itration 4500
itration 5000
from PIL import Image

# Collect the images saved for each snapshot (written automatically by the
# algorithm) from the folder named 'propagation'.
prop_folder = 'propagation'
img_files = [os.path.join(prop_folder, f) for f in os.listdir(prop_folder)]

# Order the frames by file modification time so the animation follows the
# order in which the snapshot files were written.
sorted_files = sorted(img_files, key=os.path.getmtime)
images = [Image.open(filename) for filename in sorted_files]

# Fail with a clear message instead of an IndexError on images[0] below.
if not images:
    raise FileNotFoundError(
        f"no snapshot images found in '{prop_folder}'; run the propagation cell first"
    )

# Create an animated GIF showing the evolution of the propagation: the first
# image is the base frame and the remaining images are appended after it.
images[0].save('propagation.gif', save_all=True, append_images=images[1:], optimize=False, duration=800, loop=1)