Skip to content

Commit

Permalink
Implemented a first heuristic for splitting clusters, resolves #3
Browse files Browse the repository at this point in the history
  • Loading branch information
phspo committed Jan 15, 2018
1 parent 4f4a201 commit 743dd79
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/ClusterEditingInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ bool ClusterEditingInstance::isDirty() const {
return _workingCopyInstance->isDirty();
}

int ClusterEditingInstance::getSize() const{
// Returns the largest node id of the original graph (_orig) plus one.
// NOTE(review): presumably this equals the node count, assuming node ids are
// contiguous and start at 0 — confirm against the graph type used for _orig.
unsigned int ClusterEditingInstance::getSize() const{
return _orig.maxNodeId()+1;
}

Expand Down
2 changes: 1 addition & 1 deletion src/ClusterEditingInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class ClusterEditingInstance {

std::vector<int>* getCluster(lemon::FullGraph::Node u) const;

int getSize() const;
unsigned int getSize() const;

friend std::ostream& operator <<(std::ostream &os, ClusterEditingInstance &inst);

Expand Down
4 changes: 3 additions & 1 deletion src/ClusterEditingSolutions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,13 @@ namespace ysk {
}

// Writes the solution stored at position `index` of _solutions to stdout:
// one line per cluster, listing the cluster's elements separated by spaces,
// followed by a single empty line once all clusters have been printed.
// No bounds check is performed on `index` here.
void ClusterEditingSolutions::printSolution(size_t index){
// Running position of the cluster inside the solution, used for the label
int idx = 0;
for(auto &entry : _solutions[index]){
cout << "Cluster: ";
cout << "Cluster " << idx << ": ";
// Print every element of the current cluster on the same line
for (auto i = entry.begin(); i != entry.end(); ++i)
cout << *i<<" ";
cout <<endl;
idx ++;
}
// Blank line separating this solution from subsequent output
cout << endl;
}
Expand Down
1 change: 1 addition & 0 deletions src/CoreAlgorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ namespace ysk {
//Heuristic K-Cluster postprocessing if desired

if (_parameter.targetClusterCount != -1 && _parameter.useHeuristic){
//TODO: Recursively attempt to merge or split, there are instances where a better score can be reached!
if (verbosity >= 2)
cout << "Aiming for the following cluster count: "<<_parameter.targetClusterCount << endl;
//Generate a new k-clustifier instance
Expand Down
96 changes: 93 additions & 3 deletions src/KClustifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* KClustifier.cpp
*
* Created on: Dec 18, 2017
* Author: philipp
* Author: Philipp Spohr
*/

#include "KClustifier.h"
Expand Down Expand Up @@ -59,7 +59,96 @@ void KClustifier::kClustify(unsigned int k, size_t solutionID){
cout << "Not enough clusters generated (" << solution.size() << "), we need: " << k <<endl;
}
KClustifier::calculateLowerBoundSplitCosts(solution);
//We then simply split until we reach the required number of clusters
while (solution.size()<k){
KClustifier::splitCheapest(solution);
if (verbosity > 4 ){
_solutions->printSolution(solutionID);
}
}
}
}

/**
 * Splits the cluster with the lowest recorded split cost into two clusters.
 *
 * The cheapest entry of _lowerBoundSplitCosts is selected, the corresponding
 * cluster is removed from `solution` and the two halves of the stored
 * separation are appended at the end of `solution`. _editingCosts is increased
 * by the separation's cost plus the weight of every edge between a node of the
 * first half and a node of the second half. Finally _lowerBoundSplitCosts is
 * re-keyed so that its keys match the new cluster positions, and fresh
 * separations are suggested for the two new clusters.
 *
 * The program is terminated via exit(-1) if no entry with a cost below
 * infinity (and a non-negative key) exists.
 *
 * @param solution clustering that is modified in place; the keys of
 *                 _lowerBoundSplitCosts are used as indices into it
 */
void KClustifier::splitCheapest(vector<vector<int>>& solution){
    //Pick the minimum first; only remember where it is so that the (potentially
    //large) separation is copied exactly once after the search
    const Separation* cheapestEntry = nullptr;
    double cheapestCost = std::numeric_limits<double>::infinity();
    int targetCluster = -1;

    for (auto const &entry : _lowerBoundSplitCosts){
        if (entry.second.cost < cheapestCost){
            cheapestCost = entry.second.cost;
            cheapestEntry = &entry.second;
            targetCluster = entry.first;
        }
    }

    if (cheapestEntry == nullptr || targetCluster < 0){
        cerr << "Critical Error: Could not find a valid split, should not happen at this step!" << endl;
        exit(-1);
    }

    //Own copy: the table entry is discarded below and suggestSeparation needs mutable vectors
    Separation cheapestSeparation = *cheapestEntry;

    if (verbosity > 4){
        auto printNodes = [](const vector<int>& nodes){
            cout << "Nodes: " << endl;
            for (int node : nodes){
                cout << node << " ";
            }
            cout << endl;
        };

        cout << "Splitting cluster: " << targetCluster << endl;
        cout << "Into: " << endl;
        printNodes(cheapestSeparation.cluster1);
        printNodes(cheapestSeparation.cluster2);
        cout << endl;
    }

    //Delete the original cluster and add the new ones, remember the positions
    solution.erase(solution.begin()+targetCluster);
    const int indexCluster1 = static_cast<int>(solution.size());
    solution.push_back(cheapestSeparation.cluster1);
    const int indexCluster2 = static_cast<int>(solution.size());
    solution.push_back(cheapestSeparation.cluster2);

    _editingCosts += cheapestSeparation.cost;

    //We need to add the edge weights between all the other nodes which we ignored in the heuristic step
    auto&& graph = _instance->getOrig();
    for (auto const &node1 : cheapestSeparation.cluster1){
        for (auto const &node2 : cheapestSeparation.cluster2){
            FullGraph::Edge edge = graph.findEdge(graph.nodeFromId(node1) , graph.nodeFromId(node2) , INVALID);
            _editingCosts += _instance->getWeight(edge);
        }
    }

    //Update the lower bound splitting table for further splits
    std::map<int,Separation> newBounds;

    for (auto const &entry : _lowerBoundSplitCosts){
        if (entry.first == targetCluster){
            //This entry is obsolete as the cluster no longer exists
            continue;
        }
        //Clusters behind the erased one moved one position to the front
        const int newIndex = (entry.first < targetCluster) ? entry.first : entry.first - 1;
        newBounds.emplace(newIndex, entry.second);
    }

    //Now we need entries for our two new clusters
    newBounds[indexCluster1] = suggestSeparation(cheapestSeparation.cluster1);
    newBounds[indexCluster2] = suggestSeparation(cheapestSeparation.cluster2);

    //And replace the old table without copying the freshly built one
    _lowerBoundSplitCosts.swap(newBounds);
}

void KClustifier::mergeCheapest(vector<vector<int>>& solution){
Expand Down Expand Up @@ -171,13 +260,13 @@ void KClustifier::calculateLowerBoundSplitCosts(vector<vector<int>>& solution){

cout << "Nodes: ";
for (vector<int>::iterator sep1 = suggestedSeparation.cluster1.begin(); sep1 != suggestedSeparation.cluster1.end(); ++sep1){
cout << _instance->getNodeName(_instance->getOrig().nodeFromId(*sep1));
cout << *sep1<< " ";
}
cout << endl;

cout << "Nodes: ";
for (vector<int>::iterator sep2 = suggestedSeparation.cluster2.begin(); sep2 != suggestedSeparation.cluster2.end(); ++sep2){
cout << _instance->getNodeName(_instance->getOrig().nodeFromId(*sep2));
cout << *sep2 << " ";
}
cout << endl;

Expand All @@ -204,6 +293,7 @@ Separation KClustifier::suggestSeparation(vector<int>& cluster){
for (vector<int>::iterator it = cluster.begin(); it != cluster.end(); ++it){
FullGraph::Node node1 = _instance->getOrig().nodeFromId(*it);
for (vector<int>::iterator it2 = it; it2 != cluster.end(); ++it2){
if (it == it2) continue;

//We initialize a new separation
Separation separation;
Expand Down
2 changes: 1 addition & 1 deletion src/KClustifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class KClustifier {
void printMergeCosts();

Separation suggestSeparation(std::vector<int>& cluster);

void splitCheapest(std::vector<std::vector<int>>& solution);


std::map<std::pair<int,int>,double> _mergeCosts;
Expand Down

0 comments on commit 743dd79

Please sign in to comment.