From 4b1950295e4ea10a6f229f64f5526af1e008de12 Mon Sep 17 00:00:00 2001 From: Philipp Spohr Date: Mon, 15 Jan 2018 13:57:29 +0100 Subject: [PATCH] work on silhouette value, refs #4 --- CMakeLists.txt | 4 + src/main.cpp | 16 +++- src/measurement/SilhouetteValue.cpp | 111 ++++++++++++++++++++++++++++ src/measurement/SilhouetteValue.h | 35 +++++++++ src/output/ClusterEditingOutput.h | 2 +- 5 files changed, 166 insertions(+), 2 deletions(-) create mode 100644 src/measurement/SilhouetteValue.cpp create mode 100644 src/measurement/SilhouetteValue.h diff --git a/CMakeLists.txt b/CMakeLists.txt index c6f02ca..a902cc5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,8 @@ set( Yoshiko_Hdr src/WorkingCopyInstance.h src/input/ClusterEditingInput.h + + src/measurement/SilhouetteValue.h src/reductionRules/AlmostCliqueRule.h src/reductionRules/CliqueRule.h @@ -57,6 +59,8 @@ set( Yoshiko_Src src/WorkingCopyInstance.cpp src/input/ClusterEditingInput.cpp + + src/measurement/SilhouetteValue.cpp src/reductionRules/AlmostCliqueRule.cpp src/reductionRules/CliqueRule.cpp diff --git a/src/main.cpp b/src/main.cpp index 6eca951..3fae365 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -17,6 +17,7 @@ #include "ClusterEditingSolutions.h" #include "CoreAlgorithm.h" #include "Globals.h" +#include "measurement/SilhouetteValue.h" #include "output/ClusterEditingOutput.h" @@ -74,6 +75,7 @@ int main(int argc, char * const argv[]) { int inputFileFormat = 0; int outputFileFormat = 0; bool exportLP = false; + bool printSilhouetteValue = false; YParameterSet parameter; @@ -95,7 +97,9 @@ int main(int argc, char * const argv[]) { ap.refOption("m", "multiplicative factor for real valued edge weights in SimilarNeighborhoodRule (the higher the better the reduction results and the slower the performance) [1]", parameter.multiplicativeFactor, false); ap.refOption("g", "graph label []", graphLabel, false); ap.refOption("r", "explicitly turn on/off reduction rules, bit string (right to left): bit 0 = 
CliqueRule, bit 1 = CriticalCliqueRule, bit 2 = AlmostCliqueRule, bit 3 = HeavyEdgeRule3in1, bit 4 = ParameterDependentReductionRule, bit 5 = SimilarNeighborhoodRule [111111]", parameter.rulesBitMask, false); - ap.refOption("k", "[EXPERIMENTAL!!!]define the number of desired clusters, -1 determines this value automatically [-1]",parameter.targetClusterCount,false); + ap.refOption("k", "[EXPERIMENTAL!!!] Define the number of desired clusters, -1 determines this value automatically [-1]",parameter.targetClusterCount,false); + ap.refOption("s", "[EXPERIMENTAL!!!] Prints the silhouette value at the end of the run",printSilhouetteValue,false); + // Perform the parsing process // (in case of any error it terminates the program) -> tb improved @@ -126,6 +130,8 @@ int main(int argc, char * const argv[]) { std::cout << " -g: " << graphLabel << std::endl; std::cout << " -r: " << parameter.rulesBitMask << std::endl; std::cout << " -k: " << parameter.targetClusterCount << std::endl; + std::cout << " -s: " << printSilhouetteValue << std::endl; + } ifstream is(inputFilename.c_str()); @@ -177,6 +183,14 @@ int main(int argc, char * const argv[]) { ClusterEditingSolutions* ces = core->run(); + //Print Silhouette Value if required + //TODO: Make accessible from library, add support for printing it to certain output formats if required + if (printSilhouetteValue){ + for (unsigned int i = 0; i < ces->getNumberOfSolutions(); i++){ + cout << "Silhouette Value: " << SilhouetteValue(instance,ces->getSolution(i)).getValue() << endl; + } + } + //Output generation ClusterEditingOutput* output; output = ClusterEditingOutput::newInstance( diff --git a/src/measurement/SilhouetteValue.cpp b/src/measurement/SilhouetteValue.cpp new file mode 100644 index 0000000..97e8700 --- /dev/null +++ b/src/measurement/SilhouetteValue.cpp @@ -0,0 +1,111 @@ +/* + * SilhouetteValue.cpp + * + * Created on: Jan 15, 2018 + * Author: philipp + */ + +#include "SilhouetteValue.h" + +using namespace std; +using 
namespace lemon;
+
+namespace ysk{
+
+//TODO: Experimental Stuff, makes 0 sense right now as edges are not a distance function
+
+	SilhouetteValue::~SilhouetteValue(){}
+
+	/**
+	 * Computes the silhouette value of the stored solution.
+	 *
+	 * For every node i the edge weights of the original graph are used as
+	 * pairwise dissimilarities to derive
+	 *   a(i): the average weight between i and the other members of its cluster,
+	 *   b(i): the average weight between i and all nodes of the other clusters,
+	 *   s(i): (b(i)-a(i))/max(a(i),b(i)).
+	 * The result is the mean of s(i) over all nodes of the instance.
+	 *
+	 * NOTE(review): Rousseeuw defines b(i) via the nearest other cluster only, the
+	 * pooled average over all foreign nodes is kept from the first draft - confirm.
+	 *
+	 * Nodes in singleton clusters and nodes with max(a(i),b(i)) == 0 contribute 0.
+	 * Prints every b(i) to stdout if `verbosity` exceeds 4.
+	 *
+	 * @return the silhouette value, 0.0 for an instance without nodes
+	 */
+	double SilhouetteValue::getValue(){
+		auto const &graph = _instance->getOrig();
+		double const nodeCount = _instance->getSize();
+		if (nodeCount <= 0) return 0.0; //Nothing to average over
+
+		//Sums the weights of the edges between a node and all other members of a cluster
+		auto weightSum = [&](int node, vector<int> const &cluster) -> double {
+			double sum = 0.0;
+			for (auto const &otherNode : cluster){
+				if (node == otherNode) continue; //No self-loops: findEdge would yield INVALID
+				FullGraph::Edge edge = graph.findEdge(
+						graph.nodeFromId(node),
+						graph.nodeFromId(otherNode),
+						INVALID);
+				sum += _instance->getWeight(edge);
+			}
+			return sum;
+		};
+
+		//Calculate average dissimilarities to own cluster (a(i)) and to other clusters (b(i))
+		map<int,double> averageDissimilaritiesOwn;
+		map<int,double> averageDissimilaritiesForeign;
+		for (auto const &cluster : _solution){
+			//Number of nodes outside of this cluster
+			double const foreignCount = nodeCount - cluster.size();
+			for (auto const &node : cluster){
+				//a(i) averages over the other members only, a singleton has none
+				double own = 0.0;
+				if (cluster.size() > 1){
+					own = weightSum(node, cluster)/(cluster.size()-1);
+				}
+				averageDissimilaritiesOwn[node] = own;
+
+				//b(i) skips the own cluster so that the sum matches the divisor below
+				double foreign = 0.0;
+				for (auto const &otherCluster : _solution){
+					if (&otherCluster == &cluster) continue;
+					foreign += weightSum(node, otherCluster);
+				}
+				//A solution consisting of a single cluster has no foreign nodes
+				if (foreignCount > 0){
+					foreign /= foreignCount;
+				}
+				averageDissimilaritiesForeign[node] = foreign;
+			}
+		}
+
+		if (verbosity > 4){
+			cout << "Average Dissimilarities to other clusters" << endl << endl;
+			for (auto const &cluster : _solution){
+				for (auto const &node: cluster){
+					cout << node << " : " << averageDissimilaritiesForeign[node]<< endl;
+				}
+			}
+		}
+
+		//Calculate silhouette value
+		double sum = 0.0; //Floating point on purpose, an int would truncate every s(i)
+		for (auto const &cluster : _solution){
+			if (cluster.size() < 2) continue; //s(i) is set to 0 for singleton clusters
+			for (auto const &node : cluster){
+				double const own = averageDissimilaritiesOwn[node];
+				double const foreign = averageDissimilaritiesForeign[node];
+				double const scale = std::max(own, foreign);
+				//Skip undefined quotients instead of propagating NaN into the result
+				if (scale != 0.0) sum += (foreign - own)/scale;
+			}
+		}
+
+		//Normalise by the node count exactly once
+		return sum/nodeCount;
+	}
+}
+
+
diff --git a/src/measurement/SilhouetteValue.h b/src/measurement/SilhouetteValue.h
new file mode 100644
index 0000000..257a1e4
--- /dev/null
+++ b/src/measurement/SilhouetteValue.h
@@ -0,0 +1,35 @@
+/*
+ * SilhouetteValue.h
+ *
+ *  Created on: Jan 15, 2018
+ *      Author: philipp
+ */
+
+#ifndef SRC_MEASUREMENT_SILHOUETTEVALUE_H_
+#define SRC_MEASUREMENT_SILHOUETTEVALUE_H_
+
+#include "ClusterEditingInstance.h"
+
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <vector>
+
+namespace ysk{
+	/** Computes the silhouette value of one clustering solution of an instance. */
+	class SilhouetteValue {
+	public:
+		/** Stores the instance pointer without taking ownership and copies the solution. */
+		SilhouetteValue(ClusterEditingInstance* instance,const std::vector<std::vector<int>>& solution):
+			_instance(instance),
+			_solution(solution)
+		{}
+		virtual ~SilhouetteValue();
+		/** @return the mean silhouette over all nodes of the instance */
+		double getValue();
+	private:
+		ClusterEditingInstance* _instance;
+		std::vector<std::vector<int>> _solution;
+	};
+}
+#endif /* SRC_MEASUREMENT_SILHOUETTEVALUE_H_ */
diff --git a/src/output/ClusterEditingOutput.h b/src/output/ClusterEditingOutput.h
index 55d05ae..770a16e 100644
--- a/src/output/ClusterEditingOutput.h
+++ b/src/output/ClusterEditingOutput.h
@@ -17,7 +17,7 @@
 #include "ClusterEditingSolutions.h"
 #include "WorkingCopyInstance.h"
 
-
+//TODO: Add Console Output -> Redundant (Verbosity is sufficient?)
 
 namespace ysk {
 