diff --git a/gui/simple.ui b/gui/simple.ui
index a7bdf13..746942e 100644
--- a/gui/simple.ui
+++ b/gui/simple.ui
@@ -6,8 +6,8 @@
0
0
- 868
- 630
+ 870
+ 667
@@ -120,7 +120,7 @@
1000
- 15
+ 17
@@ -199,8 +199,7 @@
- 请注意:
-依据不同状况查重过程可能需要耗费数秒至数分钟, 请耐心等待
+ <html><head/><body><p><span style=" font-size:11pt; color:#ff0000;">注意:</span><span style=" font-size:11pt; color:#000000;">依据不同状况查重过程可能需要耗费数秒至数分钟,请耐心等待</span></p></body></html>
@@ -243,7 +242,7 @@
0.100000000000000
- 95.000000000000000
+ 98.500000000000000
@@ -362,7 +361,7 @@
- 注意:初次使用时,请添加至少一个索引图库路径,并更新索引记录
+ <html><head/><body><p><span style=" font-size:12pt; color:#ff0000;">注意:</span><span style=" font-size:12pt;">初次使用时,请添加至少一个索引图库路径,并更新索引记录</span></p></body></html>
true
diff --git a/utils.py b/utils.py
index 06a90f1..40b9578 100644
--- a/utils.py
+++ b/utils.py
@@ -63,18 +63,25 @@ def checkout(image_path, exists_index, match_n=5):
return [(sim[i], exists_index[ids[i]]) for i in range(len(ids))]
-def get_duplicate(exists_index, threshold, match_n=30):
- ret_ids = []
+def get_duplicate(exists_index, threshold):
+ matched = set()
for idx in tqdm(range(len(exists_index)), ascii=True):
- if idx in ret_ids:
- continue
+ match_n = 5
try:
fv = ir_engine.hnsw_index.get_items([idx])[0]
except RuntimeError:
continue
sim, ids = ir_engine.match(fv, match_n)
+ while sim[-1] > threshold:
+ match_n = round(match_n*1.5)
+ sim, ids = ir_engine.match(fv, match_n)
for i in range(len(ids)):
- if (sim[i] > threshold) and (ids[i] != idx) and (not idx in ret_ids):
- ret_ids.append(ids[i])
- ret_ids.append(idx)
- yield (exists_index[idx], exists_index[ids[i]], sim[i])
+ if ids[i] == idx:
+ continue
+ if sim[i] < threshold:
+ continue
+ if ids[i] in matched:
+ continue
+ if not idx in matched:
+ matched.add(idx)
+ yield (exists_index[idx], exists_index[ids[i]], sim[i])