diff --git a/gui/simple.ui b/gui/simple.ui index a7bdf13..746942e 100644 --- a/gui/simple.ui +++ b/gui/simple.ui @@ -6,8 +6,8 @@ 0 0 - 868 - 630 + 870 + 667 @@ -120,7 +120,7 @@ 1000 - 15 + 17 @@ -199,8 +199,7 @@ - 请注意: -依据不同状况查重过程可能需要耗费数秒至数分钟, 请耐心等待 + <html><head/><body><p><span style=" font-size:11pt; color:#ff0000;">注意:</span><span style=" font-size:11pt; color:#000000;">依据不同状况查重过程可能需要耗费数秒至数分钟,请耐心等待</span></p></body></html> @@ -243,7 +242,7 @@ 0.100000000000000 - 95.000000000000000 + 98.500000000000000 @@ -362,7 +361,7 @@ - 注意:初次使用时,请添加至少一个索引图库路径,并更新索引记录 + <html><head/><body><p><span style=" font-size:12pt; color:#ff0000;">注意:</span><span style=" font-size:12pt;">初次使用时,请添加至少一个索引图库路径,并更新索引记录</span></p></body></html> true diff --git a/utils.py b/utils.py index 06a90f1..40b9578 100644 --- a/utils.py +++ b/utils.py @@ -63,18 +63,25 @@ def checkout(image_path, exists_index, match_n=5): return [(sim[i], exists_index[ids[i]]) for i in range(len(ids))] -def get_duplicate(exists_index, threshold, match_n=30): - ret_ids = [] +def get_duplicate(exists_index, threshold): + matched = set() for idx in tqdm(range(len(exists_index)), ascii=True): - if idx in ret_ids: - continue + match_n = 5 try: fv = ir_engine.hnsw_index.get_items([idx])[0] except RuntimeError: continue sim, ids = ir_engine.match(fv, match_n) + while sim[-1] > threshold: + match_n = round(match_n*1.5) + sim, ids = ir_engine.match(fv, match_n) for i in range(len(ids)): - if (sim[i] > threshold) and (ids[i] != idx) and (not idx in ret_ids): - ret_ids.append(ids[i]) - ret_ids.append(idx) - yield (exists_index[idx], exists_index[ids[i]], sim[i]) + if ids[i] == idx: + continue + if sim[i] < threshold: + continue + if ids[i] in matched: + continue + if not idx in matched: + matched.add(idx) + yield (exists_index[idx], exists_index[ids[i]], sim[i])