File size: 1,277 Bytes
ffc9a51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from pathlib import Path
from collections import defaultdict

# Picks out the tags that are shared by at least a given fraction of the caption files.

def count_tags(directory_path):
    """Count, for every tag, how many caption files in a directory contain it.

    Every ``*.txt`` file directly inside ``directory_path`` is read as a
    comma-separated list of tags. Whitespace around each tag is trimmed
    (spaces inside a tag such as ``"long hair"`` are kept), empty entries are
    ignored, and a tag repeated within one file is counted once for that file.

    Args:
        directory_path: Directory containing the caption files (str or Path).

    Returns:
        A ``(tag_count, total_files)`` tuple: ``tag_count`` is a
        ``defaultdict(int)`` mapping each tag to the number of files that
        contain it, and ``total_files`` is the number of ``*.txt`` files read.
    """
    dir_path = Path(directory_path)
    tag_count = defaultdict(int)
    total_files = 0

    for file_path in dir_path.glob('*.txt'):
        total_files += 1
        # Decode explicitly instead of relying on the platform default
        # encoding; "utf-8-sig" also drops a leading BOM if one is present.
        # NOTE(review): assumes the caption files are UTF-8 — confirm.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            text = f.read()

        # Track tags already seen in this file so each file contributes at
        # most 1 to a tag's count, keeping count / total_files within [0, 1].
        seen = set()
        for raw_tag in text.split(','):
            tag = raw_tag.strip()
            # Skip blanks produced by empty files or trailing commas.
            if not tag or tag in seen:
                continue
            seen.add(tag)
            tag_count[tag] += 1

    return tag_count, total_files

def find_common_tags(tag_count, total_files, threshold):
    """Return the tags whose share of files is at least ``threshold``.

    Args:
        tag_count: Mapping of tag -> count.
        total_files: Denominator used to turn each count into a ratio.
        threshold: Minimum ratio (``count / total_files``) a tag must reach;
            the comparison is inclusive.

    Returns:
        A list of the qualifying tags, in the iteration order of
        ``tag_count``. Empty when ``total_files`` is 0.
    """
    # With no files there is nothing to be "common" to; return early rather
    # than dividing by zero.
    if not total_files:
        return []
    return [tag for tag, count in tag_count.items() if count / total_files >= threshold]

if __name__ == "__main__":
    # Keep only the tags used in at least 80% of the caption files.
    threshold = 0.8
    # Set this to the path of the directory to scan.
    directory_path = r"E:\Dataset\XXXXXXXX"

    tag_count, total_files = count_tags(directory_path)
    # Show the raw counts and the file total, one per line.
    print(tag_count, total_files, sep="\n")

    output = ", ".join(find_common_tags(tag_count, total_files, threshold))
    print(f"Common tags (used in {threshold * 100}% or more of the files): {output}")