File size: 1,277 Bytes
ffc9a51 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
from pathlib import Path
from collections import defaultdict
# Pick out the tags shared by at least a given fraction of the caption files.
def count_tags(directory_path):
    """Count, per tag, how many ``*.txt`` files in a directory contain it.

    Every ``*.txt`` file directly inside ``directory_path`` is read as a
    comma-separated list of tags. Surrounding whitespace (including newlines)
    is trimmed from each tag, while spaces inside a tag are preserved. A tag
    is counted at most once per file so that ``count / total_files`` is a
    true "fraction of files" value, and empty entries (empty file, trailing
    comma, ``,,``) are ignored.

    Args:
        directory_path: Directory to scan (non-recursively) for ``*.txt``
            files. Anything accepted by ``pathlib.Path`` works.

    Returns:
        A ``(tag_count, total_files)`` tuple, where ``tag_count`` is a
        ``defaultdict(int)`` mapping each tag to the number of files that
        contain it, and ``total_files`` is the number of ``*.txt`` files read
        (files without any tag are still counted).

    Raises:
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    tag_count = defaultdict(int)
    total_files = 0
    for file_path in Path(directory_path).glob('*.txt'):
        total_files += 1
        # Decode explicitly so the result does not depend on the platform's
        # locale encoding. NOTE(review): assumes the files are UTF-8.
        text = file_path.read_text(encoding='utf-8')
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # order of the returned mapping stays deterministic.
        unique_tags = dict.fromkeys(tag.strip() for tag in text.split(','))
        for tag in unique_tags:
            if tag:
                tag_count[tag] += 1
    return tag_count, total_files
def find_common_tags(tag_count, total_files, threshold):
    """Return the tags whose share of files reaches ``threshold``.

    Args:
        tag_count: Mapping of tag -> count.
        total_files: Denominator used to turn each count into a ratio.
        threshold: Minimum ratio (inclusive) a tag needs to be kept.

    Returns:
        A list of the qualifying tags, in the iteration order of
        ``tag_count``.
    """
    selected = []
    for name, occurrences in tag_count.items():
        ratio = occurrences / total_files
        if ratio >= threshold:
            selected.append(name)
    return selected
if __name__ == "__main__":
    # Directory that holds the caption files; put your own path here.
    directory_path = r"E:\Dataset\XXXXXXXX"
    # Keep only tags used in at least 80% of the caption files.
    threshold = 0.8

    # Gather the raw statistics and echo them for inspection.
    tag_count, total_files = count_tags(directory_path)
    print(tag_count)
    print(total_files)

    # Filter by the threshold and report the result as one comma-separated line.
    common_tags = find_common_tags(tag_count, total_files, threshold)
    output = ", ".join(common_tags)
    print(f"Common tags (used in {threshold * 100}% or more of the files): {output}")
|