LeeJE20
diff --git a/‎Data/Trends_book_system.py
+164 b/‎Data/Trends_book_system.py
+164
diff --git a/‎Data/__pycache__/cuda_model.cpython-37.pyc
1.03 KB b/‎Data/__pycache__/cuda_model.cpython-37.pyc
1.03 KB
diff --git a/‎Data/__pycache__/mysql_insert.cpython-37.pyc
5.43 KB b/‎Data/__pycache__/mysql_insert.cpython-37.pyc
5.43 KB
diff --git a/‎Data/__pycache__/qdrant_searching.cpython-37.pyc
3.03 KB b/‎Data/__pycache__/qdrant_searching.cpython-37.pyc
3.03 KB
diff --git a/‎Data/cuda_model.py
+21 b/‎Data/cuda_model.py
+21
diff --git a/‎Data/google/api.py
+16 b/‎Data/google/api.py
+16
diff --git a/‎Data/html_filter_test.py
+26 b/‎Data/html_filter_test.py
+26
diff --git a/‎Data/live_searching_api.py
+70 b/‎Data/live_searching_api.py
+70
@@ -0,0 +1,164 @@
+from qdrant_searching import QdrantSearcher
+from cuda_model import CudaModel
+from youtube.youtube_collector import YouTubeColletor
+from mysql_insert import Mysql_Manager
+
+import pandas as pd
+from itertools import combinations
+import time
+from tqdm import tqdm
+
+
+pd.set_option('display.max_columns', 10)
+pd.set_option('display.max_rows', 10)
+pd.set_option('display.width', None)
+pd.set_option('display.max_colwidth', None)
+
+num_books_to_collect = 30
+num_main_keywords = 4
+# fixed_datetime = '2024-05-14 02:00:00'
+namnams = ['2024-05-16 02:00:00']#'2024-05-13 02:00:00', '2024-05-12 02:00:00', '2024-05-11 02:00:00', '2024-05-10 02:00:00','2024-05-09 02:00:00'
+
+def make_clu_sentence(sent, k=4):
+    words = sent.split()
+
+    # 문장에 있는 단어 수가 k 이하라면 원래 문장 반환
+    if len(words) <= k:
+        return sent
+
+    clu_list = [' '.join(combo) for combo in combinations(words, k)]
+
+    return clu_list
+
+
+if __name__ == "__main__":
+    process_start_time = time.time()
+
+    now_time = time.time()
+    cuda_model = CudaModel('CPU')
+    print(f"사용할 디바이스: {cuda_model.device}")
+    print(f'CUDA 로딩 시간: {time.time() - now_time}')
+
+    now_time = time.time()
+    Qsearcher = QdrantSearcher()
+    print(f'Qdrant 로딩 시간: {time.time() - now_time}')
+
+    now_time = time.time()
+    youtubeCollector = YouTubeColletor()
+    print(f'YouTubeColletor 로딩 시간: {time.time() - now_time}')
+
+    now_time = time.time()
+    sql_manager = Mysql_Manager()
+    print(f'Mysql_Manager 로딩 시간: {time.time() - now_time}')
+
+    youtube_category_map = {25: "NEWS", 28: "IT", 15: "ANIMAL", 10:"MUSIC", 24:"ENTERTAINMENT", 0:"NEWMEDIA"}
+    keywords_id_map = {}
+
+    auto_category = [28, 24, 25, 15, 0]#28, 24, 25, 15, 0
+
+    for fixed_datetime in namnams:
+        # while True:
+        for auto_c in auto_category:
+            must_not_category = []
+            # search_mode = int(input('Enter mode(0: find only input sentence, 1: flexible search, 2: Youtube Popular Video): '))
+            search_mode = 2
+
+            if search_mode == 2:
+                # video_category = int(input('1 : Film & Animation | 10 : Music | 15 : Pets & Animals\n'
+                #                            '17 : Sports | 18 : Short Movies | 19 : Travel & Events\n'
+                #                            '20 : Gaming | 24 : Entertainment | 25 : News & Politics\n'
+                #                            '27 : Education | 28 : Science & Technology | 30 : Movies\n'
+                #                            '36 : Drama\n'
+                #                            'Enter youtube categoty:'))
+                video_category = auto_c
+                print(f'Start :: {youtube_category_map[auto_c]}')
+
+
+                # must_not_category = input('Enter to must not category').split()
+                # print(f'제외 시킬 카테고리 : {must_not_category}')
+
+                # search_sentence, video_list = youtubeCollector.get_search_keyword_by_popular_videos(video_category)
+                if video_category == 28:#IT
+                    search_sentence, video_list = youtubeCollector.get_search_IT_video(fixed_datetime[:10])
+                elif video_category == 24:#ENTERTAINMENT
+                    search_sentence, video_list = youtubeCollector.get_search_ENTERTAINMENT_video(fixed_datetime[:10])
+                elif video_category == 25:#NEWS
+                    search_sentence, video_list = youtubeCollector.get_search_NEWS_video(fixed_datetime[:10])
+                elif video_category == 15:#ANIMAL
+                    search_sentence, video_list = youtubeCollector.get_search_ANIMAL_video(fixed_datetime[:10])
+                elif video_category == 0:#NEWMEDIA
+                    search_sentence, video_list = youtubeCollector.get_search_NEWMEDIA_video(fixed_datetime[:10])
+                else:
+                    search_sentence, video_list = "", []
+
+                #sql 처리 파트
+                keywords_id_map = {}
+                #키워드 먼저 넣기
+                keywords = search_sentence.split()
+
+                keyword_rank_cnt = 0
+                for kw in keywords:
+                    keyword_id = sql_manager.insert_keyword(kw, 1 if keyword_rank_cnt < num_main_keywords else 0, youtube_category_map[video_category], fixed_datetime)
+                    keywords_id_map[kw] = keyword_id
+                    keyword_rank_cnt+=1
+
+                #원본 영상 넣기
+                for video in video_list:
+                    video_id = sql_manager.insert_origin_data(video, "YOUTUBE", youtube_category_map[video_category], fixed_datetime)
+
+                    #원본 데이터 - 키워드 Join 테이블 채우기
+                    for pkw in video['video_keywords']:
+                        if pkw in keywords_id_map:
+                            sql_manager.insert_trend_source(keywords_id_map[pkw], video_id, fixed_datetime)
+                #############
+            else:
+                search_sentence = input('Enter sentence(exit : ''): ')
+
+            now_time = time.time()
+            results = []
+
+            if search_sentence is not '':
+                print(f'Start Searching {search_sentence}')
+                if search_mode == 0:
+                    search_vector = cuda_model.model.encode(search_sentence)
+
+                    results = Qsearcher.search_items(search_vector, search_sentence)
+                elif search_mode == 1 or search_mode == 2:
+                    search_part_sentences = make_clu_sentence(search_sentence)
+
+                    tmp_results = []
+                    for search_part_sentence in tqdm(search_part_sentences, desc="Processing"):
+                        search_vector = cuda_model.model.encode(search_part_sentence)
+
+                        tmp_results.extend(Qsearcher.search_items(search_vector, search_part_sentence.split()))
+
+                        if len(tmp_results) > 200:
+                            tmp_results = sorted(tmp_results, key=lambda x: x['score'], reverse=True)
+                            tmp_results = tmp_results[:num_books_to_collect]
+
+                    tmp_results = sorted(tmp_results, key = lambda x: x['score'], reverse=True)
+
+                    existing_books = set([])
+                    tmp_idx = 0
+
+                    while len(results) < num_books_to_collect and tmp_idx < len(tmp_results):
+                        if tmp_results[tmp_idx]['book_name'] not in existing_books:
+                            existing_books.add(tmp_results[tmp_idx]['book_name'])
+                            results.append(tmp_results[tmp_idx])
+                        else:
+                            pass
+
+                        tmp_idx += 1
+            else:
+                break
+
+            print(f'[{search_sentence} :: {search_mode}] 검색 시간: {time.time() - now_time}')
+            for result in results:
+                print(result, end="\n----------------------------\n")
+                # sql 처리 파트
+                book_pk = sql_manager.insert_daily_recommend(result['book_id'], fixed_datetime)
+
+                for kw in result['searched_keywords']:
+                    sql_manager.insert_recommend_keyword(book_pk, keywords_id_map[kw], fixed_datetime)
+
+        print(f'전체 걸린 시간 : {time.time() - process_start_time}')
@@ -0,0 +1,21 @@
+
+from sentence_transformers import SentenceTransformer
+import torch
+
+class CudaModel:
+    def __init__(self, mode = 'GPU', model_name='sentence-transformers/distiluse-base-multilingual-cased-v2'):
+        self.device = None
+
+        if mode == 'CPU':
+            self.device = torch.device("cpu")
+        else:
+            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self.model = self.load_model(model_name)
+        self.move_model_to_device()
+
+    def load_model(self, model_name):
+        model = SentenceTransformer(model_name)
+        return model
+
+    def move_model_to_device(self):
+        self.model.to(self.device)
@@ -0,0 +1,16 @@
+import MySQLdb
+from pytrends.request import TrendReq
+from datetime import datetime
+
+# Google Trends 객체 생성
+pytrends = TrendReq(hl='ko-KR', tz=540)
+
+# 현재 날짜 가져오기
+current_date = datetime.now().strftime('%Y-%m-%d')
+
+# 트렌드하는 검색어 가져오기
+df = pytrends.trending_searches(pn='south_korea')
+
+# 결과 출력
+print("현재 날짜:", current_date)
+print(df)
@@ -0,0 +1,26 @@
+from bs4 import BeautifulSoup
+import json
+from tqdm import tqdm
+
+def remove_html_tags(text):
+    # BeautifulSoup 객체를 생성하여 HTML 태그를 파싱합니다.
+    soup = BeautifulSoup(text, "html.parser")
+    # .get_text() 메소드를 사용하여 모든 태그를 제거하고 순수 텍스트만 추출합니다.
+    return soup.get_text()
+
+
+
+
+if __name__ == '__main__':
+    # JSON 파일 경로
+    file_path = "C:/Users/SSAFY/jupyter/S10P31S203/book_yes24.json"
+
+    with open(file_path, "r", encoding="utf-8") as json_file:
+        cnt = 0
+        for line in tqdm(json_file, desc="Processing"):
+            if cnt > 3:
+                break
+
+            clean_text = remove_html_tags(json.loads(line)['contents'])
+
+            print(clean_text)
@@ -0,0 +1,70 @@
+from qdrant_searching import QdrantSearcher
+from cuda_model import CudaModel
+from mysql_insert import Mysql_Manager
+import time
+
+
+class LiveBookSearcher:
+    def __init__(self):
+        now_time = time.time()
+        self.cuda_model = CudaModel('CPU')
+        print(f'CUDA 로딩 시간: {time.time() - now_time}')
+
+        now_time = time.time()
+        self.Qsearcher = QdrantSearcher()
+        print(f'Qdrant 로딩 시간: {time.time() - now_time}')
+
+        now_time = time.time()
+        self.sql_manager = Mysql_Manager()
+        print(f'Mysql_Manager 로딩 시간: {time.time() - now_time}')
+
+
+    #For Fast API
+    def live_keyword_searching(self, search_sentence):
+        if search_sentence is not '':
+            print(f'Start Searching {search_sentence}')
+
+            search_vector = self.cuda_model.model.encode(search_sentence)
+
+            search_results = self.Qsearcher.search_items(search_vector, search_sentence, 10)
+
+            results = []
+            for search_result in search_results:
+                results.append(search_result['book_id'])
+
+            return results
+
+        return None
+
+
+    #For Fast API
+    def memorial_book_searching(self, memorial_book):# memorial_book :: product_id
+        if memorial_book is not None:
+            # 연관 책 추천 :: 현재는 DB의 모든 책들 중에서 검색,
+            # 추후에 같은 질문을 선택한 사람들이 선택한 책중으로 바뀔 예정
+            results = self.Qsearcher.find_memorial_book(memorial_book)
+
+            '''
+            DB에 SQL로 결과를 업로드할 부분 :: 테이블 미완성으로 대기
+            self.sql_manager.{TBD}
+            '''
+
+            return results
+
+        return None
+
+    def memorial_book_searching_real_service(self, memorial_book, question_id):# memorial_book :: product_id
+        if memorial_book is not None:
+            # 연관 책 추천 :: 현재는 DB의 모든 책들 중에서 검색,
+            # 추후에 같은 질문을 선택한 사람들이 선택한 책중으로 바뀔 예정
+            results = self.Qsearcher.find_memorial_book_real_service(memorial_book, question_id)
+
+            '''
+            DB에 SQL로 결과를 업로드할 부분 :: 테이블 미완성으로 대기
+            self.sql_manager.{TBD}
+            '''
+
+            return results
+
+        return None
+