23 print_to_console_verbosely: bool = False):
24 """
25 Скачивает необходимые молекулы из базы ChEMBL
26
27 Args:
28 need_primary_analysis (bool, optional): нужен ли первичный анализ скачанных файлов. Defaults to False.
29 need_combining (bool, optional): нужно ли собирать все скачанные файлы в один. Defaults to True.
30 delete_downloaded_after_combining (bool, optional): нужно ли удалять все скачанные файлы после комбинирования. Defaults to True.
31 skip_downloaded_files (bool, optional): пропускать ли уже скачанные файлы. Defaults to False.
32 testing_flag (bool, optional): спец. флаг для тестирования функционала. Defaults to False.
33 print_to_console_verbosely (bool, optional): нужен ли более подробный вывод в консоль. Defaults to False.
34 """
35
36 if delete_downloaded_after_combining and not need_combining:
37 raise ValueError(
38 "DownloadChEMBLCompounds: delete_downloaded_after_combining=True but need_combine=False")
39
40 if skip_downloaded_files and need_primary_analysis:
41 raise ValueError(
42 "DownloadChEMBLCompounds: skip_downloaded_files=True, nothing to analyse")
43
44 UpdateLoggerFormat(logger_label, "fg #BBDD7C")
45
46 logger.info(f"{'-' * 21} ChEMBL downloading for DrugDesign {'-' * 21}")
47
48 CreateFolder("results")
49
50 CreateFolder(results_folder_name)
51
52 if need_primary_analysis:
53 CreateFolder(f"{results_folder_name}/{primary_analysis_folder_name}")
54
55 logger.info(f"{'-' * 77}")
56
57 mw_ranges: list[tuple[int, int]] = [
58 (000, 190), (190, 215), (215, 230), (230, 240),
59 (240, 250), (250, 260), (260, 267), (267, 273),
60 (273, 280), (280, 285), (285, 290), (290, 295),
61 (295, 299), (299, 303), (303, 307), (307, 311),
62 (311, 315), (315, 319), (319, 323), (323, 327),
63 (327, 330), (330, 334), (334, 337), (337, 340),
64 (340, 343), (343, 346), (346, 349), (349, 352),
65 (352, 355), (355, 359), (359, 363), (363, 367),
66 (367, 371), (371, 375), (375, 379), (379, 383),
67 (383, 387), (387, 391), (391, 395), (395, 399),
68 (399, 403), (403, 407), (407, 411), (411, 415),
69 (415, 419), (419, 423), (423, 427), (427, 431),
70 (431, 435), (435, 439), (439, 443), (443, 447),
71 (447, 451), (451, 456), (456, 461), (461, 466),
72 (466, 471), (471, 476), (476, 481), (481, 487),
73 (487, 493), (493, 499), (499, 506), (506, 514),
74 (514, 522), (522, 531), (531, 541), (541, 552),
75 (552, 565), (565, 579), (579, 596), (596, 617),
76 (617, 648), (648, 693), (693, 758), (758, 868),
77 (868, 1101), (1101, 1200), (1200, 12_546_42)]
78
79 if testing_flag:
80 mw_ranges = [(0, 50), (50, 75)]
81
82 for less_limit, greater_limit in mw_ranges:
83 if not skip_downloaded_files or not IsFileInFolder(f"{results_folder_name}",
84 f"range_{less_limit}_{greater_limit}_mw_mols.csv"):
85 DownloadCompoundsByMWRange(
86 less_limit, greater_limit, need_primary_analysis=need_primary_analysis,
87 print_to_console=(print_to_console_verbosely or testing_flag))
88
89 else:
90 logger.warning(f"Molecules with mw in range [{less_limit}, {
91 greater_limit}) is already downloaded, skip".ljust(77))
92
93 logger.info(f"{'-' * 77}")
94
95 if need_combining:
96 CombineCSVInFolder(results_folder_name,
97 combined_file_name,
98 skip_downloaded_files=skip_downloaded_files,
99 print_to_console=(print_to_console_verbosely or testing_flag))
100
101 UpdateLoggerFormat(logger_label, "fg #BBDD7C")
102
103 if delete_downloaded_after_combining:
104 logger.info(f"Deleting files after combining in '{
105 results_folder_name}'...".ljust(77))
106
107 try:
108 DeleteFilesInFolder(results_folder_name,
109 f"{combined_file_name}.csv")
110 logger.success(
111 f"Deleting files after combining in '{results_folder_name}'".ljust(77))
112
113 except Exception as exception:
114 PrintException(exception, logger_label, "fg #BBDD7C")
115
116 logger.success(f"{'-' * 21} ChEMBL downloading for DrugDesign {'-' * 21}")