DrugDesign Data Analysis
https://gitlab.com/UmbrellaLeaf5/drugdesign_parsing
Loading...
Searching...
No Matches
download Namespace Reference

Functions

None DownloadChEMBLCellLines (bool need_primary_analysis=False, bool download_all=False, bool download_activities=True, bool skip_downloaded_files=False, bool testing_flag=False, bool print_to_console_verbosely=False)
 
 DownloadChEMBLCompounds (bool need_primary_analysis=False, bool need_combining=True, bool delete_downloaded_after_combining=True, bool skip_downloaded_files=False, bool testing_flag=False, bool print_to_console_verbosely=False)
 
None DownloadChEMBLTargets (bool need_primary_analysis=False, bool download_all=False, bool download_activities=True, bool skip_downloaded_files=False, bool testing_flag=False, bool print_to_console_verbosely=False)
 
None DownloadTargetChEMBLActivities (pd.DataFrame targets_data, str results_folder_name="results/activities", bool download_compounds_sdf=True, bool print_to_console=False, bool skip_downloaded_activities=False)
 
None GetCellLineChEMBLActivitiesFromCSV (pd.DataFrame cell_lines_data, str raw_csv_folder_name, str results_folder_name="results/activities", bool download_compounds_sdf=True, bool print_to_console=False, bool skip_gotten_activities=False)
 

Variables

str activities_results_folder_name = "results/activities"
 
str combined_file_name = "combined_cell_lines_data_from_ChEMBL"
 
 comp = pcp.Compound.from_cid(1423)
 
str logger_label = "ChEMBL____cells"
 
str primary_analysis_folder_name = "primary_analysis"
 
str results_folder_name = "results/cell_lines"
 

Function Documentation

◆ DownloadChEMBLCellLines()

None download.DownloadChEMBLCellLines ( bool need_primary_analysis = False,
bool download_all = False,
bool download_activities = True,
bool skip_downloaded_files = False,
bool testing_flag = False,
bool print_to_console_verbosely = False )
Скачивает необходимые клеточные линии из базы ChEMBL

Args:
    need_primary_analysis (bool, optional): нужен ли первичный анализ скачанных файлов. Defaults to False.
    download_all (bool, optional): скачивать ли все клеточные линии (или использовать только те, что нужны DrugDesign). Defaults to False.
    download_activities (bool, optional): скачивать ли наборы активностей к клеточным линиям (по IC50 и GI50). Defaults to True.
    skip_downloaded_files (bool, optional): пропускать ли уже скачанные файлы. Defaults to False.
    testing_flag (bool, optional): спец. флаг для тестирования функционала. Defaults to False.
    print_to_console_verbosely (bool, optional): нужен ли более подробный вывод в консоль. Defaults to False.
24 print_to_console_verbosely: bool = False) -> None:
25 """
26 Скачивает необходимые клеточные линии из базы ChEMBL
27
28 Args:
29 need_primary_analysis (bool, optional): нужен ли первичный анализ скачанных файлов. Defaults to False.
30 download_all (bool, optional): скачивать ли все клеточные линии (или использовать только те, что нужны DrugDesign). Defaults to False.
31 download_activities (bool, optional): скачивать ли наборы активностей к клеточным линиям (по IC50 и GI50). Defaults to True.
32 skip_downloaded_files (bool, optional): пропускать ли уже скачанные файлы. Defaults to False.
33 testing_flag (bool, optional): спец. флаг для тестирования функционала. Defaults to False.
34 print_to_console_verbosely (bool, optional): нужен ли более подробный вывод в консоль. Defaults to False.
35 """
36
37 UpdateLoggerFormat(logger_label, "fg #CB507C")
38
39 logger.info(f"{'-' * 21} ChEMBL downloading for DrugDesign {'-' * 21}")
40
41 CreateFolder("results")
42 CreateFolder(results_folder_name)
43
44 if need_primary_analysis:
45 CreateFolder(f"{results_folder_name}/{primary_analysis_folder_name}")
46
47 if download_activities:
48 CreateFolder(activities_results_folder_name)
49
50 logger.info(f"{'-' * 77}")
51
52 id_list: list[str] = ["CHEMBL4295386", "CHEMBL3307781", "CHEMBL4295453", "CHEMBL4295483",
53 "CHEMBL3308509", "CHEMBL3706569", "CHEMBL3307715", "CHEMBL3307525",
54 "CHEMBL3307970", "CHEMBL4295409", "CHEMBL3307501", "CHEMBL3307364",
55 "CHEMBL3308499", "CHEMBL3307481", "CHEMBL3308021", "CHEMBL3307755",
56 "CHEMBL3307614"]
57
58 if testing_flag:
59 id_list = ["CHEMBL4295386", "CHEMBL3307781"]
60
61 if not skip_downloaded_files or not IsFileInFolder(f"{results_folder_name}",
62 "cell_lines_data_from_ChEMBL.csv"):
63 if download_all:
64 id_list = [] # в случае пустого списка в DownloadCellLinesFromIdList скачаются все
65
66 DownloadCellLinesFromIdList(cell_line_chembl_id_list=id_list,
67 need_primary_analysis=need_primary_analysis,
68 get_activities=download_activities,
69 activities_results_folder_name=activities_results_folder_name,
70 print_to_console=(
71 print_to_console_verbosely or testing_flag),
72 skip_gotten_activities=skip_downloaded_files)
73
74 else:
75 logger.warning(
76 f"cell_lines_data_from_ChEMBL is already downloaded, skip".ljust(77))
77
78 logger.success(f"{'-' * 21} ChEMBL downloading for DrugDesign {'-' * 21}")

◆ DownloadChEMBLCompounds()

download.DownloadChEMBLCompounds ( bool need_primary_analysis = False,
bool need_combining = True,
bool delete_downloaded_after_combining = True,
bool skip_downloaded_files = False,
bool testing_flag = False,
bool print_to_console_verbosely = False )
Скачивает необходимые молекулы из базы ChEMBL

Args:
    need_primary_analysis (bool, optional): нужен ли первичный анализ скачанных файлов. Defaults to False.
    need_combining (bool, optional): нужно ли собирать все скачанные файлы в один. Defaults to True.
    delete_downloaded_after_combining (bool, optional): нужно ли удалять все скачанные файлы после комбинирования. Defaults to True.
    skip_downloaded_files (bool, optional): пропускать ли уже скачанные файлы. Defaults to False.
    testing_flag (bool, optional): спец. флаг для тестирования функционала. Defaults to False.
    print_to_console_verbosely (bool, optional): нужен ли более подробный вывод в консоль. Defaults to False.
23 print_to_console_verbosely: bool = False):
24 """
25 Скачивает необходимые молекулы из базы ChEMBL
26
27 Args:
28 need_primary_analysis (bool, optional): нужен ли первичный анализ скачанных файлов. Defaults to False.
29 need_combining (bool, optional): нужно ли собирать все скачанные файлы в один. Defaults to True.
30 delete_downloaded_after_combining (bool, optional): нужно ли удалять все скачанные файлы после комбинирования. Defaults to True.
31 skip_downloaded_files (bool, optional): пропускать ли уже скачанные файлы. Defaults to False.
32 testing_flag (bool, optional): спец. флаг для тестирования функционала. Defaults to False.
33 print_to_console_verbosely (bool, optional): нужен ли более подробный вывод в консоль. Defaults to False.
34 """
35
36 if delete_downloaded_after_combining and not need_combining:
37 raise ValueError(
38 "DownloadChEMBLCompounds: delete_downloaded_after_combining=True but need_combine=False")
39
40 if skip_downloaded_files and need_primary_analysis:
41 raise ValueError(
42 "DownloadChEMBLCompounds: skip_downloaded_files=True, nothing to analyse")
43
44 UpdateLoggerFormat(logger_label, "fg #BBDD7C")
45
46 logger.info(f"{'-' * 21} ChEMBL downloading for DrugDesign {'-' * 21}")
47
48 CreateFolder("results")
49
50 CreateFolder(results_folder_name)
51
52 if need_primary_analysis:
53 CreateFolder(f"{results_folder_name}/{primary_analysis_folder_name}")
54
55 logger.info(f"{'-' * 77}")
56
57 mw_ranges: list[tuple[int, int]] = [
58 (000, 190), (190, 215), (215, 230), (230, 240),
59 (240, 250), (250, 260), (260, 267), (267, 273),
60 (273, 280), (280, 285), (285, 290), (290, 295),
61 (295, 299), (299, 303), (303, 307), (307, 311),
62 (311, 315), (315, 319), (319, 323), (323, 327),
63 (327, 330), (330, 334), (334, 337), (337, 340),
64 (340, 343), (343, 346), (346, 349), (349, 352),
65 (352, 355), (355, 359), (359, 363), (363, 367),
66 (367, 371), (371, 375), (375, 379), (379, 383),
67 (383, 387), (387, 391), (391, 395), (395, 399),
68 (399, 403), (403, 407), (407, 411), (411, 415),
69 (415, 419), (419, 423), (423, 427), (427, 431),
70 (431, 435), (435, 439), (439, 443), (443, 447),
71 (447, 451), (451, 456), (456, 461), (461, 466),
72 (466, 471), (471, 476), (476, 481), (481, 487),
73 (487, 493), (493, 499), (499, 506), (506, 514),
74 (514, 522), (522, 531), (531, 541), (541, 552),
75 (552, 565), (565, 579), (579, 596), (596, 617),
76 (617, 648), (648, 693), (693, 758), (758, 868),
77 (868, 1101), (1101, 1200), (1200, 12_546_42)]
78
79 if testing_flag:
80 mw_ranges = [(0, 50), (50, 75)]
81
82 for less_limit, greater_limit in mw_ranges:
83 if not skip_downloaded_files or not IsFileInFolder(f"{results_folder_name}",
84 f"range_{less_limit}_{greater_limit}_mw_mols.csv"):
85 DownloadCompoundsByMWRange(
86 less_limit, greater_limit, need_primary_analysis=need_primary_analysis,
87 print_to_console=(print_to_console_verbosely or testing_flag))
88
89 else:
90 logger.warning(f"Molecules with mw in range [{less_limit}, {
91 greater_limit}) is already downloaded, skip".ljust(77))
92
93 logger.info(f"{'-' * 77}")
94
95 if need_combining:
96 CombineCSVInFolder(results_folder_name,
97 combined_file_name,
98 skip_downloaded_files=skip_downloaded_files,
99 print_to_console=(print_to_console_verbosely or testing_flag))
100
101 UpdateLoggerFormat(logger_label, "fg #BBDD7C")
102
103 if delete_downloaded_after_combining:
104 logger.info(f"Deleting files after combining in '{
105 results_folder_name}'...".ljust(77))
106
107 try:
108 DeleteFilesInFolder(results_folder_name,
109 f"{combined_file_name}.csv")
110 logger.success(
111 f"Deleting files after combining in '{results_folder_name}'".ljust(77))
112
113 except Exception as exception:
114 PrintException(exception, logger_label, "fg #BBDD7C")
115
116 logger.success(f"{'-' * 21} ChEMBL downloading for DrugDesign {'-' * 21}")

◆ DownloadChEMBLTargets()

None download.DownloadChEMBLTargets ( bool need_primary_analysis = False,
bool download_all = False,
bool download_activities = True,
bool skip_downloaded_files = False,
bool testing_flag = False,
bool print_to_console_verbosely = False )
Скачивает необходимые цели из базы ChEMBL

Args:
    need_primary_analysis (bool, optional): нужен ли первичный анализ скачанных файлов. Defaults to False.
    download_all (bool, optional): скачивать ли все цели (или использовать только те, что нужны DrugDesign). Defaults to False.
    download_activities (bool, optional): скачивать ли наборы активностей к целям (по IC50 и Ki). Defaults to True.
    skip_downloaded_files (bool, optional): пропускать ли уже скачанные файлы. Defaults to False.
    testing_flag (bool, optional): спец. флаг для тестирования функционала. Defaults to False.
    print_to_console_verbosely (bool, optional): нужен ли более подробный вывод в консоль. Defaults to False.
24 print_to_console_verbosely: bool = False) -> None:
25 """
26 Скачивает необходимые цели из базы ChEMBL
27
28 Args:
29 need_primary_analysis (bool, optional): нужен ли первичный анализ скачанных файлов. Defaults to False.
30 download_all (bool, optional): скачивать ли все цели (или использовать только те, что нужны DrugDesign). Defaults to False.
31 download_activities (bool, optional): скачивать ли наборы активностей к целям (по IC50 и Ki). Defaults to True.
32 skip_downloaded_files (bool, optional): пропускать ли уже скачанные файлы. Defaults to False.
33 testing_flag (bool, optional): спец. флаг для тестирования функционала. Defaults to False.
34 print_to_console_verbosely (bool, optional): нужен ли более подробный вывод в консоль. Defaults to False.
35 """
36
37 UpdateLoggerFormat(logger_label, "fg #CBDD7C")
38
39 logger.info(f"{'-' * 21} ChEMBL downloading for DrugDesign {'-' * 21}")
40
41 CreateFolder("results")
42 CreateFolder(results_folder_name)
43
44 if need_primary_analysis:
45 CreateFolder(f"{results_folder_name}/{primary_analysis_folder_name}")
46
47 if download_activities:
48 CreateFolder(activities_results_folder_name)
49
50 logger.info(f"{'-' * 77}")
51
52 id_list: list[str] = ["CHEMBL220", "CHEMBL251", "CHEMBL229", "CHEMBL1867",
53 "CHEMBL213", "CHEMBL210", "CHEMBL1871", "CHEMBL216",
54 "CHEMBL211", "CHEMBL245", "CHEMBL218", "CHEMBL253",
55 "CHEMBL2056", "CHEMBL217", "CHEMBL252", "CHEMBL231",
56 "CHEMBL214", "CHEMBL1898", "CHEMBL224", "CHEMBL1833",
57 "CHEMBL240", "CHEMBL258", "CHEMBL1951", "CHEMBL4777",
58 "CHEMBL2034", "CHEMBL236", "CHEMBL233", "CHEMBL222", "CHEMBL228"]
59
60 if testing_flag:
61 id_list = ["CHEMBL1951", "CHEMBL2034"]
62
63 if not skip_downloaded_files or not IsFileInFolder(f"{results_folder_name}",
64 "targets_data_from_ChEMBL.csv"):
65 if download_all:
66 id_list = [] # в случае пустого списка в DownloadTargetsFromIdList скачаются все
67
68 DownloadTargetsFromIdList(target_chembl_id_list=id_list,
69 need_primary_analysis=need_primary_analysis,
70 download_activities=download_activities,
71 activities_results_folder_name=activities_results_folder_name,
72 print_to_console=(
73 print_to_console_verbosely or testing_flag),
74 skip_downloaded_activities=skip_downloaded_files)
75
76 else:
77 logger.warning(
78 f"targets_data_from_ChEMBL is already downloaded, skip".ljust(77))
79
80 logger.success(f"{'-' * 21} ChEMBL downloading for DrugDesign {'-' * 21}")

◆ DownloadTargetChEMBLActivities()

None download.DownloadTargetChEMBLActivities ( pd.DataFrame targets_data,
str results_folder_name = "results/activities",
bool download_compounds_sdf = True,
bool print_to_console = False,
bool skip_downloaded_activities = False )
Скачивает необходимые activities из базы ChEMBL по данным по целям.

Args:
    targets_data (pd.DataFrame): данные по целям.
    results_folder_name (str, optional): имя папки для закачки. Defaults to "results/activities".
    download_compounds_sdf (bool, optional): нужно ли скачивать .sdf файл с molfile для каждой молекулы. Defaults to True.
    print_to_console (bool, optional): нужно ли выводить логирование в консоль. Defaults to False.
    skip_downloaded_activities (bool, optional): пропускать ли уже скачанные файлы activities. Defaults to False.
19 skip_downloaded_activities: bool = False) -> None:
20 """
21 Скачивает необходимые activities из базы ChEMBL по данным по целям.
22
23 Args:
24 targets_data (pd.DataFrame): данные по целям.
25 results_folder_name (str, optional): имя папки для закачки. Defaults to "results/activities".
26 download_compounds_sdf (bool, optional): нужно ли скачивать .sdf файл с molfile для каждой молекулы. Defaults to True.
27 print_to_console (bool, optional): нужно ли выводить логирование в консоль. Defaults to False.
28 skip_downloaded_activities (bool, optional): пропускать ли уже скачанные файлы activities. Defaults to False.
29 """
30
31 UpdateLoggerFormat("ChEMBL__IC50&Ki", "fg #61B78C")
32
33 logger.info(
34 f"Start download activities connected with targets...".ljust(77))
35
36 logger.info(f"{'-' * 77}")
37
38 for target_id in targets_data['target_chembl_id']:
39 file_name_ic50: str = f"{target_id}_IC50_activities"
40 file_name_ki: str = f"{target_id}_Ki_activities"
41
42 if skip_downloaded_activities \
43 and IsFileInFolder(results_folder_name, f"{file_name_ic50}.csv") \
44 and IsFileInFolder(results_folder_name, f"{file_name_ki}.csv"):
45 logger.warning(f"Activities connected with target {
46 target_id} is already downloaded, skip".ljust(77))
47 logger.info(f"{'-' * 77}")
48
49 continue
50
51 if print_to_console:
52 logger.info(f"Downloading activities connected with {
53 target_id}...".ljust(77))
54
55 activities_ic50: QuerySet = QuerySetActivitiesByIC50(target_id)
56 activities_ki: QuerySet = QuerySetActivitiesByKi(target_id)
57
58 if print_to_console:
59 logger.info(f"Amount: IC50: {CountTargetActivitiesByIC50(target_id)}; Ki: {
60 CountTargetActivitiesByKi(target_id)}".ljust(77))
61
62 logger.success(f"Downloading activities connected with {
63 target_id}: SUCCESS".ljust(77))
64
65 logger.info(
66 "Collecting activities to pandas.DataFrame()...".ljust(77))
67
68 try:
69 data_frame_ic50 = CleanedTargetActivitiesDF(pd.DataFrame(
70 activities_ic50), target_id=target_id, activities_type="IC50",
71 print_to_console=print_to_console)
72
73 data_frame_ki = CleanedTargetActivitiesDF(pd.DataFrame(
74 activities_ki), target_id=target_id, activities_type="Ki",
75 print_to_console=print_to_console)
76
77 if print_to_console:
78 logger.success(
79 "Collecting activities to pandas.DataFrame(): SUCCESS".ljust(77))
80
81 logger.info(
82 "Recording new values 'IC50', 'Ki' in targets DataFrame...".ljust(77))
83
84 targets_data.loc[targets_data["target_chembl_id"]
85 == target_id, "IC50_new"] = len(data_frame_ic50)
86
87 targets_data.loc[targets_data["target_chembl_id"]
88 == target_id, "Ki_new"] = len(data_frame_ki)
89
90 if print_to_console:
91 logger.info(f"Amount: IC50: {len(data_frame_ic50)}; Ki: {
92 len(data_frame_ki)}".ljust(77))
93
94 logger.success(
95 "Recording new values 'IC50', 'Ki' in targets DataFrame: SUCCESS".ljust(77))
96
97 logger.info(
98 f"Collecting activities to .csv file in '{results_folder_name}'...".ljust(77))
99
100 # if need_primary_analysis:
101 # DataAnalysisByColumns(data_frame,
102 # f"targets_data_from_ChEMBL",
103 # f"{results_folder_name}/{primary_analysis_folder_name}")
104
105 full_file_name_ic50: str = f"{
106 results_folder_name}/{file_name_ic50}.csv"
107 full_file_name_ki: str = f"{
108 results_folder_name}/{file_name_ki}.csv"
109
110 data_frame_ic50.to_csv(full_file_name_ic50, sep=';', index=False)
111 data_frame_ki.to_csv(full_file_name_ki, sep=';', index=False)
112
113 if print_to_console:
114 logger.success(
115 f"Collecting activities to .csv file in '{results_folder_name}': SUCCESS".ljust(77))
116
117 if download_compounds_sdf:
118 if print_to_console:
119 UpdateLoggerFormat("ChEMBL_compound", "fg #CCA87A")
120
121 logger.info(
122 f"Start download molfiles connected with {target_id} to .sdf...".ljust(77))
123
124 CreateFolder("results/compounds", "compounds")
125 CreateFolder("results/compounds/molfiles", "molfiles")
126
127 if print_to_console:
128 logger.info(
129 "Saving connected with IC50 molfiles...".ljust(77))
130
131 try:
132 SaveMolfilesToSDFByIdList(
133 data_frame_ic50['molecule_chembl_id'].tolist(),
134 f"results/compounds/molfiles/{
135 file_name_ic50}_molfiles",
136 extra_data=data_frame_ic50,
137 print_to_console=print_to_console)
138
139 if print_to_console:
140 logger.success(
141 "Saving connected with IC50 molfiles".ljust(77))
142
143 except Exception as exception:
144 PrintException(exception, "ChEMBL__IC50&Ki", "fg #61B78C")
145
146 if print_to_console:
147 logger.info(
148 "Saving connected with Ki molfiles...".ljust(77))
149
150 try:
151 SaveMolfilesToSDFByIdList(
152 data_frame_ki['molecule_chembl_id'].tolist(),
153 f"results/compounds/molfiles/{file_name_ki}_molfiles",
154 extra_data=data_frame_ki,
155 print_to_console=print_to_console)
156
157 if print_to_console:
158 logger.success(
159 "Saving connected with Ki molfiles".ljust(77))
160
161 logger.success(
162 f"End download molfiles connected with {target_id} to .sdf".ljust(77))
163
164 except Exception as exception:
165 PrintException(exception, "ChEMBL__IC50&Ki", "fg #61B78C")
166
167 UpdateLoggerFormat("ChEMBL__IC50&Ki", "fg #61B78C")
168
169 if print_to_console:
170 logger.info(f"{'-' * 77}")
171
172 except Exception as exception:
173 PrintException(exception, "ChEMBL__IC50&Ki", "fg #61B78C")
174
175 logger.success(
176 f"End download activities connected with targets: SUCCESS".ljust(77))
177
178
179@IgnoreWarnings

◆ GetCellLineChEMBLActivitiesFromCSV()

None download.GetCellLineChEMBLActivitiesFromCSV ( pd.DataFrame cell_lines_data,
str raw_csv_folder_name,
str results_folder_name = "results/activities",
bool download_compounds_sdf = True,
bool print_to_console = False,
bool skip_gotten_activities = False )
Получает необходимые данные об activities из базы ChEMBL по данным по клеточным линиям.

Args:
    cell_lines_data (pd.DataFrame): данные по клеточным линиям.
    raw_csv_folder_name (str): путь к папке с исходными .csv файлами с activities.
    results_folder_name (str, optional): имя папки для закачки. Defaults to "results/activities".
    download_compounds_sdf (bool, optional): нужно ли скачивать .sdf файл с molfile для каждой молекулы. Defaults to True.
    print_to_console (bool, optional): нужно ли выводить логирование в консоль. Defaults to False.
    skip_gotten_activities (bool, optional): пропускать ли уже полученные файлы activities. Defaults to False.
185 skip_gotten_activities: bool = False) -> None:
186 """
187 Получает необходимые данные об activities из базы ChEMBL по данным по клеточным линиям.
188
189 Args:
190 cell_lines_data (pd.DataFrame): данные по клеточным линиям.
191 raw_csv_folder_name (str): путь к папке с исходными .csv файлами с activities.
192 results_folder_name (str, optional): имя папки для закачки. Defaults to "results/activities".
193 download_compounds_sdf (bool, optional): нужно ли скачивать .sdf файл с molfile для каждой молекулы. Defaults to True.
194 print_to_console (bool, optional): нужно ли выводить логирование в консоль. Defaults to False.
195 skip_gotten_activities (bool, optional): пропускать ли уже полученные файлы activities. Defaults to False.
196 """
197
198 UpdateLoggerFormat("ChEMBL__IC&GI50", "fg #6785C6")
199
200 logger.info(
201 f"Start getting activities connected with cell lines...".ljust(77))
202
203 logger.info(f"{'-' * 77}")
204
205 for cell_id in cell_lines_data['cell_chembl_id']:
206 file_name_ic50: str = f"{cell_id}_IC50_activities"
207 file_name_gi50: str = f"{cell_id}_GI50_activities"
208
209 if skip_gotten_activities \
210 and IsFileInFolder(results_folder_name, f"{file_name_ic50}.csv") \
211 and IsFileInFolder(results_folder_name, f"{file_name_gi50}.csv"):
212 logger.warning(f"Activities connected with target {
213 cell_id} is already gotten, skip".ljust(77))
214 logger.info(f"{'-' * 77}")
215
216 continue
217
218 if print_to_console:
219 logger.info(f"Getting activities connected with {
220 cell_id}...".ljust(77))
221
222 data_frame_ic50 = pd.read_csv(
223 f"{raw_csv_folder_name}/{file_name_ic50}.csv", sep=';', low_memory=False)
224 data_frame_gi50 = pd.read_csv(
225 f"{raw_csv_folder_name}/{file_name_gi50}.csv", sep=';', low_memory=False)
226
227 if print_to_console:
228 logger.info(f"Amount: IC50: {len(data_frame_ic50)}; GI50: {
229 len(data_frame_gi50)}".ljust(77))
230
231 logger.success(f"Getting activities connected with {
232 cell_id}: SUCCESS".ljust(77))
233
234 logger.info(
235 "Cleaning activities...".ljust(77))
236
237 try:
238 data_frame_ic50 = CleanedCellLineActivitiesDF(data_frame_ic50,
239 cell_id=cell_id,
240 activities_type="IC50",
241 print_to_console=print_to_console)
242
243 data_frame_gi50 = CleanedCellLineActivitiesDF(data_frame_gi50,
244 cell_id=cell_id,
245 activities_type="GI50",
246 print_to_console=print_to_console)
247 if print_to_console:
248 logger.success(
249 "Collecting activities to pandas.DataFrame(): SUCCESS".ljust(77))
250
251 logger.info(
252 "Recording new values 'IC50', 'GI50' in targets DataFrame...".ljust(77))
253
254 cell_lines_data.loc[cell_lines_data["cell_chembl_id"]
255 == cell_id, "IC50_new"] = len(data_frame_ic50)
256
257 cell_lines_data.loc[cell_lines_data["cell_chembl_id"]
258 == cell_id, "GI50_new"] = len(data_frame_gi50)
259
260 if print_to_console:
261 logger.info(f"Amount: IC50: {len(data_frame_ic50)}; GI50: {
262 len(data_frame_gi50)}".ljust(77))
263
264 logger.success(
265 "Recording new values 'IC50', 'GI50' in targets DataFrame: SUCCESS".ljust(77))
266
267 logger.info(
268 f"Collecting activities to .csv file in '{results_folder_name}'...".ljust(77))
269
270 full_file_name_ic50: str = f"{
271 results_folder_name}/{file_name_ic50}.csv"
272 full_file_name_gi50: str = f"{
273 results_folder_name}/{file_name_gi50}.csv"
274
275 data_frame_ic50.to_csv(
276 full_file_name_ic50, sep=';', index=False)
277 data_frame_gi50.to_csv(
278 full_file_name_gi50, sep=';', index=False)
279
280 if print_to_console:
281 logger.success(
282 f"Collecting activities to .csv file in '{results_folder_name}': SUCCESS".ljust(77))
283
284 if download_compounds_sdf:
285 if print_to_console:
286 UpdateLoggerFormat("ChEMBL_compound", "fg #CCA87A")
287
288 logger.info(
289 f"Start download molfiles connected with {cell_id} to .sdf...".ljust(77))
290
291 CreateFolder("results/compounds", "compounds")
292 CreateFolder("results/compounds/molfiles", "molfiles")
293
294 if print_to_console:
295 logger.info(
296 "Saving connected with IC50 molfiles...".ljust(77))
297
298 try:
299 SaveMolfilesToSDFByIdList(
300 data_frame_ic50['molecule_chembl_id'].tolist(),
301 f"results/compounds/molfiles/{
302 file_name_ic50}_molfiles",
303 extra_data=data_frame_ic50,
304 print_to_console=print_to_console)
305
306 if print_to_console:
307 logger.success(
308 "Saving connected with IC50 molfiles".ljust(77))
309
310 except Exception as exception:
311 PrintException(
312 exception, "ChEMBL__IC&GI50", "fg #6785C6")
313
314 if print_to_console:
315 logger.info(
316 "Saving connected with GI50 molfiles...".ljust(77))
317
318 try:
319 SaveMolfilesToSDFByIdList(
320 data_frame_gi50['molecule_chembl_id'].tolist(),
321 f"results/compounds/molfiles/{
322 file_name_gi50}_molfiles",
323 extra_data=data_frame_gi50,
324 print_to_console=print_to_console)
325
326 if print_to_console:
327 logger.success(
328 "Saving connected with GI50 molfiles".ljust(77))
329
330 logger.success(
331 f"End download molfiles connected with {cell_id} to .sdf".ljust(77))
332
333 except Exception as exception:
334 PrintException(
335 exception, "ChEMBL__IC&GI50", "fg #6785C6")
336
337 UpdateLoggerFormat("ChEMBL__IC&GI50", "fg #6785C6")
338
339 if print_to_console:
340 logger.info(f"{'-' * 77}")
341
342 except Exception as exception:
343 PrintException(exception, "ChEMBL__IC&GI50", "fg #6785C6")
344
345 logger.success(
346 f"End download activities connected with cell lines: SUCCESS".ljust(77))

Variable Documentation

◆ activities_results_folder_name

str download.activities_results_folder_name = "results/activities"

◆ combined_file_name

str download.combined_file_name = "combined_cell_lines_data_from_ChEMBL"

◆ comp

download.comp = pcp.Compound.from_cid(1423)

◆ logger_label

str download.logger_label = "ChEMBL____cells"

◆ primary_analysis_folder_name

str download.primary_analysis_folder_name = "primary_analysis"

◆ results_folder_name

str download.results_folder_name = "results/cell_lines"