219):
220 """
221 Сохраняет molfiles из pd.DataFrame в .sdf файл.
222
223 Args:
224 data (pd.DataFrame): DataFrame с колонками molfile и id.
225 file_name (str): имя файла (без ".sdf").
226 molecule_id_column_name (str): имя колонки с id соединения.
227 extra_data (pd.DataFrame, optional): дополнительная информация.
228 Defaults to pd.DataFrame().
229 indexing_lists (bool, optional): нужно ли индексировать списки.
230 Defaults to False.
231 """
232
def WriteColumnAndValueToSDF(file: TextIOWrapper, value: Any, column: str = ""):
    """
    Write one data field (column name plus its value) to an open .sdf file.

    The field is emitted as a ``> <column>`` header line, followed by the
    value lines and one terminating blank line:

    * list / pd.Series: one line per element; elements that are dicts are
      expanded into ``key: value`` lines.
    * dict: one ``key: value`` line per item.
    * anything else: a single line holding ``str(value)``.

    Values whose string form is "nan", "None" or "" are treated as missing
    and are skipped. For a missing scalar no header is written, only the
    terminating blank line.

    Args:
        file (TextIOWrapper): file opened for writing.
        value (Any): value to write.
        column (str, optional): column name. Nothing at all is written
            when it is empty. Defaults to "".

    Note:
        ``indexing_lists`` is not a parameter of this helper; it is read
        from the enclosing scope. When it is truthy, every written scalar
        list element is prefixed with its running index ("0: ...").
    """

    # String forms that mark a missing value and must not reach the file.
    missing = {"nan", "None", ""}

    def write_items(mapping: dict) -> None:
        # Emit the non-missing items of a dict as "key: value" lines.
        for key, item in mapping.items():
            text = str(item)
            if text not in missing:
                file.write(f"{key}: {text}\n")

    if not column:
        return

    if isinstance(value, (list, pd.Series)):
        file.write(f"> <{column}>\n")

        index = 0
        for elem in value:
            if isinstance(elem, dict):
                # The previous implementation recursed here without a column
                # name, which hit the empty-column guard and silently dropped
                # the dict. Write its items inline instead.
                write_items(elem)
                continue

            text = str(elem)
            if text in missing:
                continue

            if indexing_lists:
                file.write(f"{index}: {text}\n")
            else:
                file.write(f"{text}\n")

            # Count only the scalar elements that were actually written.
            index += 1

    elif isinstance(value, dict):
        file.write(f"> <{column}>\n")
        write_items(value)

    else:
        text = str(value)
        if text not in missing:
            file.write(f"> <{column}>\n")
            file.write(f"{text}\n")

    # Blank line that terminates the data item.
    file.write("\n")
300
301
302 with open(f"{file_name}.sdf", "w", encoding="utf-8") as f:
303
304 for value in data.to_numpy():
305
306 molecule_id, molfile = value
307
308
309 f.write(f"{molecule_id}{molfile}\n\n")
310
311
312 if not extra_data.empty:
313
314 df = extra_data.set_index(f"{molecule_id_column_name}")
315
316
317 for column in df.columns:
318
319 WriteColumnAndValueToSDF(f, df.loc[molecule_id, column], column)
320
321
322 f.write("$$$$\n")
323
324 v_logger.info(f"Writing {molecule_id} data to .sdf file...", LogMode.VERBOSELY)
325
326
327
328 with open(f"{file_name}.sdf", encoding="utf-8") as f:
329 sdf_content = f.read()
330
331
332 max_n_amounts = 0
333 while "\n" * max_n_amounts in sdf_content:
334 max_n_amounts += 1
335
336
337
338 for amount in range(max_n_amounts, 2, -1):
339 sdf_content = sdf_content.replace("\n" * amount, "\n\n")
340
341
342 sdf_content = sdf_content.replace("$$$$\n\n", "$$$$\n")
343
344
345 with open(f"{file_name}.sdf", "w", encoding="utf-8") as f:
346 f.write(sdf_content)