18 save_to_csv: bool = True) -> None:
19 """
20 Проводит первичный анализ pd.DataFrame с возможностью вывода в консоль и сохранения в .csv файл
21
22 Args:
23 data_frame (pd.DataFrame): исходный pd.DataFrame
24 data_name (str): имя исходных данных (нужно для логирования)
25 folder_name (str): имя папки, куда сохранять .csv файл
26 logger_label (str, optional): текст заголовка логирования. Defaults to "ChEMBL_analysis".
27 print_to_console (bool, optional): нужно ли выводить информацию в консоль. Defaults to False.
28 save_to_csv (bool, optional): нужно ли сохранять информацию в .csv файл. Defaults to True.
29 """
30
31 UpdateLoggerFormat(logger_label, "fg #C48BC0")
32
33 logger.info(f"Start analysis of '{data_name}'...".ljust(77))
34
35 summary: dict = {'Column': [],
36 'Data type': [],
37 'Non-empty strings': [],
38 'Common value': [],
39 'Max value': [],
40 'Min value': []}
41
42 for column in data_frame.columns:
43
44 if print_to_console:
45 logger.info("-" * 77)
46 logger.info(f"{"Column".ljust(30)}: {column}".ljust(77))
47
48 if save_to_csv:
49 summary['Column'].append(column)
50
51
52 try:
53 data_type = data_frame[column].dtype
54
55 if print_to_console:
56 logger.info(f"{"Type of data".ljust(30)}: {
57 data_type}".ljust(77))
58
59 if save_to_csv:
60 summary['Data type'].append(data_type)
61
62 except Exception as exception:
63 if print_to_console:
64 logger.warning(
65 f"{"Data type:EXCEPTION".ljust(30)}: {exception}".ljust(77))
66
67 if save_to_csv:
68 summary['Data type'].append("")
69
70
71 non_null_count = 0
72 for value in data_frame[column]:
73 if value:
74 non_null_count += 1
75
76 if print_to_console:
77 logger.info(f"{"Non-empty strings".ljust(30)
78 }: {non_null_count}".ljust(77))
79
80 if save_to_csv:
81 summary['Non-empty strings'].append(non_null_count)
82
83
84 try:
85 mode_values = data_frame[column].mode()
86 if len(mode_values) > 0:
87 common_value = mode_values[0]
88
89 else:
90 common_value = ""
91
92 if print_to_console:
93 logger.info(f"{"Common value".ljust(30)}: {
94 common_value}".ljust(77))
95
96 if save_to_csv:
97 summary['Common value'].append(common_value)
98
99 except Exception as exception:
100 if print_to_console:
101 logger.warning(
102 f"{"Common value:EXCEPTION".ljust(30)}: {exception}".ljust(77))
103
104 if save_to_csv:
105 summary['Common value'].append("")
106
107
108 try:
109 try:
110 max_value = data_frame[column].max()
111 min_value = data_frame[column].min()
112
113 except TypeError:
114 max_value = None
115 min_value = None
116
117 for value in data_frame[column]:
118 if value is None:
119 continue
120
121 elif isinstance(value, (list, str)):
122 if max_value is None or len(value) > len(max_value):
123 max_value = value
124 if min_value is None or len(value) < len(min_value):
125 min_value = value
126
127 if print_to_console:
128 logger.info(f"{"Max value".ljust(30)}: {max_value}".ljust(77))
129 logger.info(f"{"Min value".ljust(30)}: {min_value}".ljust(77))
130
131 if save_to_csv:
132 summary['Max value'].append(max_value)
133 summary['Min value'].append(min_value)
134
135 except Exception as exception:
136 if print_to_console:
137 logger.warning(
138 f"{"Max value:EXCEPTION".ljust(30)}: {exception}".ljust(77))
139 logger.warning(
140 f"{"Min value:EXCEPTION".ljust(30)}: {exception}".ljust(77))
141
142 if save_to_csv:
143 summary['Max value'].append("")
144 summary['Min value'].append("")
145
146 if save_to_csv:
147 try:
148 logger.info(
149 "Saving primary analysis to .csv file...".ljust(77))
150
151 file_name: str = f"{folder_name}/{data_name}_analysis.csv"
152
153 pd.DataFrame(summary).to_csv(file_name, sep=';', index=False)
154
155 logger.success(
156 "Saving primary analysis to .csv file: SUCCESS".ljust(77))
157
158 except Exception as exception:
159 PrintException(exception, logger_label, "fg #C48BC0")
160
161 logger.success(f"End analysis of '{data_name}'".ljust(77))