22import gzip
33import shutil
44from functools import partial
5- from statistics import mean
65
76import pandas as pd
87import pyarrow as pa
@@ -490,15 +489,15 @@ def fill_result_list(vyusteni_element_list, result_lists, required_list, categor
490489
491490
492491# Vybere z každého vyústění hodnotu, která je považována za nejhorší
493- def select_worst (result_lists , strategy_dict ):
492+ def select_worst (result_lists , strategy_dict , already_parsed ):
494493 result = {}
495494 for name , result_list in result_lists .items ():
496495 try :
497496 if not result_list :
498497 result [name ] = None
499498 continue
500499 if 'Min' in name or 'Max' in name or 'Vysledek' in name :
501- result [name ] = next ((result for result in result_list if result is not None ), None )
500+ result [name ] = next ((val for val in result_list if val is not None ), None )
502501 continue
503502 strategy = strategy_dict [name .split ('_' )[- 2 ]]
504503 floats = floats_sublist (result_list )
@@ -508,20 +507,25 @@ def select_worst(result_lists, strategy_dict):
508507 float_result = max (floats , default = None )
509508 case 'min' :
510509 float_result = min (floats , default = None )
511- case 'mean' :
512- float_result = mean (floats ) # Vyvolá výjimku v případě prázdného seznamu
513510 case 'max_diff_1' :
514511 float_result = max (floats , default = None , key = lambda x : abs (x - 1.0 ))
515512 case 'bounds' :
516- name_stem = name .partition ("_Hodnota" )[0 ]
517513 try :
518- min_value = float (result [f'{ name_stem } _Min_Hodnota' ])
519- max_value = float (result [f'{ name_stem } _Max_Hodnota' ])
514+ if name .startswith ('Nafta' ):
515+ # Pro naftu jsou limity v samostatném bloku MereniVznetLimit
516+ param = name .split ('_' )[- 2 ]
517+ min_value = float (already_parsed [f'Nafta_MereniVznetLimit_{ param } _Min_Hodnota' ])
518+ max_value = float (already_parsed [f'Nafta_MereniVznetLimit_{ param } _Max_Hodnota' ])
519+ else :
520+ # Pro benzín/plyn jsou limity součástí aktuálního záznamu
521+ name_stem = name .partition ("_Hodnota" )[0 ]
522+ min_value = float (result [f'{ name_stem } _Min_Hodnota' ])
523+ max_value = float (result [f'{ name_stem } _Max_Hodnota' ])
524+
520525 optimal_value = (max_value + min_value ) / 2
521526 float_result = max (floats , default = None , key = lambda x : abs (x - optimal_value ))
522- # Pokud by některá z krajních hodnot chyběla vezmu první záznam o otáčkách
523527 except Exception :
524- float_result = next ((float for float in floats if float is not None ), None )
528+ float_result = next ((f for f in floats if f is not None ), None )
525529 # Cast na string, aby bylo zachováno načtení všech hodnot jako string
526530 if float_result is not None :
527531 result [name ] = str (float_result )
@@ -543,8 +547,8 @@ def get_detail_benzin(element, prefix, namespaces):
543547 categories = {'OtackyVolnobezne' : ('otackyVolnobezne' , 1 ), 'OtackyZvysene' : ('otackyZvysene' , 1 )}
544548 result_lists = initialize_result_list (required_list , categories , prefix )
545549 fill_result_list (benzin_vyusteni_element_list , result_lists , required_list , categories , prefix , namespaces )
546- strategy_dict = {'CO' : 'max' , 'CO2' : 'min' , 'COCOOR' : 'max' , 'HC' : 'max' , 'LAMBDA' : 'max_diff_1' , 'N' : 'bounds' , 'NOX' : 'max' , 'O2' : 'max' , 'TPS' : 'max ' }
547- result |= select_worst (result_lists , strategy_dict )
550+ strategy_dict = {'CO' : 'max' , 'CO2' : 'min' , 'COCOOR' : 'max' , 'HC' : 'max' , 'LAMBDA' : 'max_diff_1' , 'N' : 'bounds' , 'NOX' : 'max' , 'O2' : 'max' , 'TPS' : 'min ' }
551+ result |= select_worst (result_lists , strategy_dict , result )
548552 return result
549553
550554
@@ -575,8 +579,8 @@ def get_detail_nafta(element, prefix, namespaces):
575579 categories = {'MereniPrumer' : ('mereniPrumer' , 1 ), 'Mereni' : ('mereni' , 4 )}
576580 result_lists = initialize_result_list (required_list , categories , prefix )
577581 fill_result_list (nafta_vyusteni_element_list , result_lists , required_list , categories , prefix , namespaces )
578- strategy_dict = {'TPS' : 'mean ' , 'CasAkcelerace' : 'max' , 'Kourivost' : 'max' , 'OtackyPrebehove' : 'mean ' , 'OtackyVolnobezne' : 'mean ' , 'Teplota' : 'min' }
579- result |= select_worst (result_lists , strategy_dict )
582+ strategy_dict = {'TPS' : 'min ' , 'CasAkcelerace' : 'max' , 'Kourivost' : 'max' , 'OtackyPrebehove' : 'bounds ' , 'OtackyVolnobezne' : 'bounds ' , 'Teplota' : 'min' }
583+ result |= select_worst (result_lists , strategy_dict , result )
580584 return result
581585
582586
@@ -980,6 +984,13 @@ def parse_stations_file(target_dir, xml_file, verbosity, delete):
980984def run_preprocessing ():
981985 explain_verbosity (config .VERBOSITY )
982986
987+ print ('—————————————————————————————————Stanice STK a SME:—————————————————————————————————————————————\n ' )
988+ # Seznam stanic prochází denní aktualizací
989+ clear_folder (config .STATIONS_DIR , config .VERBOSITY )
990+ download_stations (config .SPARQL_ENDPOINT , config .STATIONS_DIR / 'gz' , config .DATASET_STATIONS , config .VERBOSITY )
991+ extract_files (config .STATIONS_DIR / 'gz' , config .STATIONS_DIR / 'xml' , 1 , config .VERBOSITY )
992+ parse_series_to_parquet (config .STATIONS_DIR / 'xml' , config .STATIONS_DIR / 'parquet' , parse_stations_file , 1 , config .VERBOSITY , False )
993+
983994 print ('——————————————————————————————————PROHLÍDKY VOZIDEL STK A SME:——————————————————————————————————\n ' )
984995 downloaded_inspection_dates = downloaded_dates ([config .INSPECTIONS_DIR / 'gz' , config .INSPECTIONS_DIR / 'xml' , config .INSPECTIONS_DIR / 'parquet' ])
985996 download_files (config .SPARQL_ENDPOINT , config .INSPECTIONS_DIR / 'gz' , config .PARENT_DATASET_INSPECTIONS , config .START_DATE , config .END_DATE , downloaded_inspection_dates , config .NO_DOWNLOAD_THREADS , config .MAX_DOWNLOAD_ATTEMPTS , config .VERBOSITY )
@@ -992,12 +1003,6 @@ def run_preprocessing():
9921003 extract_files (config .MEASUREMENTS_DIR / 'gz' , config .MEASUREMENTS_DIR / 'xml' , config .NO_EXTRACT_THREADS , config .VERBOSITY )
9931004 parse_series_to_parquet (config .MEASUREMENTS_DIR / 'xml' , config .MEASUREMENTS_DIR / 'parquet' , parse_measurements_file , config .NO_PARSE_PROCESSES , config .VERBOSITY , False )
9941005
995- print ('—————————————————————————————————Stanice STK a SME:—————————————————————————————————————————————\n ' )
996- # Seznam stanic prochází denní aktualizací
997- clear_folder (config .STATIONS_DIR , config .VERBOSITY )
998- download_stations (config .SPARQL_ENDPOINT , config .STATIONS_DIR / 'gz' , config .DATASET_STATIONS , config .VERBOSITY )
999- extract_files (config .STATIONS_DIR / 'gz' , config .STATIONS_DIR / 'xml' , 1 , config .VERBOSITY )
1000- parse_series_to_parquet (config .STATIONS_DIR / 'xml' , config .STATIONS_DIR / 'parquet' , parse_stations_file , 1 , config .VERBOSITY , False )
10011006
10021007
10031008if __name__ == '__main__' :
0 commit comments