Esempio per dumpare un root file in un HDF5 nell'ipotesi che sia possibile caricarlo interamente in memoria
import uproot
import h5py
import numpy as np
Definisco input e output files
# Input ROOT file to convert and the HDF5 file to write it into.
fileToLoad = "output.root"
HDF5file = "output.h5"
# NOTE(review): the original cell ended with a bare `outData` expression —
# an undefined name at this point (NameError outside the notebook); removed.
# Explore the ROOT file: print every top-level key and, when the object
# behind a key is itself a container (e.g. a TTree with branches), print
# its children too.
with uproot.open(fileToLoad) as f:
    for k in f:
        print(k)
        try:
            for kk in f[k]:
                print(kk)
        except Exception:
            # Best-effort probe: some objects (histograms, scalars, ...)
            # are not iterable — skip them instead of aborting the listing.
            pass
outData;1 <TBranch 'eventID' at 0x01fe24417a10> <TBranch 'Tracker_NHit_X_1' at 0x01fe24417810> <TBranch 'Tracker_NHit_Y_1' at 0x01fe24447450> <TBranch 'Tracker_NHit_X_2' at 0x01fe242bf990> <TBranch 'Tracker_NHit_Y_2' at 0x01fe242bee90> <TBranch 'Tracker_X_1' at 0x01fe242be110> <TBranch 'Tracker_Y_1' at 0x01fe2428a6d0> <TBranch 'Tracker_X_2' at 0x01fe2428b650> <TBranch 'Tracker_Y_2' at 0x01fe2428a610> <TBranch 'Ekin' at 0x01fe242bd7d0> <TBranch 'edep_APC1' at 0x01fe24261b90> <TBranch 'edep_APC2' at 0x01fe24263410> <TBranch 'edep_calo' at 0x01fe24260d10>
# Convert every branch of the "outData" tree into its own HDF5 dataset,
# assuming the whole tree fits in memory.
with uproot.open(fileToLoad)["outData"] as f, h5py.File(HDF5file, 'w', libver='latest') as hf:
    print(f"Convertirò {fileToLoad} in {HDF5file}")
    # HDF5 settings: single-writer/multiple-reader mode plus gzip-compressed,
    # chunked datasets.
    hf.swmr_mode = True
    opts = {"compression": "gzip", "chunks": True}
    # Read the whole tree into numpy arrays ONCE. The original called
    # f.arrays(...) inside the loop, re-reading and decompressing the
    # entire file once per branch.
    arrays = f.arrays(library="np")
    for k in f:
        currName = k.name
        print(currName)
        hf.create_dataset(currName, data=arrays[currName], **opts)
Convertirò output.root in output.h5 eventID Tracker_NHit_X_1 Tracker_NHit_Y_1 Tracker_NHit_X_2 Tracker_NHit_Y_2 Tracker_X_1 Tracker_Y_1 Tracker_X_2 Tracker_Y_2 Ekin edep_APC1 edep_APC2 edep_calo
# Re-open the freshly written HDF5 file (read-only, SWMR) and dump every
# dataset name together with its shape as a sanity check.
with h5py.File(HDF5file, 'r', libver='latest', swmr=True) as hf:
    # Print all the keys contained in the data file
    print(hf.keys())
    for dataset_name in hf:
        print(dataset_name, hf[dataset_name].shape)
<KeysViewHDF5 ['Ekin', 'Tracker_NHit_X_1', 'Tracker_NHit_X_2', 'Tracker_NHit_Y_1', 'Tracker_NHit_Y_2', 'Tracker_X_1', 'Tracker_X_2', 'Tracker_Y_1', 'Tracker_Y_2', 'edep_APC1', 'edep_APC2', 'edep_calo', 'eventID']> Ekin (10000,) Tracker_NHit_X_1 (10000,) Tracker_NHit_X_2 (10000,) Tracker_NHit_Y_1 (10000,) Tracker_NHit_Y_2 (10000,) Tracker_X_1 (10000,) Tracker_X_2 (10000,) Tracker_Y_1 (10000,) Tracker_Y_2 (10000,) edep_APC1 (10000,) edep_APC2 (10000,) edep_calo (10000,) eventID (10000,)
# Aggregated conversion: group related branches into 2-D datasets
# (one column per branch, one row per event) instead of one dataset
# per branch.
HDF5file_aggregato = "output_aggregato.h5"
with uproot.open(fileToLoad)["outData"] as f, h5py.File(HDF5file_aggregato, 'w', libver='latest') as hf:
    print(f"Convertirò {fileToLoad} in {HDF5file_aggregato}")
    # HDF5 settings
    hf.swmr_mode = True
    opts = {"compression": "gzip", "chunks": True}
    # Read the whole tree ONCE. The original called f.arrays(...) thirteen
    # separate times — one full read/decompress of the file per branch.
    arrays = f.arrays(library="np")
    # Write num hit: hit counts for the two tracker planes, X and Y views.
    # np.column_stack of 1-D arrays == np.vstack((...)).T, but says what
    # it means directly.
    NHit = np.column_stack((
        arrays["Tracker_NHit_X_1"],
        arrays["Tracker_NHit_Y_1"],
        arrays["Tracker_NHit_X_2"],
        arrays["Tracker_NHit_Y_2"],
    ))
    print(NHit.shape)
    hf.create_dataset("Tracker_NHit", data=NHit, **opts)
    # Write positions: hit coordinates for the two tracker planes.
    posizioni = np.column_stack((
        arrays["Tracker_X_1"],
        arrays["Tracker_Y_1"],
        arrays["Tracker_X_2"],
        arrays["Tracker_Y_2"],
    ))
    print(posizioni.shape)
    hf.create_dataset("Tracker", data=posizioni, **opts)
    # Write deposited energy: the two APCs plus the calorimeter.
    edep = np.column_stack((
        arrays["edep_APC1"],
        arrays["edep_APC2"],
        arrays["edep_calo"],
    ))
    print(edep.shape)
    hf.create_dataset("edep", data=edep, **opts)
    # Write other info: kinetic energy and event id (note: stacking
    # promotes both columns to a common dtype, as the original .T did).
    info = np.column_stack((
        arrays["Ekin"],
        arrays["eventID"],
    ))
    print(info.shape)
    hf.create_dataset("info", data=info, **opts)
Convertirò output.root in output_aggregato.h5 (10000, 4) (10000, 4) (10000, 3) (10000, 2)
# Sanity check on the aggregated file: open it read-only in SWMR mode and
# list each dataset with its shape.
with h5py.File(HDF5file_aggregato, 'r', libver='latest', swmr=True) as hf:
    # Print all the keys contained in the data file
    print(hf.keys())
    for dataset_name in hf:
        print(dataset_name, hf[dataset_name].shape)
<KeysViewHDF5 ['Tracker', 'Tracker_NHit', 'edep', 'info']> Tracker (10000, 4) Tracker_NHit (10000, 4) edep (10000, 3) info (10000, 2)