ROOT to HDF5¶

Esempio di conversione di un file ROOT in un file HDF5, nell'ipotesi che il contenuto del file ROOT possa essere caricato interamente in memoria.

In [ ]:
import uproot 
import h5py
import numpy as np

Definisco input e output files

In [ ]:
# Input ROOT file to read and the HDF5 file the per-branch conversion writes.
fileToLoad, HDF5file = "output.root", "output.h5"

Ispeziono il file ROOT di partenza: contiene una sola ntupla, outData, di cui elenco i branch.

In [ ]:
# Print every key of the ROOT file and, for each object that supports
# iteration (here the "outData" tree), the members it yields.
with uproot.open(fileToLoad) as f:
    for k in f:
        print(k)
        try:
            for kk in f[k]:
                print(kk)
        except Exception as err:
            # Best-effort inspection: an object that cannot be read or iterated
            # is reported and skipped. Catching Exception (instead of a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            print(f"  [{k}: contenuto non elencabile ({type(err).__name__})]")
outData;1
<TBranch 'eventID' at 0x01fe24417a10>
<TBranch 'Tracker_NHit_X_1' at 0x01fe24417810>
<TBranch 'Tracker_NHit_Y_1' at 0x01fe24447450>
<TBranch 'Tracker_NHit_X_2' at 0x01fe242bf990>
<TBranch 'Tracker_NHit_Y_2' at 0x01fe242bee90>
<TBranch 'Tracker_X_1' at 0x01fe242be110>
<TBranch 'Tracker_Y_1' at 0x01fe2428a6d0>
<TBranch 'Tracker_X_2' at 0x01fe2428b650>
<TBranch 'Tracker_Y_2' at 0x01fe2428a610>
<TBranch 'Ekin' at 0x01fe242bd7d0>
<TBranch 'edep_APC1' at 0x01fe24261b90>
<TBranch 'edep_APC2' at 0x01fe24263410>
<TBranch 'edep_calo' at 0x01fe24260d10>

Effettuo la conversione¶

In [ ]:
# Convert the "outData" tree into an HDF5 file with one dataset per branch,
# each dataset named after its branch.
with uproot.open(fileToLoad)["outData"] as f, h5py.File(HDF5file, 'w', libver='latest') as hf:
    
    print(f"Convertirò {fileToLoad} in {HDF5file}")
    
    # Options shared by every dataset: gzip compression, automatic chunking.
    opts = {"compression":"gzip", "chunks":True}
    
    # Read the whole tree ONCE. Calling f.arrays() inside the loop would
    # re-read every branch from disk for each branch being written.
    arrays = f.arrays(library = "np")
    
    for k in f:
        currName = k.name
        print(currName)
        
        hf.create_dataset(currName, data = arrays[currName], **opts)
    
    # Switch to SWMR mode only after all datasets exist: the h5py SWMR
    # workflow creates the objects first, since new datasets are not
    # supposed to be created once the file is in SWMR mode.
    hf.swmr_mode = True
        
        
        
Convertirò output.root in output.h5
eventID
Tracker_NHit_X_1
Tracker_NHit_Y_1
Tracker_NHit_X_2
Tracker_NHit_Y_2
Tracker_X_1
Tracker_Y_1
Tracker_X_2
Tracker_Y_2
Ekin
edep_APC1
edep_APC2
edep_calo

Ispeziono il file prodotto¶

In [ ]:
# Re-open the converted file read-only and list what it contains.
with h5py.File(HDF5file, 'r', libver='latest', swmr=True) as hf:
    
    # Print all the keys stored in the data file, then each key with the
    # shape of the object stored under it.
    stored_keys = hf.keys()
    print(stored_keys)
    for name in stored_keys:
        print(name, hf[name].shape)
<KeysViewHDF5 ['Ekin', 'Tracker_NHit_X_1', 'Tracker_NHit_X_2', 'Tracker_NHit_Y_1', 'Tracker_NHit_Y_2', 'Tracker_X_1', 'Tracker_X_2', 'Tracker_Y_1', 'Tracker_Y_2', 'edep_APC1', 'edep_APC2', 'edep_calo', 'eventID']>
Ekin (10000,)
Tracker_NHit_X_1 (10000,)
Tracker_NHit_X_2 (10000,)
Tracker_NHit_Y_1 (10000,)
Tracker_NHit_Y_2 (10000,)
Tracker_X_1 (10000,)
Tracker_X_2 (10000,)
Tracker_Y_1 (10000,)
Tracker_Y_2 (10000,)
edep_APC1 (10000,)
edep_APC2 (10000,)
edep_calo (10000,)
eventID (10000,)

Supponendo di voler creare un file aggregato, in cui i branch correlati sono raggruppati in un unico dataset bidimensionale

In [ ]:
# Output file for the aggregated layout: related branches are stacked
# column-wise into a single dataset instead of one dataset per branch.
HDF5file_aggregato = "output_aggregato.h5"

with uproot.open(fileToLoad)["outData"] as f, h5py.File(HDF5file_aggregato, 'w', libver='latest') as hf:
    
    print(f"Convertirò {fileToLoad} in {HDF5file_aggregato}")
    
    # HDF5 settings shared by every dataset
    opts = {"compression":"gzip", "chunks":True}
    
    # Read the whole tree ONCE; the original re-read it for every column.
    arrays = f.arrays(library = "np")
    
    def _stack(*names):
        """Stack the named branches as columns (one row per entry of the branches)."""
        return np.vstack([arrays[name] for name in names]).T
    
    
    # Write num hit
    NHit = _stack(
        "Tracker_NHit_X_1",
        "Tracker_NHit_Y_1",
        "Tracker_NHit_X_2",
        "Tracker_NHit_Y_2",
        )
    
    print(NHit.shape)
    hf.create_dataset("Tracker_NHit", data =  NHit, **opts)
    
        
    # Write positions
    posizioni = _stack(
        "Tracker_X_1",
        "Tracker_Y_1",
        "Tracker_X_2",
        "Tracker_Y_2",
        )
    
    print(posizioni.shape)
    hf.create_dataset("Tracker", data =  posizioni, **opts)
    
        
    # Write deposited energy
    edep = _stack(
        "edep_APC1",
        "edep_APC2",
        "edep_calo",
        )
    
    print(edep.shape)
    hf.create_dataset("edep", data =  edep, **opts)
    
        
    # Write other info
    # NOTE(review): Ekin and eventID end up in one array, so NumPy promotes
    # them to a common dtype -- confirm this is acceptable for eventID.
    info = _stack(
        "Ekin",
        "eventID",
        )
    
    print(info.shape)
    hf.create_dataset("info", data =  info, **opts)
    
    # Switch to SWMR mode only after all datasets exist: the h5py SWMR
    # workflow creates the objects first, since new datasets are not
    # supposed to be created once the file is in SWMR mode.
    hf.swmr_mode = True
    
        
        
        
        
Convertirò output.root in output_aggregato.h5
(10000, 4)
(10000, 4)
(10000, 3)
(10000, 2)

Ispeziono il file prodotto¶

In [ ]:
# Re-open the aggregated file read-only and list what it contains.
with h5py.File(HDF5file_aggregato, 'r', libver='latest', swmr=True) as hf:
    
    # Print all the keys stored in the data file, then each key with the
    # shape of the object stored under it.
    stored_keys = hf.keys()
    print(stored_keys)
    for name in stored_keys:
        print(name, hf[name].shape)
<KeysViewHDF5 ['Tracker', 'Tracker_NHit', 'edep', 'info']>
Tracker (10000, 4)
Tracker_NHit (10000, 4)
edep (10000, 3)
info (10000, 2)