In [1]:
import pandas as pd
import numpy as np
In [2]:
# Build two toy DataFrames, one per detector: every row carries a made-up
# timestamp ("time") and a pulse height ("PH").
df1 = pd.DataFrame(
    {
        "time": [10, 20, 30, 41, 100, 110, 159, 161],
        "PH": [110, 111, 109, 108, 110, 111, 109, 108],
    }
)
df2 = pd.DataFrame(
    {
        "time": [11, 18, 45, 105, 160, 163],
        "PH": [5000, 5500, 5400, 5500, 5400, 5400],
    }
)
In [3]:
# Show the first toy frame (columns: time, PH).
df1
Out[3]:
time PH
0 10 110
1 20 111
2 30 109
3 41 108
4 100 110
5 110 111
6 159 109
7 161 108
In [4]:
# Show the second toy frame (columns: time, PH).
df2
Out[4]:
time PH
0 11 5000
1 18 5500
2 45 5400
3 105 5500
4 160 5400
5 163 5400
In [5]:
# Pair each df1 (det1) event with the df2 (det2) event nearest in time,
# accepting a pair only within tolerance=3. The time columns are renamed so
# both timestamps survive the merge; the clashing "PH" columns get the
# _det1 / _det2 suffixes. det1 events without a partner keep NaN on the det2 side.
out = pd.merge_asof(
    left=df1.rename(columns={"time": "Time1"}),
    right=df2.rename(columns={"time": "Time2"}),
    left_on="Time1",
    right_on="Time2",
    direction="nearest",
    tolerance=3,
    suffixes=("_det1", "_det2"),
)

out
Out[5]:
Time1 PH_det1 Time2 PH_det2
0 10 110 11.0 5000.0
1 20 111 18.0 5500.0
2 30 109 NaN NaN
3 41 108 NaN NaN
4 100 110 NaN NaN
5 110 111 NaN NaN
6 159 109 160.0 5400.0
7 161 108 160.0 5400.0
In [6]:
# Absolute time offset between the paired events; NaN where no det2 partner was found.
out["Delta"] = (out["Time1"] - out["Time2"]).abs()
out
Out[6]:
Time1 PH_det1 Time2 PH_det2 Delta
0 10 110 11.0 5000.0 1.0
1 20 111 18.0 5500.0 2.0
2 30 109 NaN NaN NaN
3 41 108 NaN NaN NaN
4 100 110 NaN NaN NaN
5 110 111 NaN NaN NaN
6 159 109 160.0 5400.0 1.0
7 161 108 160.0 5400.0 1.0
In [7]:
# Keep only the rows without any NaN, i.e. the det1 events that found a det2 partner.
out.dropna(axis="index", how="any")
Out[7]:
Time1 PH_det1 Time2 PH_det2 Delta
0 10 110 11.0 5000.0 1.0
1 20 111 18.0 5500.0 2.0
6 159 109 160.0 5400.0 1.0
7 161 108 160.0 5400.0 1.0
In [8]:
# Same nearest-in-time pairing as before, now with a wider window (tolerance=6).
# With the larger window one det2 event can be claimed by more than one det1
# event (see Time2 = 105 and 160 in the output).
out = pd.merge_asof(
    left=df1.rename(columns={"time": "Time1"}),
    right=df2.rename(columns={"time": "Time2"}),
    left_on="Time1",
    right_on="Time2",
    direction="nearest",
    tolerance=6,
    suffixes=("_det1", "_det2"),
)

out
Out[8]:
Time1 PH_det1 Time2 PH_det2
0 10 110 11.0 5000.0
1 20 111 18.0 5500.0
2 30 109 NaN NaN
3 41 108 45.0 5400.0
4 100 110 105.0 5500.0
5 110 111 105.0 5500.0
6 159 109 160.0 5400.0
7 161 108 160.0 5400.0
In [9]:
# Absolute time offset between the paired events; NaN where no det2 partner was found.
out["Delta"] = (out["Time1"] - out["Time2"]).abs()
out
Out[9]:
Time1 PH_det1 Time2 PH_det2 Delta
0 10 110 11.0 5000.0 1.0
1 20 111 18.0 5500.0 2.0
2 30 109 NaN NaN NaN
3 41 108 45.0 5400.0 4.0
4 100 110 105.0 5500.0 5.0
5 110 111 105.0 5500.0 5.0
6 159 109 160.0 5400.0 1.0
7 161 108 160.0 5400.0 1.0
In [10]:
# Discard det1 events left without a det2 partner (any row containing NaN).
aa = out.dropna(axis="index", how="any")
In [11]:
# merge_asof matches every det1 row independently, so several det1 events can
# claim the same det2 event. Keep exactly one pairing per det2 event: the one
# with the smallest |Time1 - Time2|. Sorting by Delta first makes
# keep="first" select the closest candidate rather than simply the earliest
# one; the stable sort keeps the earliest event on ties, and sort_index()
# restores the original row order afterwards.
bb = (
    aa.sort_values("Delta", kind="mergesort")
    .drop_duplicates(subset=["Time2"], keep="first")
    .sort_index()
)
# Safeguard only: merge_asof is a left join and emits one row per det1 event,
# so Time1 can repeat here only if df1 itself contains a repeated timestamp.
cc = bb.drop_duplicates(subset=["Time1"], keep="first")
cc
Out[11]:
Time1 PH_det1 Time2 PH_det2 Delta
0 10 110 11.0 5000.0 1.0
1 20 111 18.0 5500.0 2.0
3 41 108 45.0 5400.0 4.0
4 100 110 105.0 5500.0 5.0
6 159 109 160.0 5400.0 1.0
In [ ]:
 
In [ ]: