import pandas as pd
import numpy as np
# Build two sample DataFrames, each holding a made-up timestamp ("time")
# and a pulse height ("PH").
df1 = pd.DataFrame(
    dict(
        time=[10, 20, 30, 41, 100, 110, 159, 161],
        PH=[110, 111, 109, 108, 110, 111, 109, 108],
    )
)
df2 = pd.DataFrame(
    dict(
        time=[11, 18, 45, 105, 160, 163],
        PH=[5000, 5500, 5400, 5500, 5400, 5400],
    )
)
df1
time | PH | |
---|---|---|
0 | 10 | 110 |
1 | 20 | 111 |
2 | 30 | 109 |
3 | 41 | 108 |
4 | 100 | 110 |
5 | 110 | 111 |
6 | 159 | 109 |
7 | 161 | 108 |
# Display the second sample DataFrame for inspection.
df2
time | PH | |
---|---|---|
0 | 11 | 5000 |
1 | 18 | 5500 |
2 | 45 | 5400 |
3 | 105 | 5500 |
4 | 160 | 5400 |
5 | 163 | 5400 |
# For every row of df1, look up the df2 row whose timestamp is closest
# (in either direction) and accept it only if it lies within 3 time units.
# Rows without a partner inside the window get NaN in the df2 columns.
out = pd.merge_asof(
    df1.rename(columns=dict(time='Time1')),
    df2.rename(columns=dict(time='Time2')),
    left_on='Time1',
    right_on='Time2',
    direction='nearest',
    tolerance=3,
    suffixes=('_det1', '_det2'),
)
out
Time1 | PH_det1 | Time2 | PH_det2 | |
---|---|---|---|---|
0 | 10 | 110 | 11.0 | 5000.0 |
1 | 20 | 111 | 18.0 | 5500.0 |
2 | 30 | 109 | NaN | NaN |
3 | 41 | 108 | NaN | NaN |
4 | 100 | 110 | NaN | NaN |
5 | 110 | 111 | NaN | NaN |
6 | 159 | 109 | 160.0 | 5400.0 |
7 | 161 | 108 | 160.0 | 5400.0 |
# Absolute time difference between each df1 timestamp and its matched
# df2 timestamp (NaN where no match was found).
out["Delta"] = (out["Time1"] - out["Time2"]).abs()
out
Time1 | PH_det1 | Time2 | PH_det2 | Delta | |
---|---|---|---|---|---|
0 | 10 | 110 | 11.0 | 5000.0 | 1.0 |
1 | 20 | 111 | 18.0 | 5500.0 | 2.0 |
2 | 30 | 109 | NaN | NaN | NaN |
3 | 41 | 108 | NaN | NaN | NaN |
4 | 100 | 110 | NaN | NaN | NaN |
5 | 110 | 111 | NaN | NaN | NaN |
6 | 159 | 109 | 160.0 | 5400.0 | 1.0 |
7 | 161 | 108 | 160.0 | 5400.0 | 1.0 |
# Show only the rows that found a partner (drop any row containing NaN);
# `out` itself is left unchanged.
out.dropna(axis=0, how='any')
Time1 | PH_det1 | Time2 | PH_det2 | Delta | |
---|---|---|---|---|---|
0 | 10 | 110 | 11.0 | 5000.0 | 1.0 |
1 | 20 | 111 | 18.0 | 5500.0 | 2.0 |
6 | 159 | 109 | 160.0 | 5400.0 | 1.0 |
7 | 161 | 108 | 160.0 | 5400.0 | 1.0 |
# Repeat the nearest-timestamp match with a wider window of 6 time units.
# With the wider window a single df2 row can be picked by more than one
# df1 row.
out = pd.merge_asof(
    df1.rename(columns=dict(time='Time1')),
    df2.rename(columns=dict(time='Time2')),
    left_on='Time1',
    right_on='Time2',
    direction='nearest',
    tolerance=6,
    suffixes=('_det1', '_det2'),
)
out
Time1 | PH_det1 | Time2 | PH_det2 | |
---|---|---|---|---|
0 | 10 | 110 | 11.0 | 5000.0 |
1 | 20 | 111 | 18.0 | 5500.0 |
2 | 30 | 109 | NaN | NaN |
3 | 41 | 108 | 45.0 | 5400.0 |
4 | 100 | 110 | 105.0 | 5500.0 |
5 | 110 | 111 | 105.0 | 5500.0 |
6 | 159 | 109 | 160.0 | 5400.0 |
7 | 161 | 108 | 160.0 | 5400.0 |
# Absolute time difference between each df1 timestamp and its matched
# df2 timestamp (NaN where no match was found).
out["Delta"] = (out["Time1"] - out["Time2"]).abs()
out
Time1 | PH_det1 | Time2 | PH_det2 | Delta | |
---|---|---|---|---|---|
0 | 10 | 110 | 11.0 | 5000.0 | 1.0 |
1 | 20 | 111 | 18.0 | 5500.0 | 2.0 |
2 | 30 | 109 | NaN | NaN | NaN |
3 | 41 | 108 | 45.0 | 5400.0 | 4.0 |
4 | 100 | 110 | 105.0 | 5500.0 | 5.0 |
5 | 110 | 111 | 105.0 | 5500.0 | 5.0 |
6 | 159 | 109 | 160.0 | 5400.0 | 1.0 |
7 | 161 | 108 | 160.0 | 5400.0 | 1.0 |
# Keep only the rows that found a partner inside the tolerance window.
aa = out.dropna()
# merge_asof does not enforce one-to-one matching: the same Time2 row can be
# assigned to several Time1 rows. Keep, for each Time2, the pair with the
# smallest Delta instead of whichever row happens to come first. The stable
# sort preserves the original order among equal Deltas (so ties still keep
# the earliest row), and sort_index restores the original row order.
bb = (
    aa.sort_values('Delta', kind='stable')
    .drop_duplicates(keep='first', subset=['Time2'])
    .sort_index()
)
# Per the original author's note, duplicates are never expected on Time1;
# this is kept as a safeguard.
cc = bb.drop_duplicates(keep='first', subset=['Time1'])
cc
Time1 | PH_det1 | Time2 | PH_det2 | Delta | |
---|---|---|---|---|---|
0 | 10 | 110 | 11.0 | 5000.0 | 1.0 |
1 | 20 | 111 | 18.0 | 5500.0 | 2.0 |
3 | 41 | 108 | 45.0 | 5400.0 | 4.0 |
4 | 100 | 110 | 105.0 | 5500.0 | 5.0 |
6 | 159 | 109 | 160.0 | 5400.0 | 1.0 |