In [22]:
#Pandas, Numpy, time buckets, binning, VBA equivalent
In [23]:
import numpy as np
In [24]:
import pandas as pd
In [25]:
rng = pd.date_range('2000-01-01', periods=12, freq='T')
In [26]:
ts = pd.Series(np.arange(12), index=rng)
In [27]:
ts
Out[27]:
2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32
In [28]:
ts.resample('5min', closed='right', label='right', loffset='-1s').sum()
Out[28]:
1999-12-31 23:59:59     0
2000-01-01 00:04:59    15
2000-01-01 00:09:59    40
2000-01-01 00:14:59    11
Freq: 5T, dtype: int32
In [29]:
start_times = ['2000-01-01 09:00', '2000-01-01 10:00']
In [30]:
end_times = ['2000-01-01 17:00', '2000-01-01 18:00']
In [31]:
index = ['Timeframe ' + str(i) for i in range(len(start_times))]
In [32]:
df = pd.DataFrame({'Start Time': pd.to_datetime(start_times),
                  'End Time' : pd.to_datetime(end_times)}, index=index)
In [33]:
df
Out[33]:
End Time Start Time
Timeframe 0 2000-01-01 17:00:00 2000-01-01 09:00:00
Timeframe 1 2000-01-01 18:00:00 2000-01-01 10:00:00
In [34]:
df['Start Time']
Out[34]:
Timeframe 0   2000-01-01 09:00:00
Timeframe 1   2000-01-01 10:00:00
Name: Start Time, dtype: datetime64[ns]
In [35]:
df.unstack()
Out[35]:
End Time    Timeframe 0   2000-01-01 17:00:00
            Timeframe 1   2000-01-01 18:00:00
Start Time  Timeframe 0   2000-01-01 09:00:00
            Timeframe 1   2000-01-01 10:00:00
dtype: datetime64[ns]
In [36]:
rng = pd.date_range('2000-01-01 09:00', periods=9, freq='H')
In [37]:
rng
Out[37]:
DatetimeIndex(['2000-01-01 09:00:00', '2000-01-01 10:00:00',
               '2000-01-01 11:00:00', '2000-01-01 12:00:00',
               '2000-01-01 13:00:00', '2000-01-01 14:00:00',
               '2000-01-01 15:00:00', '2000-01-01 16:00:00',
               '2000-01-01 17:00:00'],
              dtype='datetime64[ns]', freq='H')
In [38]:
ts = pd.DataFrame(0, index=rng, columns=['minutes'], dtype='float')
In [39]:
ts
Out[39]:
minutes
2000-01-01 09:00:00 0.0
2000-01-01 10:00:00 0.0
2000-01-01 11:00:00 0.0
2000-01-01 12:00:00 0.0
2000-01-01 13:00:00 0.0
2000-01-01 14:00:00 0.0
2000-01-01 15:00:00 0.0
2000-01-01 16:00:00 0.0
2000-01-01 17:00:00 0.0
In [40]:
from pandas.tseries.offsets import Hour, Minute
In [41]:
from IPython.core.debugger import set_trace
In [42]:
for index, row in ts.iterrows():
    #set_trace()
    start_boundary = index
    end_boundary = index + Hour()
    time_count = pd.Timedelta('0 m')
    for _, raw_data in df.iterrows():
        #set_trace()
        start_time = raw_data['Start Time']
        end_time = raw_data['End Time']
        if end_time > start_boundary:
            if start_time < end_boundary:
                if start_time <= start_boundary:
                    if end_time >= end_boundary:
                        time_count = time_count + (end_boundary - start_boundary)
                    else:
                        time_count = time + (end_time - start_boundary)
                else:
                    if end_time >= end_boundary:
                        time_count = time_count + (end_boundary - start_time)
                    else:
                        time_count = time_count + (end_time - start_time)
    #set_trace()
    ts.at[index, 'minutes'] = time_count.seconds / 60
In [43]:
ts
Out[43]:
minutes
2000-01-01 09:00:00 60.0
2000-01-01 10:00:00 120.0
2000-01-01 11:00:00 120.0
2000-01-01 12:00:00 120.0
2000-01-01 13:00:00 120.0
2000-01-01 14:00:00 120.0
2000-01-01 15:00:00 120.0
2000-01-01 16:00:00 120.0
2000-01-01 17:00:00 60.0