Kaggle's Airbnb New User Booking Trend

This notebook uses mpld3 for interactive visualization in IPython, applied to Kaggle's Airbnb New User Bookings data. My first experience with mpld3 was great!


In [1]:
# load pkg
import datetime
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import mpld3
mpld3.enable_notebook()

# import mod
%autoreload 1
%aimport mod

# (CSV base name, HDF5 key) pairs for the tables saved into one hdf file.
_CSV_TO_KEY = (
    ('train_users_2', 'train'),
    ('age_gender_bkts', 'age_gender'),
    ('countries', 'countries'),
    ('sessions', 'sessions'),
    ('test_users', 'test'),
    ('sample_submission_NDF', 'submissions'),
)
# Parallel lists, paired by position: CSV base names and the keys to store them under.
data = [csv_name for csv_name, _ in _CSV_TO_KEY]
data_name = [key for _, key in _CSV_TO_KEY]

def read_data(data, data_name):
    """Load each CSV under ``DATA/`` and store it in the ``data.h5`` HDF5 file.

    Parameters
    ----------
    data : sequence of str
        CSV base names; each is read from ``DATA/<name>.csv``.
    data_name : sequence of str
        Keys under which the frames are stored, paired with ``data`` by
        position (``data_name[i]`` is the key for ``data[i]``).

    Raises
    ------
    IndexError
        If ``data_name`` has fewer entries than ``data``.
    """
    # The context manager closes the store even if a read or write fails,
    # so the file handle is not leaked and buffered writes are flushed.
    with pd.HDFStore('data.h5') as hdf:
        for di, fn in enumerate(data):
            dn = data_name[di]
            hdf.put(dn, pd.read_csv('DATA/%s.csv' % fn))

# One-off step: uncomment to (re)build data.h5 from the CSV files.
# read_data(data, data_name)
        
# timer: wall-clock start of the load step
now = datetime.datetime.now()

# read data from hdf
# Opened read-only inside a context manager: nothing is written here, and the
# file handle is released as soon as the frames are in memory.
with pd.HDFStore('data.h5', mode='r') as hdf:
    train = hdf['train']
    sessions = hdf['sessions']
    age_gender = hdf['age_gender']
    countries = hdf['countries']

# total_seconds() covers the whole elapsed time; `.seconds` is only the
# seconds component of the timedelta.
print('data loaded, took %i seconds' % (datetime.datetime.now() - now).total_seconds())

# transform timestamp columns to datetime
# (bracket indexing is used so the assignments always target columns)
train['date_account_created'] = pd.to_datetime(train['date_account_created'])
# timestamp_first_active is parsed with an explicit YYYYMMDDHHMMSS format
train['timestamp_first_active'] = pd.to_datetime(train['timestamp_first_active'], format='%Y%m%d%H%M%S')
train['date_first_booking'] = pd.to_datetime(train['date_first_booking'])

# take a look
train.head()
data loaded, took 4 seconds
Out[1]:
| | id | date_account_created | timestamp_first_active | date_first_booking | gender | age | signup_method | signup_flow | language | affiliate_channel | affiliate_provider | first_affiliate_tracked | signup_app | first_device_type | first_browser | country_destination |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | gxn3p5htnn | 2010-06-28 | 2009-03-19 04:32:55 | NaT | -unknown- | NaN | facebook | 0 | en | direct | direct | untracked | Web | Mac Desktop | Chrome | NDF |
| 1 | 820tgsjxq7 | 2011-05-25 | 2009-05-23 17:48:09 | NaT | MALE | 38 | facebook | 0 | en | seo | google | untracked | Web | Mac Desktop | Chrome | NDF |
| 2 | 4ft3gnwmtx | 2010-09-28 | 2009-06-09 23:12:47 | 2010-08-02 | FEMALE | 56 | basic | 3 | en | direct | direct | untracked | Web | Windows Desktop | IE | US |
| 3 | bjjt8pjhuk | 2011-12-05 | 2009-10-31 06:01:29 | 2012-09-08 | FEMALE | 42 | facebook | 0 | en | direct | direct | untracked | Web | Mac Desktop | Firefox | other |
| 4 | 87mebub9p4 | 2010-09-14 | 2009-12-08 06:11:05 | 2010-02-18 | -unknown- | 41 | basic | 0 | en | direct | direct | untracked | Web | Mac Desktop | Chrome | US |

Profiling

date_first_booking

In [10]:
# Draw the trend grouped by 'doy' (presumably day of year — confirm in mod.draw_trend).
# No `var` is passed, so the helper's default column is used.
mod.draw_trend(train, 'doy');
In [11]:
# Same 'doy' grouping (presumably day of year — confirm in mod.draw_trend),
# applied to the timestamp_first_active column.
mod.draw_trend(train, 'doy', var='timestamp_first_active');
In [12]:
# Trend grouped by 'm' (presumably month — confirm in mod.draw_trend), default column.
# The trailing ';' keeps the cell from echoing the call's return value.
mod.draw_trend(train, 'm');
In [13]:
# Trend grouped by 'm' (presumably month — confirm in mod.draw_trend) for timestamp_first_active.
# The trailing ';' keeps the cell from echoing the call's return value.
mod.draw_trend(train, 'm', var='timestamp_first_active');
In [14]:
# Draw the trend grouped by 'd' (presumably day of month — confirm in mod.draw_trend).
# No `var` is passed, so the helper's default column is used.
mod.draw_trend(train, 'd');
In [15]:
# Trend grouped by 'dow' (presumably day of week — confirm in mod.draw_trend), default column.
# The trailing ';' keeps the cell from echoing the call's return value.
mod.draw_trend(train, 'dow');
In [16]:
# Trend grouped by 'dow' (presumably day of week — confirm in mod.draw_trend) for timestamp_first_active.
# The trailing ';' keeps the cell from echoing the call's return value.
mod.draw_trend(train, 'dow', var='timestamp_first_active');
In [17]:
# Trend grouped by 'hr' (presumably hour of day — confirm in mod.draw_trend) for timestamp_first_active.
# The trailing ';' keeps the cell from echoing the call's return value.
mod.draw_trend(train, 'hr', var='timestamp_first_active');
In [20]:
# Trend split by the country_destination column, for timestamp_first_active
# (exact grouping semantics live in mod.draw_trend — confirm there).
# The trailing ';' keeps the cell from echoing the call's return value.
mod.draw_trend(train, 'country_destination', var='timestamp_first_active');
comments powered by Disqus