Using mpld3 to create interactive visualizations in IPython with Kaggle’s Airbnb data.
My first experience with it has been great!
Kaggle Description: https://www.kaggle.com/c/airbnb-recruiting-new-user-bookings
# load pkg
import datetime
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import mpld3
mpld3.enable_notebook()
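# import the local helper module `mod` through autoreload, so that edits to
# mod are picked up when a cell is re-run
# NOTE(review): %autoreload needs the autoreload extension to be loaded
# (`%load_ext autoreload`) -- presumably done earlier or in the profile; confirm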
%autoreload 1
%aimport mod
# save data in one hdf
# Base names of the raw Kaggle CSV files (without directory or extension) ...
data = [
    'train_users_2',
    'age_gender_bkts',
    'countries',
    'sessions',
    'test_users',
    'sample_submission_NDF',
]
# ... and, position by position, the key each one gets in the HDF store.
data_name = [
    'train',
    'age_gender',
    'countries',
    'sessions',
    'test',
    'submissions',
]
def read_data(data, data_name):
    """Convert the raw CSV files into a single HDF5 store, ``data.h5``.

    Each file ``DATA/<name>.csv`` is read with pandas and written to the
    store under the key found at the same position in ``data_name``.

    Args:
        data: CSV base names (no directory, no ``.csv`` extension).
        data_name: Store keys, one per entry of ``data``, in the same order.

    Raises:
        ValueError: If ``data`` and ``data_name`` differ in length.
    """
    # Fail before touching any file: a mismatch would otherwise either stop
    # halfway through the conversion or silently skip entries.
    if len(data) != len(data_name):
        raise ValueError(
            'data and data_name must have the same length (got %d and %d)'
            % (len(data), len(data_name)))

    # The context manager closes (and thereby flushes) the store even when
    # one of the CSV files fails to load.
    with pd.HDFStore('data.h5') as hdf:
        for fn, dn in zip(data, data_name):
            hdf.put(dn, pd.read_csv('DATA/%s.csv' % fn))
# read_data(data, data_name)
# timer: remember when loading started
now = datetime.datetime.now()
# read the frames back from the hdf store, looked up by key
hdf = pd.HDFStore('data.h5')
train, sessions, age_gender, countries = (
    hdf[key] for key in ('train', 'sessions', 'age_gender', 'countries')
)
# report the whole seconds elapsed since the timer was started
print('data loaded, took %i seconds' % ((datetime.datetime.now() - now).seconds,))
# parse the date / timestamp columns of train into datetimes
train['date_account_created'] = pd.to_datetime(train['date_account_created'])
# timestamp_first_active is stored as compact YYYYmmddHHMMSS values,
# so it needs an explicit format
train['timestamp_first_active'] = pd.to_datetime(
    train['timestamp_first_active'],
    format='%Y%m%d%H%M%S',
)
train['date_first_booking'] = pd.to_datetime(train['date_first_booking'])
# preview the first rows
train.head()
# Trend plots drawn by the local helper mod.draw_trend.
# NOTE(review): the second argument looks like a grouping unit
# (doy = day of year, m = month, d = day, dow = day of week, hr = hour) and
# `var` the datetime column it is derived from -- confirm against mod.
# The string arguments use plain ASCII quotes: the typographic quotes found
# here previously are not valid Python string delimiters.
# A trailing ';' suppresses the notebook's echo of the call's return value.
mod.draw_trend(train, 'doy');
mod.draw_trend(train, 'doy', var='timestamp_first_active');
mod.draw_trend(train, 'm')
mod.draw_trend(train, 'm', var='timestamp_first_active')
mod.draw_trend(train, 'd');
mod.draw_trend(train, 'dow')
mod.draw_trend(train, 'dow', var='timestamp_first_active')
mod.draw_trend(train, 'hr', var='timestamp_first_active')
mod.draw_trend(train, 'country_destination', var='timestamp_first_active')