Friday, June 29, 2018

Plotting a calendar in matplotlib

And here's a function to plot a compact calendar with matplotlib:
import calendar
import numpy as np
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt

def plot_calendar(days, months):
    plt.figure(figsize=(9, 3))
    # non days are grayed
    ax = plt.gca().axes
    ax.add_patch(Rectangle((29, 2), width=.8, height=.8, 
                           color='gray', alpha=.3))
    ax.add_patch(Rectangle((30, 2), width=.8, height=.8,
                           color='gray', alpha=.5))
    ax.add_patch(Rectangle((31, 2), width=.8, height=.8,
                           color='gray', alpha=.5))
    ax.add_patch(Rectangle((31, 4), width=.8, height=.8,
                           color='gray', alpha=.5))
    ax.add_patch(Rectangle((31, 6), width=.8, height=.8,
                           color='gray', alpha=.5))
    ax.add_patch(Rectangle((31, 9), width=.8, height=.8,
                           color='gray', alpha=.5))
    ax.add_patch(Rectangle((31, 11), width=.8, height=.8,
                           color='gray', alpha=.5))
    for d, m in zip(days, months):
        ax.add_patch(Rectangle((d, m), 
                               width=.8, height=.8, color='C0'))
    plt.yticks(np.arange(1, 13)+.5, list(calendar.month_abbr)[1:])
    plt.xticks(np.arange(1,32)+.5, np.arange(1,32))
    plt.xlim(1, 32)
    plt.ylim(1, 13)
    plt.gca().invert_yaxis()
    # remove borders and ticks
    for spine in plt.gca().spines.values():
        spine.set_visible(False)
    plt.tick_params(top=False, bottom=False, left=False, right=False)
    plt.show()


You can use it to highlight the days with full moon in 2018:
full_moon_day = [2, 31, 2, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22]
full_moon_month = [1, 1, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
plot_calendar(full_moon_day, full_moon_month)

Or just highlight all the weekends:
from datetime import datetime, timedelta

def get_weekends(year):
    weekend_day = []
    weekend_month = [] 
    start = datetime(year, 1, 1)
    for i in range(365):
        day_of_the_year = start + timedelta(days=i)
        if day_of_the_year.weekday() > 4:
            weekend_day.append(day_of_the_year.day)
            weekend_month.append(day_of_the_year.month)
    return weekend_day, weekend_month

weekend_day, weekend_month = get_weekends(2018)
plot_calendar(weekend_day, weekend_month)

Wednesday, May 30, 2018

Visualizing UK Carbon Emissions

Have you ever wanted to check carbon emissions in the UK and never had an easy way to do it? Now you can use the Official Carbon Intensity API developed by the National Grid. Let's see an example of how to use the API to summarize the emissions in the month of May. First, we download the data with a request to the API:
import urllib.request
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

period = ('2018-05-01T00:00Z', '2018-05-28T00:00Z')
url = 'https://api.carbonintensity.org.uk/intensity/%s/%s'
url = url % period
response = urllib.request.urlopen(url)
data = json.loads(response.read())['data']
We organize the result in a DataFrame indexed by timestamps:
carbon_intensity = pd.DataFrame()
carbon_intensity['timestamp'] = [pd.to_datetime(d['from']) for d in data]
carbon_intensity['intensity'] = [d['intensity']['actual'] for d in data]
carbon_intensity['classification'] = [d['intensity']['index'] for d in data]
carbon_intensity.set_index('timestamp', inplace=True)
From the classification provided we extract the thresholds to label emissions in low, high and moderate:
thresholds = carbon_intensity.groupby(by='classification').min()
threshold_high = thresholds[thresholds.index == 'high'].values[0][0]
threshold_moderate = thresholds[thresholds.index == 'moderate'].values[0][0]
Now we group the data by hour of the day and create a boxplot that shows some interesting facts about carbon emissions in May:
hour_group = carbon_intensity.groupby(carbon_intensity.index.hour)

plt.figure(figsize=(12, 6))
plt.title('UK Carbon Intensity in May 2018')
plt.boxplot([g.intensity for _,g in hour_group], 
            medianprops=dict(color='k'))

ymin, ymax = plt.ylim()

plt.fill_between(x=np.arange(26), 
                 y1=np.ones(26)*threshold_high, 
                 y2=np.ones(26)*ymax, 
                 color='crimson', 
                 alpha=.3, label='high')

plt.fill_between(x=np.arange(26), 
                 y1=np.ones(26)*threshold_moderate, 
                 y2=np.ones(26)*threshold_high, 
                 color='skyblue', 
                 alpha=.5, label='moderate')

plt.fill_between(x=np.arange(26), 
                 y1=np.ones(26)*threshold_moderate, 
                 y2=np.ones(26)*ymin, 
                 color='palegreen', 
                 alpha=.3, label='low')

plt.ylim(ymin, ymax)
plt.ylabel('carbon intensity (gCO_2/kWH)')
plt.xlabel('hour of the day')
plt.legend(loc='upper left', ncol=3,
           shadow=True, fancybox=True)
plt.show()

We notice that the medians almost always falls in the moderate emissions region and in two cases it even falls in the low region. In the early afternoon the medians reach their minimum while the maximum is reached in the evening. It's nice to see that most of the hours present outliers in the low emissions region and only few outliers are in the high region.

Do you want to know more about boxplots? Check this out!