About

We will write a few lines of Python code to download the tweets from a Twitter handle. Note that you can open this webpage as an IPython notebook by clicking the Colab tab shown on the top right panel (up).

import GetOldTweets3 as got
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable # to move placement of colorbar
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)
# Twitter handle to query and the cap on how many tweets to request.
username = 'MoHFW_INDIA'
count = 10000

# Build the search criteria: handle, since/until bounds and the tweet cap.
tweetCriteria = (
    got.manager.TweetCriteria()
    .setUsername(username)
    .setSince("2015-06-01")
    .setUntil("2020-06-29")
    .setMaxTweets(count)
)

# Download the tweets matching the criteria.
tweets = got.manager.TweetManager.getTweets(tweetCriteria)

# Split each tweet into its date and its text, kept as two parallel lists.
tweets_date, tweets_text = [], []
for tweet in tweets:
    tweets_date.append(tweet.date)
    tweets_text.append(tweet.text)

# Count tweets per calendar day, one 31 x 12 grid per year:
# rows are day-of-month (0-based), columns are month (0-based).
# Alongside each grid, keep one "month-year" label per counted tweet.
tweets_cal_1, tweets_cal_2 = np.zeros((31, 12)), np.zeros((31, 12))
tweets_date_1, tweets_date_2 = [], []

# Dispatch on the tweet's year; tweets from any other year are not counted.
per_year = {
    2019: (tweets_cal_1, tweets_date_1),
    2020: (tweets_cal_2, tweets_date_2),
}
for stamp in tweets_date:
    bucket = per_year.get(stamp.year)
    if bucket is None:
        continue
    grid, labels = bucket
    grid[stamp.day - 1, stamp.month - 1] += 1
    labels.append(str(stamp.month) + '-' + str(stamp.year))

# Place the two yearly grids side by side: columns 0-11 are 2019, 12-23 are 2020.
tweets_cal = np.hstack((tweets_cal_1, tweets_cal_2))
# Visualize the tweet count distributed over DAY-MONTH-YEAR.
#
# tweets_cal holds 24 monthly columns (Jan 2019 .. Dec 2020), but only the
# first 18 of them (Jan 2019 .. Jun 2020) are displayed in both plots below.
n_months = 18

# Tick labels in "month-yy" form (e.g. "3-20" for March 2020), one per
# displayed month. This name was used by both plots but was never defined.
xticks = [str(month) + '-' + str(year % 100)
          for year in (2019, 2020)
          for month in range(1, 13)][:n_months]

# --- Heatmap: day of month (rows) versus month (columns) ---
fig, ax = plt.subplots(1, 1, figsize=(12, 5))
# pcolormesh draws cells between *edges*, so each axis needs one more
# coordinate than the grid has cells along it. X holds the day edges
# (1 .. 32) and Y holds the month edges (0 .. 24).
X = np.arange(1, tweets_cal.shape[0] + 2)
Y = np.arange(0, tweets_cal.shape[1] + 1)
im = ax.pcolormesh(Y, X, tweets_cal, vmin=0, vmax=np.max(tweets_cal), cmap='Blues')
plt.xlim(0, n_months)
# Month cell j spans [j, j + 1]; put its label at the cell centre.
plt.xticks(np.arange(0, n_months) + .5, xticks, rotation=30)
cbar = fig.colorbar(im, ax=ax)
cbar.set_label('NO. OF TWEETS', size=13)
plt.ylabel("DAY OF MONTH", fontsize=13)
plt.xlabel("MONTH OF YEAR", fontsize=13)
plt.title('Twitter handle: @' + username, fontsize=13)
plt.show()

# --- Line plot: total number of tweets per month ---
fig, ax = plt.subplots(1, 1, figsize=(10, 4))
# Summing over the day axis leaves one total per monthly column.
ax.plot(np.sum(tweets_cal, axis=0))
ax.set_xlabel("MONTH OF YEAR", fontsize=13)
ax.set_ylabel('NO. OF TWEETS', fontsize=13)
ax.xaxis.set_minor_locator(AutoMinorLocator())
ax.yaxis.set_minor_locator(AutoMinorLocator())
ax.tick_params(which='both', width=2)
ax.grid(True)
plt.xticks(np.arange(0, n_months), xticks, rotation=30)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.tick_params(which='minor', length=4, color='gray')
# Points sit at integer positions 0 .. n_months - 1, so stop at the last one.
plt.xlim(0, n_months - 1)
plt.show()

Observations

The Twitter handle used above belongs to the Ministry of Health, Government of India. We can see a spurt in the tweet count from 3-20 (March 2020) onwards. From this month onwards, COVID-19 awareness grew in India.

Next

We will try doing some basic NLP on the tweet texts.