A follow-up to the Twitter profile description word cloud: I’ve created a hashtag word cloud from the 19.2 million hashtags used in the tweets collected by Altmetric.com.
The Python code is VERY similar to the profile description word cloud code; however, we have to turn off the ‘collocations’ option in the WordCloud module options to make it work as we expect.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Feb 10 16:19:56 2019
@author: tdbowman
"""
import io
import csv
import numpy as np
from wordcloud import WordCloud, STOPWORDS
from os import path
from PIL import Image
# Directory containing this script; the mask image (cloud.png) and the
# generated wc_hashtags.png are both resolved relative to it.
currdir = path.dirname(__file__)
# from https://github.com/nikhilkumarsingh/wordcloud-example/blob/master/mywc.py
def create_wordcloud(text):
    """Render *text* as a masked word cloud and save it next to this script.

    The image ``cloud.png`` (looked up in ``currdir``) is loaded as the mask,
    a ``WordCloud`` is generated from *text*, and the result is written to
    ``wc_hashtags.png`` in the same directory.

    Args:
        text: The string handed to ``WordCloud.generate``.

    Returns:
        None. The only result is the PNG file written to disk.
    """
    # Load the mask image as an array so it can be passed to WordCloud.
    mask_file = path.join(currdir, "cloud.png")
    cloud_shape = np.array(Image.open(mask_file))

    # No explicit stopwords are passed, so WordCloud's own defaults apply.
    # collocations is disabled on purpose (presumably so adjacent hashtags
    # are not merged into two-word phrases -- confirm against WordCloud docs).
    # NOTE(review): WordCloud documents width/height as ignored when a mask
    # is supplied; they are kept here unchanged -- confirm before removing.
    settings = dict(
        collocations=False,
        background_color="white",
        max_words=200,
        mask=cloud_shape,
        width=1334,
        height=945,
    )
    cloud = WordCloud(**settings)

    # Build the cloud from the text, then write it out as a PNG.
    cloud.generate(text)
    output_file = path.join(currdir, "wc_hashtags.png")
    cloud.to_file(output_file)
def read_hashtags(csv_path):
    """Return the first column of every non-empty CSV row, comma-joined.

    Args:
        csv_path: Path of a UTF-8 encoded CSV file whose first column holds
            the hashtags.

    Returns:
        A single string with the first field of each row separated by
        commas. Rows with no fields (blank lines) are skipped; an input
        with no usable rows yields an empty string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # newline='' is what the csv module asks for, so that it can handle line
    # endings (including newlines embedded in quoted fields) itself.
    with io.open(csv_path, 'r', encoding='utf-8', newline='') as f:
        # Strip NUL characters before parsing; older Python versions make
        # csv.reader raise csv.Error on lines that contain them.
        reader = csv.reader(line.replace('\0', '') for line in f)
        # csv.reader yields [] for blank lines, so guard before indexing.
        return ','.join(row[0] for row in reader if row)


if __name__ == "__main__":
    # Grab the hashtag column from the file as one comma-separated string
    hashtag_text = read_hashtags('hashtags.csv')
    # generate wordcloud
    create_wordcloud(hashtag_text)