Data Visualization Final

Question
Does the density of cholera cases differ between Northern and Southern Yemen? For this
question, I have split the governorates into two even groups: 11 governorates on or below latitude
15.3053 and 11 above that line.
How to View Dash

In order to view my presentation, you must have a cable that connects USB-C to USB so that you can
access the dash locally. Next, open the terminal and type python app1.py to start the application.
Finally, enter the address http://127.0.0.1:8050/ in your browser's address bar.
Code
# Libraries #########################################
import dash
from dash.dependencies import Input, Output, State, Event
import dash_core_components as dcc
import dash_html_components as html
import dash_table_experiments as dt
import plotly
from plotly import graph_objs as go
from plotly.graph_objs import *
from flask import Flask
import pandas as pd
import numpy as np
import os
import copy
app = dash.Dash(__name__)
# Flask server instance backing the Dash app, exposed at module level.
server = app.server

# API keys and datasets #########################################

# NOTE(review): this access token is committed in source; consider loading
# it from the environment instead.
# The literal is split into two adjacent strings only to keep lines short;
# Python concatenates them into a single token.
mapbox_access_token = (
    'pk.eyJ1IjoibWl0Y2gxMDMiLCJhIjoiY2p1M2JjNnBkMDU2ZjQzczdqb3dqMnN5byJ9'
    '.ipO9NbWoRsseRz9wjMjvDg'
)
# Columns read elsewhere in this module: 'Governorate', 'Lat', 'Lon', 'Cases'.
map_data = pd.read_csv('Yemen_Map.csv')
# Bootstrap CSS #########################################

# NOTE(review): RawGit has been shut down -- confirm this stylesheet URL still
# resolves, or serve the stylesheet from the app's own assets.
app.css.append_css({
    'external_url': (
        'https://cdn.rawgit.com/plotly/dash-app-stylesheets/'
        '2d266c578d2a6e8850ebce48fdb52759b2aef506/'
        'stylesheet-oil-and-gas.css'
    ),
})
#Map layout and set up #########################################

# Plotly layout shared by the map figure; layout_right is deep-copied from it.
layout = {
    'autosize': True,
    'height': 500,
    'font': {'color': "#191A1A"},
    'titlefont': {'color': "#191A1A", 'size': '14'},
    'margin': {'l': 35, 'r': 35, 'b': 35, 't': 45},
    'hovermode': "closest",
    'plot_bgcolor': '#fffcfc',
    'paper_bgcolor': '#fffcfc',
    'legend': {'font': {'size': 15}, 'orientation': 'h'},
    'title': 'Cholera Density Comparison by Governorate 02/18/18',
    # Map view: light base style, centred on the coordinates below.
    'mapbox': {
        'accesstoken': mapbox_access_token,
        'style': "light",
        'center': {'lon': 48.5164, 'lat': 15.5527},
        'zoom': 4,
    },
}
#This helps with my colorscale

# Case-band names ('All' first), plus the same names as label/value option
# dicts.
group = ['All', 'Low', 'Medium', 'High', 'Very High']
group_class = [dict(label=str(name), value=str(name)) for name in group]
# Creating layouts for data table #########################################

# Independent copy of the shared layout with overrides for the data-table
# container (it is passed as that container's `style`).
layout_right = copy.deepcopy(layout)
layout_right.update({
    'height': 300,
    'margin-top': '20',
    'font-size': '12',
})
# Extremes of the 'Cases' column, used by size_scale for normalisation.
mp_max, mp_min = map_data['Cases'].max(), map_data['Cases'].min()
# Components style #########################################

def size_scale(md, selected_row_indices=None):
    """Map each row's case count to a marker-size string.

    Every value in ``md['Cases']`` is min-max normalised against the
    module-level ``mp_min`` / ``mp_max`` and placed in one of sixteen
    buckets, giving sizes "5" (smallest) through "20" (largest).

    Args:
        md: Table-like object whose ``'Cases'`` entry is an iterable of
            numbers.
        selected_row_indices: Positions in the result whose size is forced
            to '5' after bucketing. ``None`` (the default) means no rows.

    Returns:
        A list of size strings, one per value in ``md['Cases']``.
    """
    # Inclusive upper bounds of the buckets for sizes "5" .. "19". Anything
    # above the last bound is the largest size, "20" (the original fell back
    # to "5" here, drawing the highest counts with the smallest marker).
    upper_bounds = (
        0.10, 0.12, 0.18, 0.24, 0.30, 0.36, 0.42, 0.48,
        0.54, 0.60, 0.66, 0.72, 0.78, 0.84, 0.90,
    )
    span = mp_max - mp_min

    size = []
    for row in md['Cases']:
        # A zero span means every count is identical; use the smallest
        # bucket rather than dividing by zero.
        scale = (row - mp_min) / span if span else 0.0
        # Number of bounds strictly below `scale` == index of the first
        # bound that satisfies `scale <= bound`.
        bucket = sum(1 for bound in upper_bounds if scale > bound)
        size.append(str(5 + bucket))

    for i in selected_row_indices or ():
        size[i] = '5'
    return size
#Map and point updates #########################################

def gen_map(map_data):
    """Build the figure dict for the governorate map.

    Args:
        map_data: DataFrame with 'Lat', 'Lon' and 'Governorate' columns.
            Its index labels are used to pick each row's marker size.

    Returns:
        A dict with a single ``scattermapbox`` trace under ``"data"`` and
        the module-level ``layout`` under ``"layout"``.
    """
    # Hand-assigned marker sizes, one per row of the full table.
    # NOTE(review): assumes index label i refers to the i-th row of the
    # full table -- confirm if the table can be re-sorted.
    marker_sizes = (10, 11, 7, 25, 24, 18, 22, 21, 9, 26, 12,
                    23, 20, 17, 16, 8, 19, 15, 14, 13, 6, 5)
    default_size = 5

    # Look sizes up by index label rather than by position, so that a
    # subset of rows keeps the sizes those rows have in the full view.
    sizes = []
    for label in map_data.index:
        try:
            sizes.append(marker_sizes[label])
        except (IndexError, TypeError):
            # Label outside the table or not an integer.
            sizes.append(default_size)

    return {
        "data": [
            {
                "type": "scattermapbox",
                "lat": list(map_data['Lat']),
                "lon": list(map_data['Lon']),
                "text": list(map_data['Governorate']),
                "name": list(map_data['Governorate']),
                "mode": "markers",
                "marker": {
                    "size": sizes,
                    "opacity": 0.7,
                    "color": "black",
                },
            }
        ],
        "layout": layout,
    }
# Layout #########################################
app.layout = html.Div(
    className='ten columns offset-by-one',
    children=[
        # Header row: page title plus a logo image floated right.
        html.Div(
            className='row',
            children=[
                html.H2(
                    children='Cholera Cases in Yemen May 2017 - February 2018',
                    className='nine columns',
                    style={
                        'font-family': 'Helvetica',
                        "margin-top": "25",
                        "margin-bottom": "0",
                    },
                ),
                html.Img(
                    src="https://nairobigarage.com/2017/wp-content/uploads/2015/02/hxl_fb-01.png",
                    className='one column',
                    style={
                        'height': '18%',
                        'width': '18%',
                        'float': 'right',
                        'position': 'relative',
                        'padding-top': 10,
                        'padding-right': 0,
                    },
                ),
            ],
        ),
        # Map + table + Histogram #########################################
        html.Div(
            className="row",
            children=[
                # Left half: the map graph.
                html.Div(
                    className="six columns",
                    children=[
                        dcc.Graph(
                            id='map-graph',
                            animate=True,
                            style={'margin-top': '20'},
                        ),
                    ],
                ),
                # Right half: selectable / filterable / sortable data table.
                html.Div(
                    className="six columns",
                    style=layout_right,
                    children=[
                        dt.DataTable(
                            id='datatable',
                            rows=map_data.to_dict('records'),
                            columns=map_data.columns,
                            row_selectable=True,
                            filterable=True,
                            sortable=True,
                            selected_row_indices=[],
                        ),
                    ],
                ),
                # Full width: the bar chart.
                html.Div(
                    className="twelve columns",
                    children=[dcc.Graph(id="histogram")],
                ),
            ],
        ),
    ],
)
def update_selected_row_indices(governorate, date, cases, deaths):
    """Return the ``map_data`` rows for the given governorates and case band.

    NOTE(review): a callback with the same name is defined later in this
    module and replaces this function at import time; rename one of them if
    this filter is meant to be used.

    Args:
        governorate: Collection of governorate names to keep.
        date: Unused.
        cases: Case band: 'Low' (<= 3000), 'Medium' (3001-10000),
            'High' (10001-20000) or 'Very High' (> 20000). Any other value
            applies no case filter.
        deaths: Unused.

    Returns:
        The matching rows as a list of record dicts.
    """
    # Governorates filter #########################################
    map_aux = map_data[map_data['Governorate'].isin(governorate)]

    # Cases filter #########################################
    # Bands are contiguous and non-overlapping. The original 'High' band was
    # 10000-15000 inclusive, which double-counted 10000 with 'Medium' and
    # left 15001-20000 in no band at all.
    # NOTE(review): extending 'High' up to 20000 is an assumption -- confirm
    # the intended boundary.
    counts = map_aux['Cases']
    if cases == 'Low':
        map_aux = map_aux[counts <= 3000]
    elif cases == 'Medium':
        map_aux = map_aux[(counts > 3000) & (counts <= 10000)]
    elif cases == 'High':
        map_aux = map_aux[(counts > 10000) & (counts <= 20000)]
    elif cases == 'Very High':
        map_aux = map_aux[counts > 20000]

    return map_aux.to_dict('records')
#Callbacks #########################################
@app.callback(
    Output('datatable', 'selected_row_indices'),
    [Input('histogram', 'selectedData')],
    [State('datatable', 'selected_row_indices')])
def update_selected_row_indices(selectedData, selected_row_indices):
    """Set the table's selected rows from a selection made on the bar chart.

    Args:
        selectedData: The histogram's ``selectedData`` value; when truthy,
            its ``'points'`` entries each carry a ``'pointNumber'``.
        selected_row_indices: The table's current selection.

    Returns:
        The point numbers of the selected bars, or the current selection
        unchanged when nothing is selected on the chart.
    """
    if not selectedData:
        return selected_row_indices
    return [point['pointNumber'] for point in selectedData['points']]
@app.callback(
    Output('histogram', 'figure'),
    [Input('datatable', 'rows'),
     Input('datatable', 'selected_row_indices')])
def update_figure(rows, selected_row_indices):
    """Build the cases-per-governorate bar chart from the table's rows.

    Args:
        rows: The data table's rows as a list of record dicts with
            'Governorate' and 'Cases' keys.
        selected_row_indices: The table's selection. Not read here; it is a
            callback input so the chart is rebuilt when the selection
            changes.

    Returns:
        A ``go.Figure`` holding one bar trace.
    """
    dff = pd.DataFrame(rows)
    # No rows gives a frame with no columns; draw an empty chart instead of
    # raising KeyError on the column lookups.
    if dff.empty:
        governorates, cases = [], []
    else:
        governorates, cases = dff['Governorate'], dff['Cases']

    # Named figure_layout so it does not shadow the module-level `layout`.
    # A plain dict is used for the margin instead of the wildcard-imported
    # `Margin` helper.
    figure_layout = go.Layout(
        bargap=0.5,
        bargroupgap=0,
        barmode='group',
        margin=dict(l=50, r=10, t=0, b=100),
        showlegend=False,
        height=250,
        # Box-select on the chart drives the table selection callback.
        dragmode="select",
        xaxis=dict(
            showgrid=False,
            nticks=50,
            fixedrange=False,
        ),
        yaxis=dict(
            showticklabels=True,
            showgrid=False,
            fixedrange=False,
            rangemode='nonnegative',
        ),
    )

    # Bar Graph set up #########################################
    bars = go.Bar(
        x=governorates,
        y=cases,
        marker=dict(
            color="white",
            line=dict(
                color="black",
                width=1.5,
            ),
        ),
    )
    # A plain list of traces replaces the wildcard-imported `Data` wrapper.
    return go.Figure(data=[bars], layout=figure_layout)
@app.callback(
    Output('map-graph', 'figure'),
    [Input('datatable', 'rows'),
     Input('datatable', 'selected_row_indices')])
def map_selection(rows, selected_row_indices):
    """Redraw the map for the table's selected rows, or all rows if none.

    Args:
        rows: The data table's rows as a list of record dicts.
        selected_row_indices: Index labels of the selected rows; empty or
            ``None`` means nothing is selected.

    Returns:
        The figure dict produced by ``gen_map``.
    """
    aux = pd.DataFrame(rows)
    if not selected_row_indices:
        return gen_map(aux)
    # `.loc` replaces the removed `DataFrame.ix`; on the default integer
    # index both select rows by label.
    return gen_map(aux.loc[selected_row_indices, :])
# Start the development server only when this file is run as a script.
if __name__ == '__main__':
    app.run_server(debug=True)
Applying Tufte’s and Kosslyn’s Principles

One of Tufte’s principles that I tried to follow was using only the amount of ink necessary to
show my data story. Another key principle of his that I followed was data integrity. My data shows the
truth based on the data set that was collected. The map shows you the locations and the
severity, while my bar graph, which corresponds with the map and data table, shows how
many people in Yemen have been affected by cholera. My visualization also lacks chart junk and is
proportionate to the numbers that are supposed to be represented. Although I don’t use his classic
visualization of small multiples, I do allow the viewer to compare different values in the dataset to one
another. For example, you could search in the filter for a specific date, number of cases, etc.
Going along with Kosslyn’s principles, I believe that the principle of relevance wouldn’t have been
followed if I had decided to show more than just the cases. Originally, I wanted to show deaths along
with air strikes, but there was simply too much data for an individual to look at and fully understand.
This would have gone against the principle of capacity limitations, which is violated when a visualization shows
too much information and doesn’t allow the viewer to process it all. Since cases appeared to be very
significant, not that deaths aren’t, I wanted to show the change over the period of ten months. This can
be clearly seen in the bar graph, which follows Kosslyn’s principles of salience and discriminability.
Originally, I had the bars on my graph colored with multiple colors showing an increase. I realized that
this would most likely distract the viewer and possibly confuse them, so I eliminated the colors and avoided
violating Kosslyn’s principle of perceptual organization.
Development Process
Struggles and Victories
When I first started this project, things seemed to be going smoothly because I was able to get the
example dash up and running pretty quickly. I also learned how to make my own graphs by loading
data into Dash. However, my first big roadblock occurred when I began experimenting with making a
map. I knew I wanted my dash to have a map and a bar graph to represent my data. I had a
hard time with the map but decided to restart, and I finally managed to get a map working. I think the problem was
due to my app.py file being bogged down with different code. However, when I moved on to callbacks,
that is where most of my struggles were. I could get the layout down but not the callbacks.
Eventually, I found a one-hour video by Am Yoshino that explained everything in great detail and
was very useful. That is where I was inspired to add a data table he used in another example and to
add the logo of the organization where the data I’m using came from. His video did an excellent job of
explaining callbacks.
As for the victories while doing this project, I noticed that dividing up the work into steps made it much
easier. The first step is to get something to work on Dash. From there, move on to adding more
features. Finally, set up the callbacks to make the visualization interactive. Although, it was a very long
process getting the data to look the way I wanted it to because I had to create the dataset from scratch. I
had to look up the different governorates and their longitude and latitudes because there were many
changes in Yemen due to recent events. Some governorates that existed before are no longer there and
some are brand new.
Lessons Learned
I would say the number one lesson learned from this project is to make sure I don’t bog my project
down with different chunks of code and to not restart every time it becomes too bogged down. I found
myself confused when I had numerous app.py files and was unable to get any of them to work. I
definitely needed to have more patience with creating the app. When I did have patience, everything
worked out really well. When I took it one step at a time, I saw much better results in regard to the
layout of the visualization. Before I begin coding next time, I am going to make sure I spend more time
thinking about how I want to present the data. Going into my Dash, I had no initial idea or plan of
how I wanted it to look besides having a few graphs. This made it very difficult for me to code because
I had no idea which code I should be typing.
Shown above is exactly what you shouldn’t do when trying to create your app. Maybe if you have one
other app to test out certain bits of code without ruining what you already have is fine but don’t make
eight different apps.
Recommendations
I would recommend that anyone write down a plan first before starting this project. What data do you
want to present? How do you want to present your data? What layout do you want? I personally learn
best from watching tutorials and videos because I am able to go back immediately and dissect them one
part at a time. I found a video that really helped me and would recommend that others find the method
they learn from best and just look at that type of material for a while before starting. If you come across
an error that you can’t seem to figure out, I definitely recommend commenting out most of your code
and running it section by section to find where the error is. I came across some situations where my
terminal didn’t tell me where or what the error was. By commenting out my code, I was able to narrow
down where my error was located.
The app.py file above isn’t complete, but it was something that I managed to get working. I found it
much easier to start small and work my way up.
More Time
If I had more time, I would figure out how to define the borders of the governorates in Yemen so that
they are easier to see. I would also analyze each month instead of primarily focusing on the most recent
month and the first month recorded. I would like to compare different seasons to each other as well to
see if it slowed down as temperatures dropped or if more rainfall occurred. Only certain areas would be
affected by these traits however.
Findings
I found that Southern Yemen did have a slightly higher percentage of cases. However, I thought the
difference would be more significant. 47% of the total number of cases were above the latitude line I
selected, while 53% were below. Over these ten months, there have been 998,000
new cases. 3.8% of the total population in Yemen is infected with cholera, compared to the 0.2% that
was infected in May. The total infected above the line is 503,334 people and below is 556,442 people.

Data Visualization Final

Загружено:

Сведения о документе

Оригинальное название

Авторское право

Доступные форматы

Поделиться этим документом

Поделиться или встроить документ

Параметры публикации

Этот документ был вам полезен?

Это неприемлемый материал?

Авторское право:

Доступные форматы

Data Visualization Final

Загружено:

Авторское право:

Доступные форматы

Question

How to View Dash

# API keys and datasets #########################################

# Bootsrap CSS #########################################

#Map layout and set up #########################################

#This helps with my colorscale

# Creating layouts for data table #########################################

# Components style #########################################

#Map and point updates #########################################

# Map + table + Histogram #########################################

def update_selected_row_indices(governorate, date, cases, deaths):

# Governorates filter #########################################

# Cases filter #########################################

# Bar Graph set up #########################################

Applying Tufte’s and Kosslyn’s Principles

Вам также может понравиться