%reset -f
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import helpers as h
import numpy as np
import scipy as sp
import json
import os
# Output folders that the rest of the script writes into; create any that are missing.
_requiredPaths = ['tab', 'tex', 'img']
for p in _requiredPaths:
    if os.path.exists(p):
        continue
    os.mkdir(p)

# Survey answers; the first CSV column is used as the row index.
joined = pd.read_csv('cleaned.csv', index_col=0)

# Widen the pandas display limits so wide frames and long cells are shown in full.
pd.options.display.max_columns = 400
pd.options.display.max_colwidth = 100

# JSON mapping that is later looked up by column name to obtain table row labels.
with open("descriptions.json", 'r') as f:
    descriptions = json.load(f)

# Quick look at the distribution of the TimeInterview column.
joined['TimeInterview'].plot(kind="hist");
import re
def descriptiveValueCounts(series, caption="", reference="", header=None, isFloat=True, save=True, folder="tab",
                           reorder=None):
    """Build a two-column table (value, count) from the value counts of ``series``.

    Parameters:
        series: pandas Series whose distinct values are counted with
            ``value_counts()`` (most frequent first, missing values excluded).
        caption: caption passed to ``t.setCaption``; skipped when empty.
        reference: stored in ``t.reference``; skipped when empty.
        header: list of header cells passed to ``t.setHeader``; skipped when
            empty or ``None``.
        isFloat: stored in ``t.isFloat``.
        save: when true, ``t.writeLatexToFile(path=folder)`` is called.
        folder: target path handed to ``writeLatexToFile``.
        reorder: optional list of row positions; when given, the rows are
            rearranged (or subset) to exactly that order of positions in the
            value-count ordering.

    Returns:
        The populated ``h.Table`` instance.
    """
    t = h.Table("lc", "lc")
    t.isFloat = isFloat
    if caption:
        t.setCaption(caption)
    if reference:
        t.reference = reference
    if header:
        t.setHeader(header)
    vc = series.value_counts()
    # Pair index and values directly: Series.iteritems() was removed in
    # pandas 2.0, while zip() works on every pandas/Python version.
    for k, v in zip(vc.index, vc.values):
        t.addRow([k, "$%d$" % v])
    if reorder:
        t.rows = [t.rows[i] for i in reorder]
    if save:
        t.writeLatexToFile(path=folder)
    return t
# Table of the participants' highest qualification (written to the default "tab" folder).
descriptiveValueCounts(joined.DemoEducation, caption="Educational Demographics", reference="demoEducation",
                       header=["Highest Qualification", "Count"])
# Histogram of participant age in 5-year bins from 10 up to (but excluding) 80,
# exported through the project's figuresToLaTeX helper.
mf = h.figuresToLaTeX(columns=1, basename='demoAge', path='',
                      caption='Histogram of our participant\'s age')
a = plt.figure(figsize=(8, 5), dpi=80)
ax = joined.DemoAge.plot(kind="hist", bins=np.arange(10, 80, 5))
# Grey bars with a slightly darker outline.
plt.setp(ax.patches, 'facecolor', '0.3', 'edgecolor', '0.15', 'alpha', 0.75)
# Re-label the existing ticks in math mode so they match the LaTeX document font.
locs, labels = plt.xticks()
plt.xticks(locs, [r'$%g$' % x for x in locs], size='large')
locs, labels = plt.yticks()
plt.yticks(locs, [r'$%g$' % x for x in locs], size='large')
plt.xlabel('Age')
plt.ylabel('Frequency')
h.rstyle(ax)  # project helper; presumably restyles the axes - see helpers module
mf.addFigure(a, describeText=h.describe(joined.DemoAge.values))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(h.describe(joined.DemoAge.values))
mf.writeLaTeX()
# Count tables for the remaining demographic questions.
descriptiveValueCounts(joined.DemoEmployment, caption="Employment Demographics of the participants",
                       reference="demoEmployment", header=["Employment Status", "Count"])
descriptiveValueCounts(joined.DemoGender, caption="Gender of the participants", reference="demoGender",
                       header=["Gender", "Count"])
descriptiveValueCounts(joined.DemoLearningDiff, caption="Learning difficulty or disability of the participants",
                       reference="demoDisability", header=["Answer", "Count"])
# Histogram of the NumberCards column with unit-wide bins covering 0..10,
# exported through the project's figuresToLaTeX helper.
mf = h.figuresToLaTeX(columns=1, basename='numberCards', path='',
                      caption='Participant\'s number of payment card with PINs')
a = plt.figure(figsize=(6, 3), dpi=80)
ax = joined.NumberCards.plot(kind="hist", bins=np.arange(0, 11, 1))
# Grey bars with a slightly darker outline.
plt.setp(ax.patches, 'facecolor', '0.3', 'edgecolor', '0.15', 'alpha', 0.75)
# Re-label the existing ticks in math mode so they match the LaTeX document font.
locs, labels = plt.xticks()
plt.xticks(locs, [r'$%g$' % x for x in locs], size='large')
locs, labels = plt.yticks()
plt.yticks(locs, [r'$%g$' % x for x in locs], size='large')
plt.xlabel('Number of Payment Cards')
plt.ylabel('Frequency')
h.rstyle(ax)  # project helper; presumably restyles the axes - see helpers module
mf.addFigure(a, describeText=h.describe(joined.NumberCards.values))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(h.describe(joined.NumberCards.values))
mf.writeLaTeX()
# Table with one row per PIN length (4, 5, 6 digits): how many participants
# reported 0..9 PINs of that length, followed by a mean.
t = h.Table("l" + "c"*11, "l|" + "c"*10 + "|c")
# list(range(...)) keeps the concatenation valid on Python 3, where range() is not a list.
t.setHeader([""] + list(range(10)) + ["mean"])
t.isFloat = True
t.setCaption("Distribution of Participant's PINs")
t.reference = "numPINs"
for digits, column in ((4, joined.NumberPINs4), (5, joined.NumberPINs5), (6, joined.NumberPINs6)):
    st = column.value_counts()
    # Number of participants per answer 0..9; answers that never occur count as 0.
    ds = [st[x] if x in st else 0 for x in range(10)]
    # NOTE(review): 241 is hard-coded as the divisor - presumably the number of
    # participants; confirm it still matches cleaned.csv (answers >= 10 and
    # missing values are not part of the numerator).
    mean = 1.0 * sum(i * ds[i] for i in range(10)) / 241
    t.addRow(["%d digits" % digits] + ["$%d$" % d for d in ds] + ["$%.2f$" % mean])
t.writeLatexToFile(path="tab")
t
# `import scipy` alone does not guarantee that the `stats` subpackage is loaded;
# import it explicitly so `sp.stats` resolves no matter what other modules imported.
import scipy.stats

# Paired-sample t-test between the number of 4-digit PINs and the number of cards.
sp.stats.ttest_rel(joined.NumberPINs4.values, joined.NumberCards.values)
joined.NumberCards.value_counts()
# Cash withdrawal frequency table. `reorder` picks rows by their position in
# the value-count ordering, so it has to be revisited whenever the data changes.
descriptiveValueCounts(joined.FreqCash, caption="Frequency of cash withdrawal",
                       reference="freqCash", header=["Frequency", "Count"], reorder=[4, 1, 0, 2, 3, 5, 6])
#Pin use frequencies:
import itertools

# Answer categories observed for the first 4-digit PIN, rearranged by position
# into the row order wanted in the table.
keys = joined["FreqP4#1"].value_counts().keys()
keys = [keys[pos] for pos in (2, 0, 1, 3, 4, 6, 5)]

t = h.Table(["l"] + ["c"] * (9 + 3 + 4), ["l|cccccccc|c|cc|c|ccc|c"])
t.header = ([""]
            + ["#%d" % n for n in range(1, 9)] + ["Sum"]
            + ["#%d" % n for n in range(1, 3)] + ["Sum"]
            + ["#%d" % n for n in range(1, 4)] + ["Sum"])

# Value counts per PIN slot: eight 4-digit, two 5-digit and three 6-digit PINs.
cols4 = [joined["FreqP4#%d" % n].value_counts() for n in range(1, 9)]
cols5 = [joined["FreqP5#%d" % n].value_counts() for n in range(1, 3)]
cols6 = [joined["FreqP6#%d" % n].value_counts() for n in range(1, 4)]

for k in keys:
    cells = []
    for group in (cols4, cols5, cols6):
        # Count per PIN slot for this answer (0 when the answer never occurs),
        # followed by the total over the PIN length.
        perSlot = [col[k] if k in col else 0 for col in group]
        cells.extend(perSlot)
        cells.append(sum(perSlot))
    t.addRow([k] + ["$%d$" % x for x in cells])

t.setCaption("Frequency of usage of all PINs of the participants")
t.latexComment = r"&\multicolumn{9}{c|}{4-digit PINs}&\multicolumn{3}{c|}{5-digit PINs}&\multicolumn{4}{c}{6-digit PINs}\\"
t.reference = "freqPINs"
t.writeLatexToFile(path="tab")
t
# Answer categories observed for the first 4-digit PIN, rearranged by position
# into the row order wanted in the table.
keys = joined["OrigP4#1"].value_counts().keys()
keys = [keys[pos] for pos in (1, 0, 2)]

slotHeader4 = ["#%d" % n for n in range(1, 9)]
slotHeader5 = ["#%d" % n for n in range(1, 3)]
slotHeader6 = ["#%d" % n for n in range(1, 4)]
t = h.Table(["l"] + ["c"] * (9 + 3 + 4), ["p{2.8cm}|cccccccc|c|cc|c|ccc|c"])
t.header = [""] + slotHeader4 + ["Sum"] + slotHeader5 + ["Sum"] + slotHeader6 + ["Sum"]

# Value counts per PIN slot: eight 4-digit, two 5-digit and three 6-digit PINs.
cols4 = [joined["OrigP4#%d" % n].value_counts() for n in range(1, 9)]
cols5 = [joined["OrigP5#%d" % n].value_counts() for n in range(1, 3)]
cols6 = [joined["OrigP6#%d" % n].value_counts() for n in range(1, 4)]
t.latexComment = r"&\multicolumn{9}{c|}{4-digit PINs}&\multicolumn{3}{c|}{5-digit PINs}&\multicolumn{4}{c}{6-digit PINs}\\"

for k in keys:
    row = [k]
    for group in (cols4, cols5, cols6):
        # Per-slot counts for this answer (0 when absent), then their total.
        hits = [col[k] if k in col else 0 for col in group]
        row += ["$%d$" % x for x in itertools.chain(hits, [sum(hits)])]
    t.addRow(row)

t.setCaption("Source of PINs of the participants")
t.reference = "originPINs"
t.writeLatexToFile(path="tab")
t
# Table of where written-down PINs are kept: per location and PIN length the
# share of "Yes" among the Yes/No answers, plus the share over all three lengths.
keys = [u'Card', u'Slip', u'Desk', u'Wallet', u'Diary', u'File', u'Phone']
base = 'PinWritten'
t = h.Table(["l"] + ["c"] * 4, ["l|ccc|c"])
t.header = [""] + ["%d-digit" % digits for digits in (4, 5, 6)] + ["Sum"]
t.latexComment = "Two people mentioned that they store their Pins in lastpass"
for k in keys:
    d, dNo = [], []
    for digits in (4, 5, 6):
        column = '%s#%d%s' % (base, digits, k)
        # Columns without a description entry count as "no answers"; the
        # value counts are computed once per column instead of per lookup.
        counts = joined[column].value_counts() if column in descriptions else {}
        d.append(counts.get('Yes', 0))
        dNo.append(counts.get('No', 0))
    # Row label: the bracketed part of the 5-digit question's description.
    desc = descriptions['%s#%d%s' % (base, 5, k)].split("[")[1][:-1]
    # Guard the overall share on the number of answers. The former
    # `... if d else d` test was always true for a three-element list, so a
    # row without any Yes/No answer divided by zero.
    answered = sum(d) + sum(dNo)
    overall = 100.0 * sum(d) / answered if answered else 0.0
    t.addRow([desc]
             + ["$%.0f\\%%$" % (100.0 * y / (y + n) if y else 0.0) for y, n in zip(d, dNo)]
             + ["$%.0f\\%%$" % overall])
# Caption figures: count and percentage of "Yes" on the top-level question per PIN length.
writtenCounts = [joined["%s#%d" % (base, digits)].value_counts() for digits in (4, 5, 6)]
subs = []
for x in writtenCounts:
    yes = x.get('Yes', 0)
    no = x.get('No', 0)
    subs.extend([yes, 100.0 * (1.0 * yes / (yes + no)) if yes else 0])
t.setCaption("Location of written down PINs by participants. %d (%.1f%%), %d (%.1f%%), %d (%.1f%%) " % tuple(subs) +
             "wrote down their 4-, 5-, 6-digit PINs respectively.")
t.reference = "pinsWritten"
t.writeLatexToFile(path="tab")
t
# One row per question, one column per PIN length: percentage of "Yes" among
# the Yes/No answers.
keys = ["WithdrawCash", "ReUse", "ReUseOther"]
t = h.Table(["l"] + ["c"] * 3, ["p{9cm}|ccc"])
t.header = [""] + ["%d-digit" % digits for digits in (4, 5, 6)]
for k in keys:
    writtenCounts = [joined["%s#%d" % (k, digits)].value_counts() for digits in (4, 5, 6)]
    subs = []
    for x in writtenCounts:
        if 'Yes' in x:
            noCount = x['No'] if 'No' in x else 0
            subs.append(100.0 * (1.0 * x['Yes'] / (x['Yes'] + noCount)))
        else:
            subs.append(0)
    # The row label is the description of the 4-digit variant of the question.
    desc = descriptions['%s#%d' % (k, 4)]
    t.addRow([desc] + ["$%.0f%s$" % (x, r"\%") for x in subs])
t.setCaption("Participants card sharing statistics")
t.reference = "withdrawCash"
t.writeLatexToFile(path="tab")
t
# Histogram of the "ReUse#4Num" column with unit-wide bins covering 0..10,
# exported through the project's figuresToLaTeX helper.
mf = h.figuresToLaTeX(columns=1, basename='reUseNumber', path='',
                      caption='Frequency of reuse of PINs, 16% of participants reuse at least one of their PINs.')
a = plt.figure(figsize=(8, 5), dpi=80)
ax = joined["ReUse#4Num"].plot(kind="hist", bins=np.arange(0, 11, 1))
# Grey bars with a slightly darker outline.
plt.setp(ax.patches, 'facecolor', '0.3', 'edgecolor', '0.15', 'alpha', 0.75)
# Re-label the existing ticks in math mode so they match the LaTeX document font.
locs, labels = plt.xticks()
plt.xticks(locs, [r'$%g$' % x for x in locs], size='large')
locs, labels = plt.yticks()
plt.yticks(locs, [r'$%g$' % x for x in locs], size='large')
plt.xlabel('Number of Payment Cards per PIN')
plt.ylabel('Frequency')
h.rstyle(ax)  # project helper; presumably restyles the axes - see helpers module
# Missing answers are dropped before the summary statistics are computed.
mf.addFigure(a, describeText=h.describe(joined["ReUse#4Num"].dropna().values))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(h.describe(joined["ReUse#4Num"].dropna().values))
mf.writeLaTeX()
# Table of where PINs are reused: per place and PIN length the share of "Yes"
# among the Yes/No answers, plus the share over all three lengths.
keys = [u'PhoneUnlock', u'Burglar', u'Voicemail', u'SIMUnlock', u'Computer']
base = 'ReUseOther'
t = h.Table(["l"] + ["c"] * 4, ["l|ccc|c"])
t.header = [""] + ["%d-digit" % digits for digits in (4, 5, 6)] + ["Sum"]
t.latexComment = "comments for 4 digits only: Online banking : 14, kindle: 1"
for k in keys:
    d, dNo = [], []
    for digits in (4, 5, 6):
        column = '%s#%d%s' % (base, digits, k)
        # Columns without a description entry count as "no answers"; the
        # value counts are computed once per column instead of per lookup.
        counts = joined[column].value_counts() if column in descriptions else {}
        d.append(counts.get('Yes', 0))
        dNo.append(counts.get('No', 0))
    # Row label: the bracketed part of the 5-digit question's description.
    desc = descriptions['%s#%d%s' % (base, 5, k)].split("[")[1][:-1]
    # Guard the overall share on the number of answers. The former
    # `... if d else d` test was always true for a three-element list, so a
    # row without any Yes/No answer divided by zero.
    answered = sum(d) + sum(dNo)
    overall = 100.0 * sum(d) / answered if answered else 0.0
    t.addRow([desc]
             + ["$%.0f\\%%$" % (100.0 * y / (y + n) if y else 0.0) for y, n in zip(d, dNo)]
             + ["$%.0f\\%%$" % overall])
# Caption figures: count and percentage of "Yes" on the top-level question per PIN length.
writtenCounts = [joined["ReUseOther#%d" % digits].value_counts() for digits in (4, 5, 6)]
subs = []
for x in writtenCounts:
    yes = x.get('Yes', 0)
    no = x.get('No', 0)
    subs.extend([yes, 100.0 * (1.0 * yes / (yes + no)) if yes else 0])
t.setCaption("Location of reuse of PINs by participants. %d (%.1f%%), %d (%.1f%%), %d (%.1f%%) " % tuple(subs) +
             "reused their 4-, 5-, 6-digit PINs, respectively.")
t.reference = "pinReUseLocations"
t.writeLatexToFile(path="tab")
t
# Table of whom PINs are shared with: per group and PIN length the share of
# "Yes" among the Yes/No answers, plus the share over all three lengths.
keys = [u'Stranger', u'Family', u'Flatmate', u'Spouse', u'Casual', u'Friend']
base = 'Shared'
t = h.Table(["l"] + ["c"] * 4, ["l|ccc|c"])
t.header = [""] + ["%d-digit" % digits for digits in (4, 5, 6)] + ["Sum"]
t.latexComment = "No Comments/other"
for k in keys:
    d, dNo = [], []
    for digits in (4, 5, 6):
        column = '%s#%d%s' % (base, digits, k)
        # Columns without a description entry count as "no answers"; the
        # value counts are computed once per column instead of per lookup.
        counts = joined[column].value_counts() if column in descriptions else {}
        d.append(counts.get('Yes', 0))
        dNo.append(counts.get('No', 0))
    # Row label: the bracketed part of the 5-digit question's description.
    desc = descriptions['%s#%d%s' % (base, 5, k)].split("[")[1][:-1]
    # Guard the overall share on the number of answers. The former
    # `... if d else d` test was always true for a three-element list, so a
    # row without any Yes/No answer divided by zero.
    answered = sum(d) + sum(dNo)
    overall = 100.0 * sum(d) / answered if answered else 0.0
    t.addRow([desc]
             + ["$%.0f\\%%$" % (100.0 * y / (y + n) if y else 0.0) for y, n in zip(d, dNo)]
             + ["$%.0f\\%%$" % overall])
# Caption figures: count and percentage of "Yes" on the top-level question per PIN length.
writtenCounts = [joined["%s#%d" % (base, digits)].value_counts() for digits in (4, 5, 6)]
subs = []
for x in writtenCounts:
    yes = x.get('Yes', 0)
    no = x.get('No', 0)
    subs.extend([yes, 100.0 * (1.0 * yes / (yes + no)) if yes else 0])
t.setCaption("Sharing of PINs by participants. %d (%.1f%%), %d (%.1f%%), %d (%.1f%%) " % tuple(subs) +
             "shared their 4-, 5-, 6-digit PINs respectively.")
t.reference = "pinSharing"
t.writeLatexToFile(path="tab")
t
# Table of what happened after a PIN was forgotten: per outcome and PIN length
# the share of "Yes" among the Yes/No answers, plus the share over all lengths.
keys = [u'Myself', u'Issued', u'ReqRem', u'Bank']
base = 'Forgot'
t = h.Table(["l"] + ["c"] * 4, ["p{8cm}|ccc|c"])
t.header = [""] + ["%d-digit" % digits for digits in (4, 5, 6)] + ["Sum"]
t.latexComment = "6 people stopped using their payment card"
for k in keys:
    d, dNo = [], []
    for digits in (4, 5, 6):
        column = '%s#%d%s' % (base, digits, k)
        # Columns without a description entry count as "no answers"; the
        # value counts are computed once per column instead of per lookup.
        counts = joined[column].value_counts() if column in descriptions else {}
        d.append(counts.get('Yes', 0))
        dNo.append(counts.get('No', 0))
    # Row label: the bracketed part of the 5-digit question's description.
    desc = descriptions['%s#%d%s' % (base, 5, k)].split("[")[1][:-1]
    # Guard the overall share on the number of answers. The former
    # `... if d else d` test was always true for a three-element list, so a
    # row without any Yes/No answer divided by zero.
    answered = sum(d) + sum(dNo)
    overall = 100.0 * sum(d) / answered if answered else 0.0
    t.addRow([desc]
             + ["$%.0f\\%%$" % (100.0 * y / (y + n) if y else 0.0) for y, n in zip(d, dNo)]
             + ["$%.0f\\%%$" % overall])
# Caption figures: count and percentage of "Yes" on the top-level question per PIN length.
writtenCounts = [joined["%s#%d" % (base, digits)].value_counts() for digits in (4, 5, 6)]
subs = []
for x in writtenCounts:
    yes = x.get('Yes', 0)
    no = x.get('No', 0)
    subs.extend([yes, 100.0 * (1.0 * yes / (yes + no)) if yes else 0])
t.setCaption("Forgetting of PINs by participants. %d (%.1f%%), %d (%.1f%%), %d (%.1f%%) " % tuple(subs) +
             "forgot their 4-, 5-, 6-digit PINs respectively. Additionally, 6 participants stated that they stopped using" +
             " the payment card after forgetting the PIN.")
t.reference = "pinForgot"
t.writeLatexToFile(path="tab")
t
# Table of memorisation patterns: per pattern and PIN length the share of
# "Yes" among the Yes/No answers, plus the share over all three lengths.
keys = [u'Number', u'Shape', u'Date', u'Change']
base = 'Pattern'
t = h.Table(["l"] + ["c"] * 4, ["l|ccc|c"])
t.header = [""] + ["%d-digit" % digits for digits in (4, 5, 6)] + ["Sum"]
t.latexComment = "Additionally, 6 participants stated that they associated numbers with letters and spell out a word."
for k in keys:
    d, dNo = [], []
    for digits in (4, 5, 6):
        column = '%s#%d%s' % (base, digits, k)
        # Columns without a description entry count as "no answers"; the
        # value counts are computed once per column instead of per lookup.
        counts = joined[column].value_counts() if column in descriptions else {}
        d.append(counts.get('Yes', 0))
        dNo.append(counts.get('No', 0))
    # Row label: the bracketed part of the 5-digit question's description.
    desc = descriptions['%s#%d%s' % (base, 5, k)].split("[")[1][:-1]
    # Guard the overall share on the number of answers. The former
    # `... if d else d` test was always true for a three-element list, so a
    # row without any Yes/No answer divided by zero.
    answered = sum(d) + sum(dNo)
    overall = 100.0 * sum(d) / answered if answered else 0.0
    t.addRow([desc]
             + ["$%.0f\\%%$" % (100.0 * y / (y + n) if y else 0.0) for y, n in zip(d, dNo)]
             + ["$%.0f\\%%$" % overall])
# Caption figures: count and percentage of "Yes" on the top-level question per PIN length.
writtenCounts = [joined["%s#%d" % (base, digits)].value_counts() for digits in (4, 5, 6)]
subs = []
for x in writtenCounts:
    yes = x.get('Yes', 0)
    no = x.get('No', 0)
    subs.extend([yes, 100.0 * (1.0 * yes / (yes + no)) if yes else 0])
t.setCaption("Use of patterns to remember PINs. %d (%.1f%%), %d (%.1f%%), %d (%.1f%%) " % tuple(subs) +
             "use patterns to remember their 4-, 5-, 6-digit PINs respectively.")
t.reference = "pinPattern"
t.writeLatexToFile(path="tab")
t
# Count table for the Longest column; `reorder` picks rows by their position
# in the value-count ordering.
descriptiveValueCounts(
    joined.Longest,
    caption="The age of participant's oldest PIN",
    reference="pinAge",
    header=["Age", "Freq"],
    reorder=[4, 3, 1, 2, 0],
)
# Table of reasons for changing the most used PIN. Each row is that reason's
# "Yes" count as a percentage of the "Yes" answers summed over all four reasons.
keys = [u'ChangedAsked', u'ChangedWant', u'ChangedCompromised', u'ChangedRegular']
base = ''
t = h.Table(["l"] + ["c"] * 1, ["l|c"])
t.header = [""] + ["Percentage"]
t.latexComment = ""
# .get() keeps the total well-defined when a reason was never answered "Yes";
# plain ['Yes'] indexing raised KeyError in that case even though the loop
# below already treats it as zero.
totalChanged = sum(joined[k].value_counts().get('Yes', 0) for k in keys)
for k in keys:
    column = '%s%s' % (base, k)
    yes = joined[column].value_counts().get('Yes', 0) if column in descriptions else 0
    # Row label: the bracketed part of the question's description.
    desc = descriptions[column].split("[")[1][:-1]
    # A non-zero `yes` implies a non-zero total, so the division is safe.
    share = 100.0 * yes / totalChanged if yes else 0.0
    t.addRow([desc, "$%.0f\\%%$" % share])
t.setCaption("Participant's reasons for changing their most used PIN")
t.reference = "pinChanged"
t.writeLatexToFile(path="tab")
t
joined.ChangedAsked.value_counts()
# Histogram of the ChangedNum column with unit-wide bins covering 0..8,
# exported through the project's figuresToLaTeX helper.
mf = h.figuresToLaTeX(columns=1, basename='numberChanges', path='',
                      caption='Participant\'s number of PIN changes')
a = plt.figure(figsize=(8, 5), dpi=80)
ax = joined.ChangedNum.plot(kind="hist", bins=np.arange(0, 9, 1))
# Grey bars with a slightly darker outline.
plt.setp(ax.patches, 'facecolor', '0.3', 'edgecolor', '0.15', 'alpha', 0.75)
# Re-label the existing ticks in math mode so they match the LaTeX document font.
locs, labels = plt.xticks()
plt.xticks(locs, [r'$%g$' % x for x in locs], size='large')
locs, labels = plt.yticks()
plt.yticks(locs, [r'$%g$' % x for x in locs], size='large')
plt.xlabel('Number of PIN changes')
plt.ylabel('Frequency')
h.rstyle(ax)  # project helper; presumably restyles the axes - see helpers module
mf.addFigure(a, describeText=h.describe(joined.ChangedNum.values))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(h.describe(joined.ChangedNum.values))
mf.writeLaTeX()