Merge pull request #10 from corbett/master

test data generation code and file, README updated for #3
tomrittervg · Jul 29, 2014 · c07e991 · c07e991
2 parents e32639d + 9814103
commit c07e991
Show file tree

Hide file tree

Showing 5 changed files with 588 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -14,9 +14,9 @@ We have a bajillion things we'd like to run a usability study on, but to start s
 We suggest the following types of fingerprint types.  There are certainly more, but the more we try to test the bigger we're making it.
 
  1. Hexadecimal digits ala PGP fingerprints: 8ACD 146E A94C EB12 E4EA  6915 66A1 0918 9B79 658F
- 2. Pseudowords (For example: djijeh - isoy - dacif - qipc - buyowa)
- 3. English Words (For example: bridge - late - sister - plane - brush - error - cup - soup - organization - great - quality - offer - dead)
- 4. English poems (See [example implementation](https://github.com/akwizgran/basic-english) and [example comparison](https://moderncrypto.org/mail-archive/messaging/2014/000125.html)
+ 2. English Words (For example: bridge - late - sister - plane - brush - error - cup - soup - organization - great - quality - offer - dead)
+ 3. English poems (See [example implementation](https://github.com/akwizgran/basic-english) and [example comparison](https://moderncrypto.org/mail-archive/messaging/2014/000125.html))
+ 4. Pseudowords (For example: djijeh - isoy - dacif - qipc - buyowa)
  5. Visual Fingerprints (Using OpenSSH's [visual host keys](http://www.kcbug.org/?p=18))
 
 ## Comparison Mechanisms
@@ -64,6 +64,42 @@ To create the computationally chosen flaw, we will take a target fingerprint and
 
 To avoid having every subject see an exact distribution of match and non-match for each type, each subject will get a randomly selected set of tests. The goal is that over N subjects, we will get a statistically valid and even distribution of trials for each category.
 
+## Test generation
+### Demo
+ 1. `cd pseudoword_testdata; make; cd ..`
+ 2. `python demoTestData.py`
+ Generates
+ ```
+ #1. Hexadecimal digits ala PGP fingerprints
+ 9F7B726D789BEB58D3E2FD79131C92AC
+ 397B02CD789BE55033E2BD7B121F52AC
+ #2. English Words
+ living - plate - receipt - limit - rat - organization - toe - rub - road - before - attraction - light - scissors
+ living - discovery - receipt - limit - church - organization - cart - exchange - road - surprise - attraction - light - scissors
+
+ #3. English poems
+ his dead system rests widely from her true map
+ this flat power decides on the tight walk with his shelf
+
+
+ her rough system sits widely from her round map
+ this clear pull wins across your smooth walk to our shelf
+
+ #4. Pseudowords
+ toswoc - ivuf - nayan - pem2 - atakg
+ 5esiku - ivug - aa5an - pewh - ataog
+ ```
+
+### Generation
+ `genTestData.py` has code to generate tests for N pairs of participants. An example with N=15 
+  `python ./genTestData.py 15 > testData.csv &`
+
+`testData.csv` then has, one per line
+ `#pair   fingerprint     comparison      error   Alice   Bob     judgement`
+ with judgement yet to be filled out. Pair is a pair id, fingerprint is one of the fingerprint types (currently just supporting 4), comparison is Phone or Business card, and error is `small mismatch` (fingerprints are either exact or have a small mismatch, with 50/50 odds) or `large mismatch` (fingerprints are either exact or statistically randomly different, with 50/50 odds). For each combination of these variables, 2 trials are generated, for a total of 32 per testing pair of participants.
+
+
+
 ## Rejected Ideas
 
 ### The 'Head Fake Approach

diff --git a/demoTestData.py b/demoTestData.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+
+from hexadecimal_testdata.hexdata import genData as hexData
+from englishword_testdata.gen_basic import genData as englishWordData
+from englishpoems_testdata.gendata import genData as englishPoemData
+from pseudoword_testdata.keyname_script import genData as pseudoWordData
+
+print "#1. Hexadecimal digits ala PGP fingerprints"  # section header (Python 2 print statement)
+for f in hexData():  # print each item returned by the hex generator on its own line
+    print f
+print "#2. English Words"  # section header
+for f in englishWordData():  # same pattern for the English-word generator
+    print f
+print "#3. English poems"  # section header
+for f in englishPoemData():  # same pattern for the English-poem generator
+    print f
+print "#4. Pseudowords"  # section header
+for f in pseudoWordData():  # same pattern for the pseudoword generator
+    print f
diff --git a/genTestData.py b/genTestData.py
@@ -1,15 +1,56 @@
 #!/usr/bin/env python
 
+from random import choice
+import sys
+
 from hexadecimal_testdata.hexdata import genData as hexData
 from englishword_testdata.gen_basic import genData as englishWordData
 from englishpoems_testdata.gendata import genData as englishPoemData
 from pseudoword_testdata.keyname_script import genData as pseudoWordData
 
-for f in hexData():
-    print f
-for f in englishWordData():
-    print f
-for f in englishPoemData():
-    print f
-for f in pseudoWordData():
-    print f
+"""
+4 Fingerprint Types (TODO: what happened to 5th?)
+2 Comparison Mechanisms
+2 Error Rates
+x 2 Outcomes (Match or Not-Match)
+---
+32 Test Cases
+ 
+Run: ./genTestData.py <testerpairs> > testData.csv
+"""
+
+
+def farFingerprintsToCompare(genData) :  # "large mismatch" trial: returns (reference, reference-or-other) built from two genData() calls
+    perfectMatch,almostMatch = genData()  # almostMatch is not used in this function
+    noMatch,noMatch =  genData()  # NOTE(review): same name twice; targets bind left to right, so noMatch ends as the 2nd element
+    return perfectMatch,choice([perfectMatch,noMatch])  # second value is picked by random.choice from the two candidates
+
+def closeFingerprintsToCompare(genData):  # "small mismatch" trial: returns (reference, reference-or-almostMatch)
+    perfectMatch,almostMatch = genData()  # presumably (fingerprint, slightly altered variant) -- TODO confirm against the genData implementations
+    noMatch,noMatch =  genData()  # NOTE(review): noMatch is never read here, so this second genData() call looks redundant
+    return perfectMatch,choice([perfectMatch,almostMatch])  # second value is picked by random.choice from the two candidates
+
+def normalize(s):  # collapse a possibly multi-line fingerprint string onto one line
+    return s.strip().replace('\n','<br>')  # trim surrounding whitespace, then replace remaining newlines with '<br>'
+
+
+fingerprints = {'hex':hexData,'english word':englishWordData,'english poem':englishPoemData,'pseudo word':pseudoWordData}  # label -> imported genData function
+comparisons = ['Business card','Phone']  # labels written to the 'comparison' column
+errors = {'large mismatch':farFingerprintsToCompare,'small mismatch':closeFingerprintsToCompare}  # label -> function that builds the fingerprint pair
+outcomes = ['match','not match']  # NOTE(review): genTestData only iterates this list; the values themselves are never read
+
+def genTestData(testerpairs):  # print tab-separated trial rows to stdout for `testerpairs` pairs (Python 2 print statements)
+    print '#pair\tfingerprint\tcomparison\terror\tAlice\tBob\tjudgement'  # header row with seven column names
+    for tid in  range(testerpairs):  # tid is the 0-based pair id written in the first column
+        for mech in comparisons:
+            for errorDesc,errorFunc in errors.items():
+                for fingerprintDesc,fingerprintFunc  in fingerprints.items():
+                    for possibleOutcome in outcomes:  # possibleOutcome is unused; the loop just emits one row per entry in outcomes
+                        alicePrint,bobPrint = errorFunc(fingerprintFunc)  # errorFunc calls the generator and randomly picks Bob's fingerprint
+                        print '\t'.join([str(tid),fingerprintDesc,mech,errorDesc,normalize(alicePrint),normalize(bobPrint)])  # six fields; no judgement value
+
+
+if __name__ == '__main__':  # run only when executed as a script
+    genTestData(int(sys.argv[1]))  # first CLI argument = number of tester pairs; a missing argument raises IndexError, a non-integer ValueError
+
+
diff --git a/hexadecimal_testdata/hexdata.py b/hexadecimal_testdata/hexdata.py