Commit cfc15cb

bryfry <bryon.fryer@gmail.com>
2013-05-26 18:23:25
round1 - p5
1 parent 11202b1
round1.py
@@ -6,27 +6,25 @@ import random
 from math import sqrt
 import simple_crypto as sc# helper functions
 
+stl = 20  # str_tips length
+do_4 = False
+
 # 1. Convert hex to base64 and back.
 print "\n---- Problem 1: Convert hex to base64 and back ----"
 p1_hex = "49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d"
 p1_b64 = "SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t"
-print "Given (hex) :", p1_hex
-print "Given (b64) :", p1_b64
-
 p1_hex_dec = p1_hex.decode ('hex')
-print "hex_decoded :", p1_hex_dec
-
 p1_b64_enc = base64.b64encode (p1_hex_dec)
-print "b64_encoded :", p1_b64_enc
-
 p1_b64_dec = base64.b64decode (p1_b64_enc)
-print "b64_decoded :", p1_b64_dec
-
 p1_hex_enc = p1_b64_dec.encode ('hex')
-print "hex_encoded :", p1_hex_enc
-
+print "hex (G) :", sc.str_tips (p1_hex, stl) 
+print "b64 (G) :", sc.str_tips (p1_b64, stl)
+print "hex_dec :", p1_hex_dec
+print "b64_enc :", sc.str_tips (p1_b64_enc, stl)
+print "b64_dec :", p1_b64_dec
+print "hex_enc :", sc.str_tips (p1_hex_enc, stl)
 assert p1_b64_enc == p1_b64, 'Hex -> B64 failed'
-assert p1_b64_dec == p1_hex_dec, 'Decoding mismatch (decoded b64 != decoded hex)'
+assert p1_b64_dec == p1_hex_dec, 'Decoding mismatch (dec b64 != dec hex)'
 assert p1_hex_enc == p1_hex, 'b64 -> hex failed'
 
 
@@ -38,12 +36,11 @@ p2_r = "746865206b696420646f6e277420706c6179"
 p2_a_dec = p2_a.decode ('hex')
 p2_b_dec = p2_b.decode ('hex')
 p2_r_dec = p2_r.decode ('hex')
-print "a decoded : ", p2_a_dec
-print "b decoded : ", p2_b_dec
-print "given a^b : ", p2_r_dec
-
 p2_o_dec =  sc.xor_string(p2_a_dec, p2_b_dec)
-print "      a^b : ", p2_o_dec
+print "a_dec   : ", p2_a_dec
+print "b_dec   : ", p2_b_dec
+print "a^b (G) : ", p2_r_dec
+print "a^b     : ", p2_o_dec
 assert p2_o_dec.encode ('hex') == p2_r, 'a^b failed'
 
 
@@ -51,26 +48,62 @@ assert p2_o_dec.encode ('hex') == p2_r, 'a^b failed'
 print "\n---- Problem 3: Single-character XOR Cipher ----"
 p3_hex = "1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736"
 p3_hex_dec = p3_hex.decode ('hex')
-key = sc.ngram_freq_cos_sim (p3_hex_dec)
-print "xor key    : ", key
+f = sc.freq_array_load()
+key, value = sc.ngram_freq_cos_sim (p3_hex_dec, f)
 p3_xor_dec = sc.xor_char (p3_hex_dec, key)
-print "p3_xor_dec : ", p3_xor_dec
 p3_xor_enc = sc.xor_char (p3_xor_dec, key)
 p3_hex_enc = p3_xor_enc.encode ('hex')
-text_filter = sc.translator (keep=string.ascii_letters)
-assert p3_hex == p3_hex_enc, 'Decryption failed'
+print "p3 dec : ", key
+print "p3 dec : ", p3_xor_dec
+assert p3_hex == p3_hex_enc, 'dec/enc failed'
 
 # 4. Detect single-character XOR
-print "\n---- Problem 4: Detect single-character XOR ----"
-f = open('gistfile1.txt')
-lines = f.readlines()
-f.close()
-'''
-p4_xor_dec = [] 
-for a in lines:
-    tmp = a.strip().decode('hex')
-    p4_xor_dec.append (sc.xor_char (a.strip(), sc.freq_decode_char_xor (a.strip())).lower())
+if do_4:
+    print "\n---- Problem 4: Detect single-character XOR ----"
+    lines = [line.strip() for line in open('gistfile1.txt')]
+    best = (0,'',"")
+    f = sc.freq_array_load()
+    for x in lines:
+        x_raw = x.decode('hex')
+        key, value = sc.ngram_freq_cos_sim (x_raw, f)
+        if value > best[0]:
+            best = (value, key, sc.xor_str(x_raw, key))
+    print "p4 key : ", best[1]
+    print "p4 dec : ", best[2].strip()
+
+
+# 5. Repeating-key XOR Cipher
+print "\n---- Problem 5: Repeating-key XOR Cipher ----"
+p5_str = "Burning 'em, if you ain't quick and nimble\nI go crazy when I hear a cymbal"
+p5_key = "ICE"
+p5_enc_hex = "0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a26226324272765272a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f"
+p5_enc = p5_enc_hex.decode('hex')
+p5_str_enc = sc.xor_str(p5_str,p5_key)
+p5_str_enc_hex = sc.xor_str(p5_str,p5_key).encode('hex')
+p5_dec = sc.xor_str(p5_enc, p5_key)
+print "p5 str     (G) :", sc.str_tips (p5_str, stl)
+print "p5 enc hex (G) :", sc.str_tips (p5_enc_hex, stl)
+print "p5 str enc hex :", sc.str_tips (p5_str_enc_hex, stl)
+print "p5 dec         :", sc.str_tips (p5_dec, stl)
+assert p5_enc == p5_str_enc, "p5 encryption failed"
+assert p5_dec == p5_str, "p5 decryption failed"
 
-print sc.most_english(p4_xor_dec)
-'''
 
+# 5b. Encrypt a bunch of stuff using your repeating-key XOR function
+keys = 10
+sa = [] # string array
+sa.append ("Please! This is supposed to be a happy occasion.")
+sa.append ("Let's not bicker and argue over who killed who.")
+sa.append ("Go and boil your bottoms, you sons of silly persons!")
+sa.append ("Listen, strange women lyin' in ponds distributin' swords is no basis for a system of government.")
+sa.append ("Supreme executive power derives from a mandate from the masses, not from some farcical aquatic ceremony.")
+for s in sa:
+    for k in range(keys):
+        key_len = random.randint (3,30)
+        key = ''.join (random.choice (string.ascii_letters) for r in range (key_len))
+        s_e = sc.xor_str (s, key)
+        s_ed = sc.xor_str (s_e, key)
+        s_ede = sc.xor_str (s_ed, key)
+        assert s_ed == s, "p5b enc/dec failed (ed/s)"
+        assert s_ede == s_e, "p5b enc/dec failed (ede/e)"
+print "Repeating-key XOR tested on", len(sa), "strings, with", len(sa)*keys, "unique keys"
simple_crypto.py
@@ -62,10 +62,8 @@ def dict_cosine_sim (a, b):
     if a_sq_sum == 0.0 or b_sq_sum == 0.0:
         return 0
     return (ab_sum / (sqrt (a_sq_sum) * sqrt (b_sq_sum)))
- 
-def ngram_freq_cos_sim (s):
-    # returns the most likely char used to 'encrypt' the string s
 
+def freq_array_load():
     # prepare the 'known' relative frequencies of letters in English
     # from http://www.cryptograms.org/letter-frequencies.php
     # http://www3.nd.edu/~busiforc/handouts/cryptography/Letter%20Frequencies.html  
@@ -79,7 +77,47 @@ def ngram_freq_cos_sim (s):
     trigram_csv = csv.DictReader (open ('trigram_freq.csv', 'rb'), delimiter=',')    
     quadgram_csv = csv.DictReader (open ('quadgram_freq.csv', 'rb'), delimiter=',')    
     sum_cos_sim = {}
-#    sum_cos_sim = collections.defaultdict (list)
+    
+    for line in freq_csv:
+        eng_freq[line['Letter']] = line['Frequency']
+    for line in bigram_csv:
+        bigram_freq[line['Bigram'].lower()] = line['Frequency']
+    for line in trigram_csv:
+        trigram_freq[line['Trigram'].lower()] = line['Frequency']
+    for line in quadgram_csv:
+        quadgram_freq[line['Quadgram'].lower()] = line['Frequency']
+    return [(1,eng_freq), (2,bigram_freq), (3,trigram_freq), (4,quadgram_freq)] 
+
+
+def ngram_freq_cos_sim (s, freq_array):
+    # returns (key,value) where key is likley to decrypt string s
+    # value is the sum'd cosine similarity values w.r.t. the given freq_array
+
+    sum_cos_sim = {}
+    # remove non-ascii letters
+    text_filter = translator (keep=string.ascii_letters)
+    for char in string.printable:
+        sum_cos_sim[char] = 0
+        t = text_filter (xor_str (s, char)).lower()
+        for n,f in freq_array:
+            sum_cos_sim[char] += dict_cosine_sim (ngram_freq(n, t, f), f)
+           
+    key = max (sum_cos_sim, key=sum_cos_sim.get) 
+    value = sum_cos_sim[key]
+    return (key, value)
+           
+            
+def old_ngram_freq_cos_sim (s):
+    eng_freq = {}
+    bigram_freq = {}
+    trigram_freq = {}
+    quadgram_freq = {}
+    freq_csv = csv.DictReader (open ('letter_freq.csv', 'rb'), delimiter=',')    
+    bigram_csv = csv.DictReader (open ('bigram_freq.csv', 'rb'), delimiter=',')    
+    trigram_csv = csv.DictReader (open ('trigram_freq.csv', 'rb'), delimiter=',')    
+    quadgram_csv = csv.DictReader (open ('quadgram_freq.csv', 'rb'), delimiter=',')    
+    sum_cos_sim = {}
+    
     for line in freq_csv:
         eng_freq[line['Letter']] = line['Frequency']
     for line in bigram_csv:
@@ -119,9 +157,7 @@ def ngram_freq_cos_sim (s):
     '''
     key = max (sum_cos_sim, key=sum_cos_sim.get) 
     value = sum_cos_sim[key]
-    #print (key, value)
     return (key, value)
-    #return max (sum_cos_sim, key=sum_cos_sim.get) 
 
 def ngram_freq (n, s, freq_dict):
     count = collections.Counter()
@@ -142,3 +178,10 @@ def test_xor (s, k):
     assert encrypt == encrypt_again
     assert decrypt == s
     return True
+
+def str_tips (s, n):
+    if n > len(s)/2:
+        return s
+    else:
+        return s[:n] + " ... " + s[-n:]
+