Commit cfc15cb
Changed files (2)
round1.py
@@ -6,27 +6,25 @@ import random
from math import sqrt
import simple_crypto as sc# helper functions
+stl = 20 # str_tips length
+do_4 = False
+
# 1. Convert hex to base64 and back.
print "\n---- Problem 1: Convert hex to base64 and back ----"
p1_hex = "49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d"
p1_b64 = "SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t"
-print "Given (hex) :", p1_hex
-print "Given (b64) :", p1_b64
-
p1_hex_dec = p1_hex.decode ('hex')
-print "hex_decoded :", p1_hex_dec
-
p1_b64_enc = base64.b64encode (p1_hex_dec)
-print "b64_encoded :", p1_b64_enc
-
p1_b64_dec = base64.b64decode (p1_b64_enc)
-print "b64_decoded :", p1_b64_dec
-
p1_hex_enc = p1_b64_dec.encode ('hex')
-print "hex_encoded :", p1_hex_enc
-
+print "hex (G) :", sc.str_tips (p1_hex, stl)
+print "b64 (G) :", sc.str_tips (p1_b64, stl)
+print "hex_dec :", p1_hex_dec
+print "b64_enc :", sc.str_tips (p1_b64_enc, stl)
+print "b64_dec :", p1_b64_dec
+print "hex_enc :", sc.str_tips (p1_hex_enc, stl)
assert p1_b64_enc == p1_b64, 'Hex -> B64 failed'
-assert p1_b64_dec == p1_hex_dec, 'Decoding mismatch (decoded b64 != decoded hex)'
+assert p1_b64_dec == p1_hex_dec, 'Decoding mismatch (dec b64 != dec hex)'
assert p1_hex_enc == p1_hex, 'b64 -> hex failed'
@@ -38,12 +36,11 @@ p2_r = "746865206b696420646f6e277420706c6179"
p2_a_dec = p2_a.decode ('hex')
p2_b_dec = p2_b.decode ('hex')
p2_r_dec = p2_r.decode ('hex')
-print "a decoded : ", p2_a_dec
-print "b decoded : ", p2_b_dec
-print "given a^b : ", p2_r_dec
-
p2_o_dec = sc.xor_string(p2_a_dec, p2_b_dec)
-print " a^b : ", p2_o_dec
+print "a_dec : ", p2_a_dec
+print "b_dec : ", p2_b_dec
+print "a^b (G) : ", p2_r_dec
+print "a^b : ", p2_o_dec
assert p2_o_dec.encode ('hex') == p2_r, 'a^b failed'
@@ -51,26 +48,62 @@ assert p2_o_dec.encode ('hex') == p2_r, 'a^b failed'
print "\n---- Problem 3: Single-character XOR Cipher ----"
p3_hex = "1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736"
p3_hex_dec = p3_hex.decode ('hex')
-key = sc.ngram_freq_cos_sim (p3_hex_dec)
-print "xor key : ", key
+f = sc.freq_array_load()
+key, value = sc.ngram_freq_cos_sim (p3_hex_dec, f)
p3_xor_dec = sc.xor_char (p3_hex_dec, key)
-print "p3_xor_dec : ", p3_xor_dec
p3_xor_enc = sc.xor_char (p3_xor_dec, key)
p3_hex_enc = p3_xor_enc.encode ('hex')
-text_filter = sc.translator (keep=string.ascii_letters)
-assert p3_hex == p3_hex_enc, 'Decryption failed'
+print "p3 dec : ", key
+print "p3 dec : ", p3_xor_dec
+assert p3_hex == p3_hex_enc, 'dec/enc failed'
# 4. Detect single-character XOR
-print "\n---- Problem 4: Detect single-character XOR ----"
-f = open('gistfile1.txt')
-lines = f.readlines()
-f.close()
-'''
-p4_xor_dec = []
-for a in lines:
- tmp = a.strip().decode('hex')
- p4_xor_dec.append (sc.xor_char (a.strip(), sc.freq_decode_char_xor (a.strip())).lower())
+if do_4:
+ print "\n---- Problem 4: Detect single-character XOR ----"
+ lines = [line.strip() for line in open('gistfile1.txt')]
+ best = (0,'',"")
+ f = sc.freq_array_load()
+ for x in lines:
+ x_raw = x.decode('hex')
+ key, value = sc.ngram_freq_cos_sim (x_raw, f)
+ if value > best[0]:
+ best = (value, key, sc.xor_str(x_raw, key))
+ print "p4 key : ", best[1]
+ print "p4 dec : ", best[2].strip()
+
+
+# 5. Repeating-key XOR Cipher
+print "\n---- Problem 5: Repeating-key XOR Cipher ----"
+p5_str = "Burning 'em, if you ain't quick and nimble\nI go crazy when I hear a cymbal"
+p5_key = "ICE"
+p5_enc_hex = "0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a26226324272765272a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f"
+p5_enc = p5_enc_hex.decode('hex')
+p5_str_enc = sc.xor_str(p5_str,p5_key)
+p5_str_enc_hex = sc.xor_str(p5_str,p5_key).encode('hex')
+p5_dec = sc.xor_str(p5_enc, p5_key)
+print "p5 str (G) :", sc.str_tips (p5_str, stl)
+print "p5 enc hex (G) :", sc.str_tips (p5_enc_hex, stl)
+print "p5 str enc hex :", sc.str_tips (p5_str_enc_hex, stl)
+print "p5 dec :", sc.str_tips (p5_dec, stl)
+assert p5_enc == p5_str_enc, "p5 encryption failed"
+assert p5_dec == p5_str, "p5 decryption failed"
-print sc.most_english(p4_xor_dec)
-'''
+# 5b. Encrypt a bunch of stuff using your repeating-key XOR function
+keys = 10
+sa = [] # string array
+sa.append ("Please! This is supposed to be a happy occasion.")
+sa.append ("Let's not bicker and argue over who killed who.")
+sa.append ("Go and boil your bottoms, you sons of silly persons!")
+sa.append ("Listen, strange women lyin' in ponds distributin' swords is no basis for a system of government.")
+sa.append ("Supreme executive power derives from a mandate from the masses, not from some farcical aquatic ceremony.")
+for s in sa:
+ for k in range(keys):
+ key_len = random.randint (3,30)
+ key = ''.join (random.choice (string.ascii_letters) for r in range (key_len))
+ s_e = sc.xor_str (s, key)
+ s_ed = sc.xor_str (s_e, key)
+ s_ede = sc.xor_str (s_ed, key)
+ assert s_ed == s, "p5b enc/dec failed (ed/s)"
+ assert s_ede == s_e, "p5b enc/dec failed (ede/e)"
+print "Repeating-key XOR tested on", len(sa), "strings, with", len(sa)*keys, "unique keys"
simple_crypto.py
@@ -62,10 +62,8 @@ def dict_cosine_sim (a, b):
if a_sq_sum == 0.0 or b_sq_sum == 0.0:
return 0
return (ab_sum / (sqrt (a_sq_sum) * sqrt (b_sq_sum)))
-
-def ngram_freq_cos_sim (s):
- # returns the most likely char used to 'encrypt' the string s
+def freq_array_load():
# prepare the 'known' relative frequencies of letters in English
# from http://www.cryptograms.org/letter-frequencies.php
# http://www3.nd.edu/~busiforc/handouts/cryptography/Letter%20Frequencies.html
@@ -79,7 +77,47 @@ def ngram_freq_cos_sim (s):
trigram_csv = csv.DictReader (open ('trigram_freq.csv', 'rb'), delimiter=',')
quadgram_csv = csv.DictReader (open ('quadgram_freq.csv', 'rb'), delimiter=',')
sum_cos_sim = {}
-# sum_cos_sim = collections.defaultdict (list)
+
+ for line in freq_csv:
+ eng_freq[line['Letter']] = line['Frequency']
+ for line in bigram_csv:
+ bigram_freq[line['Bigram'].lower()] = line['Frequency']
+ for line in trigram_csv:
+ trigram_freq[line['Trigram'].lower()] = line['Frequency']
+ for line in quadgram_csv:
+ quadgram_freq[line['Quadgram'].lower()] = line['Frequency']
+ return [(1,eng_freq), (2,bigram_freq), (3,trigram_freq), (4,quadgram_freq)]
+
+
+def ngram_freq_cos_sim (s, freq_array):
+ # returns (key,value) where key is likley to decrypt string s
+ # value is the sum'd cosine similarity values w.r.t. the given freq_array
+
+ sum_cos_sim = {}
+ # remove non-ascii letters
+ text_filter = translator (keep=string.ascii_letters)
+ for char in string.printable:
+ sum_cos_sim[char] = 0
+ t = text_filter (xor_str (s, char)).lower()
+ for n,f in freq_array:
+ sum_cos_sim[char] += dict_cosine_sim (ngram_freq(n, t, f), f)
+
+ key = max (sum_cos_sim, key=sum_cos_sim.get)
+ value = sum_cos_sim[key]
+ return (key, value)
+
+
+def old_ngram_freq_cos_sim (s):
+ eng_freq = {}
+ bigram_freq = {}
+ trigram_freq = {}
+ quadgram_freq = {}
+ freq_csv = csv.DictReader (open ('letter_freq.csv', 'rb'), delimiter=',')
+ bigram_csv = csv.DictReader (open ('bigram_freq.csv', 'rb'), delimiter=',')
+ trigram_csv = csv.DictReader (open ('trigram_freq.csv', 'rb'), delimiter=',')
+ quadgram_csv = csv.DictReader (open ('quadgram_freq.csv', 'rb'), delimiter=',')
+ sum_cos_sim = {}
+
for line in freq_csv:
eng_freq[line['Letter']] = line['Frequency']
for line in bigram_csv:
@@ -119,9 +157,7 @@ def ngram_freq_cos_sim (s):
'''
key = max (sum_cos_sim, key=sum_cos_sim.get)
value = sum_cos_sim[key]
- #print (key, value)
return (key, value)
- #return max (sum_cos_sim, key=sum_cos_sim.get)
def ngram_freq (n, s, freq_dict):
count = collections.Counter()
@@ -142,3 +178,10 @@ def test_xor (s, k):
assert encrypt == encrypt_again
assert decrypt == s
return True
+
+def str_tips (s, n):
+ if n > len(s)/2:
+ return s
+ else:
+ return s[:n] + " ... " + s[-n:]
+