Commit 11202b1

bryon <bryon.fryer@gmail.com>
2013-05-26 15:26:55
update
1 parent b4453cd
Changed files (1)
simple_crypto.py
@@ -30,6 +30,7 @@ def xor_string (a_string,b_string):
 
 def xor_char (s, c):
     # xor each character of the string s with the char c
+    # Mostly obsolete: xor_str with a one-character key performs the same per-character XOR, so prefer xor_str.
     xor_string = ''
     for a in s:
         xor_string += chr (ord (a) ^ ord (c))
@@ -37,15 +38,12 @@ def xor_char (s, c):
 
 def xor_str (s, k):
     # xor each character of the string s with each char in k (repeating of course)
-    xor_string = ''
+    out = ''
     index = 0
-    for a in s:
-        # xor loop over key k
-        xor_string = xor_string.join(chr(ord(a) ^ ord(k[index])))
+    for char in s:
+        out = out + (chr (ord(char) ^ ord(k[index])))
         index = (index + 1) % len(k)
-        
-    xor_string = text_filter (xor_string)
-    return xor_string.lower()
+    return out
 
 def dict_euclidian_dist (a, b):
     e_sum = 0
@@ -80,7 +78,8 @@ def ngram_freq_cos_sim (s):
     bigram_csv = csv.DictReader (open ('bigram_freq.csv', 'rb'), delimiter=',')    
     trigram_csv = csv.DictReader (open ('trigram_freq.csv', 'rb'), delimiter=',')    
     quadgram_csv = csv.DictReader (open ('quadgram_freq.csv', 'rb'), delimiter=',')    
-    sum_cos_sim = collections.defaultdict (list)
+    sum_cos_sim = {}
+#    sum_cos_sim = collections.defaultdict (list)
     for line in freq_csv:
         eng_freq[line['Letter']] = line['Frequency']
     for line in bigram_csv:
@@ -94,7 +93,7 @@ def ngram_freq_cos_sim (s):
     text_filter = translator (keep=string.ascii_letters)
     
     # generate 'decrypted' text using each lowercase letters as the key
-    for char in string.ascii_lowercase:
+    for char in string.printable:
         sum_cos_sim[char] = 0
         char_dec = text_filter (xor_char (s, char)).lower()
         
@@ -114,12 +113,15 @@ def ngram_freq_cos_sim (s):
         quadgram_dec_freq = ngram_freq(4, char_dec, quadgram_freq)
         sum_cos_sim[char] += dict_cosine_sim (quadgram_dec_freq, quadgram_freq)
 
-    '''
+    '''#
     for t in sorted(sum_cos_sim, key=sum_cos_sim.get):
         print t, sum_cos_sim[t]
     '''
-
-    return max (sum_cos_sim, key=sum_cos_sim.get)
+    key = max (sum_cos_sim, key=sum_cos_sim.get) 
+    value = sum_cos_sim[key]
+    #print (key, value)
+    return (key, value)
+    #return max (sum_cos_sim, key=sum_cos_sim.get) 
 
 def ngram_freq (n, s, freq_dict):
     count = collections.Counter()
@@ -127,5 +129,16 @@ def ngram_freq (n, s, freq_dict):
     for i in range(0,len(s)-(n-1)):
         count[s[i:len(s)-(len(s)-(n+i))]] += 1
     for key in freq_dict:
-        freq[key] = count.get(key,0.0)/float(len(s)-(n-1))
+        if (len(s)-(n-1)==0):
+            freq[key] = 0.0
+        else:
+            freq[key] = count.get(key,0.0)/float(len(s)-(n-1))
     return freq
+
+def test_xor (s, k):
+    encrypt = xor_str (s , k)
+    decrypt = xor_str (encrypt, k)
+    encrypt_again = xor_str (decrypt, k)
+    assert encrypt == encrypt_again
+    assert decrypt == s
+    return True