-
Notifications
You must be signed in to change notification settings - Fork 0
/
Generalization.py
72 lines (66 loc) · 4.42 KB
/
Generalization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import difflib
from difflib import SequenceMatcher
import itertools
# Characters ignored when comparing aircraft names: hyphens, spaces,
# newlines and parentheses.
_IGNORED_CHARS = str.maketrans("", "", "- \n()")


def _normalize(name):
    """Return *name* lower-cased, without hyphens, spaces, newlines or parentheses."""
    return name.translate(_IGNORED_CHARS).lower()


def _read_names(path):
    """Return the non-blank lines of the text file *path*, whitespace-stripped."""
    with open(path, "r") as handle:
        stripped = (line.strip() for line in handle)
        return [name for name in stripped if name]


def _score_names(query, names, scores):
    """Store in *scores* the percentage similarity of each of *names* to *query*.

    *query* must already be normalized. Scanning of *names* stops at the
    first exact match, because nothing later in the list can score higher.
    """
    # Each candidate is compared on its first len(query) + 1 normalized
    # characters only, so a longer name is judged on its prefix.
    window = len(query) + 1
    for name in names:
        candidate = _normalize(name)
        if candidate == query:
            scores[name] = 100
            break
        ratio = difflib.SequenceMatcher(None, query, candidate[:window]).ratio()
        scores[name] = ratio * 100  # ratio() is in [0, 1]; store a percentage


def search(plane, *, allied_path="Aircraft - Allied.txt",
           axis_path="Aircraft - Axis.txt", threshold=50):
    """Return the known aircraft name that best matches the request *plane*.

    Aircraft names are read, one per line, from the Allied and Axis text
    files. The request and every name are compared in a normalized form
    (lower case; hyphens, spaces, newlines and parentheses removed) using
    ``difflib.SequenceMatcher``.

    Args:
        plane: Free-text aircraft name entered by the user.
        allied_path: Path of the Allied aircraft list.
        axis_path: Path of the Axis aircraft list.
        threshold: Percentage similarity the best candidate must exceed.

    Returns:
        The best-matching name exactly as spelled in its file, or the string
        ``"No match"`` when the request is empty, no names are available, or
        the best similarity does not exceed *threshold*. When several names
        share the best similarity the shortest one is returned; among equally
        short names the one listed first wins (Allied before Axis).

    Raises:
        OSError: If either aircraft list cannot be opened.
    """
    allied_names = _read_names(allied_path)
    axis_names = _read_names(axis_path)

    query = _normalize(plane)
    if not query:
        return "No match"

    # Maps each aircraft name to its percentage similarity with the request.
    scores = {}
    _score_names(query, allied_names, scores)
    _score_names(query, axis_names, scores)
    if not scores:
        return "No match"

    best = max(scores.values())
    if best <= threshold:
        return "No match"

    # dicts keep insertion order and min() returns the first minimum, so ties
    # on both similarity and length resolve to the earliest listed name.
    top_names = [name for name, score in scores.items() if score == best]
    return min(top_names, key=len)
if __name__ == "__main__":
    # Interactive loop: prompt for an aircraft request and print the result
    # of search() for it. Runs only when the file is executed as a script,
    # so importing the module does not block on input.
    while True:
        try:
            plane = input("Please enter a request: ")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: leave the loop instead of crashing
            # with a traceback.
            break
        # Show the match to the user; previously the result was discarded.
        print(search(plane))