Skip to content

Commit

Permalink
added duplicate checker
Browse files Browse the repository at this point in the history
  • Loading branch information
hymm committed Feb 22, 2018
1 parent de20196 commit e744a41
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 4 deletions.
29 changes: 26 additions & 3 deletions RankingFunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from re import compile, split
import os
import csv
from fuzzywuzzy import process

from hmc_urllib import getHTML
from RankingSettings import * # Imports the dictionary of tags with names as well as global settings
Expand Down Expand Up @@ -116,9 +117,9 @@ def AddCsv(CsvFilename):
def GetDataCsv(filename, Dict):
"""Opens 'filename' csv, adds all players to 'Dict' along with default values,
adds 'filename' a player appears in to their Dict entry, and returns a list with the results
of each match in 'filename'. Columns in 'filename' should be labeled 'Team 1', 'Team 2', ,
with each match separated by a carriage return (a new line).
TxtFiles: a string; the .txt file to be read.
of each match in 'filename'. Columns in 'filename' should be labeled 'Team 1', 'Team 2',
'Score 1', and 'Score 2'
filename: a string; the .csv file to be read.
Dict: one of the Title dictionaries (recommended to use the TitleDict function)."""
csvFilename = AddCsv(filename)
if 'ResultsFolder' in globals():
Expand Down Expand Up @@ -1137,3 +1138,25 @@ def ProcessFolder(path):
for file in os.listdir(path):
currentFile = os.path.join(path, file)
ProcessRankings([currentFile[0:-4]], 'Melee');

def FuzzyMatch(filename, cutoff=80):
"""Use this to find duplicates."""
# get list of team names
f = open(AddCsv(filename), encoding='utf-8')
next(f, None)
dataDict = csv.DictReader(f)
teamList = [row['Tag'].lower() for row in dataDict]
output = {teamName: process.extractWithoutOrder(teamName, teamList, score_cutoff=cutoff) for teamName in teamList[:-1]}
duplicatesFound = False
for team in output:
matches = []
for match in output[team]:
if (match[0] != team):
matches.append(match)

if len(matches) > 0:
duplicatesFound = True
print(team + ': ' + str(matches))

if (not duplicatesFound):
print('No duplicates found at cutoff=' + str(cutoff))
4 changes: 3 additions & 1 deletion Season2-INT.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,17 @@
# 01-02/2014
##ProcessRankings(['Apex 2014'], 'Melee')


# Progress marker so a long run shows when processing begins.
print('Start.')
#Week 1
#ProcessRankings(['Tick Tock Tuesdays 9'],'Melee')
# Process the two Splatoon matchup files for this season, in date order.
ProcessRankings(['2018-01-13_-_Son_Of_A_Squid_5_-_Matchups'],'Splatoon')
ProcessRankings(['2018-01-21_-_SplatChampionship_2018_-winter-_-_Matchups_1'],'Splatoon')

#ShowRankings('Melee')
# Presumably writes the current Splatoon rankings to the 'Test3' csv - confirm against WriteCSVRankings.
WriteCSVRankings('Splatoon','Test3')
# Scan the 'Test3' csv for tags that look like duplicates of each other (fuzzy score cutoff of 80).
FuzzyMatch('Test3', cutoff=80)
#PersonRankings('Ink Soup')
# Progress marker for the end of the run.
print('Done.')

#UsefulFunctions() # Run this to print all the useful functions as well as information about each.
##UsefulFunctionsListed() # Run this to print all the useful functions without the headers or additional information.

0 comments on commit e744a41

Please sign in to comment.