Skip to content

Commit

Permalink
all files
Browse files Browse the repository at this point in the history
  • Loading branch information
yasharkor committed Nov 2, 2020
0 parents commit 3045da8
Show file tree
Hide file tree
Showing 1,784 changed files with 30,905 additions and 0 deletions.
361 changes: 361 additions & 0 deletions Assignment1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,361 @@
from os import listdir
from os.path import isfile, join
import re
import csv

# Path of .txt data directory
mypath='./data/dev/'
# getting the names of all files in the directory
file_name_list = [f for f in listdir(mypath) if isfile(join(mypath, f))]


# remove matches that are copy or smaller
def remove_matches(temp_list):
# take the span element of each math an put it in tuples
tuples=[x[4] for x in temp_list]
indexes=set()
for j, big in enumerate(tuples):
for i, small in enumerate(tuples):
if(small[0]>=big[0] and small[1]<=big[1] and i!=j):
indexes.add(i)
for index in sorted(list(indexes), reverse=True):
del temp_list[index]
return temp_list




##---------------------------------------------------------##
# defining a list for all regesx pattern
regex_list=[]
# defining a list for the type of date expression
expr_list=[]
# defining a list for the output
out_list=[['article_id','expr_type','value','char_offset','temp']]

##---------------------------------------------------------##
##---------------------------------------------------------##

# Set of all months
months=set([
'January','February','March','April','May','June','July','August',
'September','October','November','December',
'january', 'february', 'march', 'april', 'june', 'july',
'august', 'september', 'october', 'november', 'december'
])
# creating OR statement between various months
months = '|'.join(months)

# Set of all days
days=set(['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'])
# creating OR statement between days
days='|'.join(days)

seasons=set(['Fall','Winter','Summer','Spring','Autumn'])
seasons='|'.join(seasons)

numbers=set(['one','two','three','four','five','six','seven','eigth','nine','ten','1st','2nd','3rd'])
numbers='|'.join(numbers)

interval=set(['day','Day','week','Week','Month','month','season','Season','year','Year','hour','minute','Hour','Minute','noon','today','tomorrow','yesterday'])
interval='|'.join(interval)

deictic1=set(['next','prior','last','before','after'])
deictic1='|'.join(deictic1)

deictic2=set(['ago','later'])
deictic2='|'.join(deictic2)

##---------------------------------------------------------##
##---------------------Regex Patterns----------------------##
##---------------------------------------------------------##

##----------------------Pattern 1--------------------------##

# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
\w+
\s
((?: {}))[s]? # interval set
\s? # space
(?:{}) # deictic1 set
\s? # interval set
((?: {}))[s]?
)
""".format(interval,deictic1,interval), re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('time-deictic1-time')
##----------------------Pattern 2--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
(?:{}) # Numbers set
\s? # space
((?: {}))[s]? # interval set
\s? # space
(?:{})) # deictic2 set
""".format(numbers,interval,deictic2), re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('numbers-time-deictic')

##----------------------Pattern 3--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
(?:half)? # optional half to match words like half an hour
\s? # optional space
\w+ # one word before interval vocabularies
\s? # optional space
(?:{}))[s,.]? # interval set
""".format(interval), re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('interval')
##----------------------Pattern 4--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
(?:\b{}[s,]?\b) # deictic1 set
\s? # space
(?: {}) # days set
)
""".format(deictic1,days), re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('deictic1')
##----------------------Pattern 5--------------------------##

# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
(?:next)?
\s?
(?:{}) # Any weekdays
)
""".format(days), re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('weekday')
##----------------------Pattern 6--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
(?:[01]?[0-9]|2[0-3]) # up to 0-23 h
:
[0-5][0-9] # up to 00-59 min
(?::[0-5][0-9])? # up to 59 sec optional
)
""",re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('24hr time')
##----------------------Pattern 7--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
(?:19|20)\d\d # 1900-2099 year
[\ .-/]
(?: 0?[1-9]|1[012]) # 0-12 M
[\ .-/]
(?: 0?[1-9]|[12]\d|3[01]) # 0-9 | 10-19, 20-29 | 30,31
)
""",re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('YYYY/M/D')
##----------------------Pattern 8--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
(?:0?[1-9]|1[012])? #optional 0-12 M
[\. -/]
(?:0?[1-9]|[12]\d|3[01])? #optional 0-9 | 10-19, 20-29 | 30,31
[\. -/]
(?:(?:19|20)\d\d) # 1900-2099 year
)
""",re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('M/D/YYYY or MM/DD/YYYY')
##----------------------Pattern 9--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
(?:0?[1-9]|[12]\d|3[01])? # 0-9 | 10-19, 20-29 | 30,31
[\. -/]
(0?[1-9]|1[012])? # optional 0-12 M
[\. -/]
(?:(?:19|20)\d\d) # 1900-2099 year
)
""",re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('D/M/YYYY or DD/MM/YYYY')
##----------------------Pattern 10--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
(?:[12][0-9]{{3}}s?)? # 1900-2099 year
\s?
(?:{})
\s?
(?:[12][0-9]{{3}}s?)? # 1900-2099 year
)
""".format(seasons),re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('Season')
##----------------------Pattern 11--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
immediately
)
""",re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('immediate time')
##----------------------Pattern 12--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
\w+ # 1 word before years
\s* # space
years?[, .]? # years, year, year, year.
)
""",re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('number of years')
##----------------------Pattern 13--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
(?: {}) # Months
\s* # space
\d{{0,2}} # Day 0-99
\s* # space
(?:[12][0-9]{{3}}s?) # Year 1000-2999
)
""".format(months),re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('month dd yyyy')
##----------------------Pattern 14--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
\d{{0,2}} # Day 0-99
\s* # space
(?: {}) # Months
\s* # space
(?:[12][0-9]{{3}}s?) # Year 1000-2999
)
""".format(months),re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('dd months yyyy')
##----------------------Pattern 15--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
(
(?: late|early|mid)? # optional late or early
[\s-]? # optional space
[12] # 1 or 2
[0-9]{3} # any 3 digit number, find years between 1000-2999
s* # optional s
)
""",re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('year')

##----------------------Pattern 16--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
({}) # months
""".format(months), re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('month')
##----------------------Pattern 17--------------------------##
# Appending regext pattern to regex_list
regex_list.append(
re.compile("""
((?:{})-(?:{})) # Any combination of months-months
""".format(months,months), re.VERBOSE)
)
# Appending type of date expression to expr_list
expr_list.append('month-month')

##---------------------------------------------------------##
##-------Read, and find patterns and save into list -------##
##---------------------------------------------------------##


# For loop over all .txt files
for file_name in file_name_list:
# opening one .txt file
with open (join(mypath,file_name)) as text_file:
# Reading the .txt file
text=text_file.read()
# text=text.replace('\n', ' ')
temp_list=[]
# print(text)
# For loop over all regext patters in regex_list and enumerating them to assign the associated expr_list
for i, pattern in enumerate(regex_list):
# For loop over all matches found by findall
for m in pattern.finditer(text):
# Appending to the list (file name, expression type, value, char_offset)
# print(file_name,m.group(),m.span())
temp_list.append([file_name,expr_list[i],m.group(),m.start(),m.span()])
# remove matches that are copy or smaller
remove_matches(temp_list)
# Append the list of matched find in a specific .txt file to the out_list
out_list.extend(temp_list)

# seleting the first 4 elements of each out put = dropping .span() informtion
out_list=list(map(lambda x:x[0:4],out_list))



##-------------------------------------------------------------
# seleting the first 4 elements of each out put = dropping .span() informtion


# out_list=list(map(lambda x:x[0:3],out_list))

##-----------------save into CSV file-------------------------##
# Writing the output(out_list) in to a csv file(out.csv)
with open("out.csv", "w", newline="") as csv_file:
writer = csv.writer(csv_file)
writer.writerows(out_list)


print('Successful')
print(len(out_list))
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Intro to NLP - Assignment 1

## README file

1. Yashar Kor (CCID: yashar)
2. Libraries including os, re, and csv should be installed
3. To run the program only run Assignmnet1.py file

4.The default data directory is mypath='./data/dev/' if you want to test it on another test data please specify the directory path and modify "mypath"


## Data

The assignment's development data can be found inside [data/dev](data/dev).
7 changes: 7 additions & 0 deletions data/dev/1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
India-Pakistan peace boosts trade

Calmer relations between India and Pakistan are paying economic dividends, with new figures showing bilateral trade up threefold in the summer.

The value of trade in April-July rose to $186.3m (£97m) from $64.4m in the same period in 2003, the Indian Government said. Nonethless, the figures represent less than 1% of India's overall exports. But business is expected to be boosted further from 2006 when the South Asian Free Trade Area Agreement starts. Both countries eased travel and other restrictions as part of the peace process aimed at ending nearly six decades of hostilities.

Sugar, plastics, pharmaceutical products and tea are among the major exports from India to its neighbour, while firms in Pakistani have been selling fabrics, fruit and spices. "If the positive trend continues, two-way trade could well cross half a billion dollars this fiscal year," India's federal commerce Minister Kamal Nath said. According to official data, the value of India's overall exports in the current fiscal year is expected to reach more than $60bn, while in Pakistan's case it is set to hit more than $12bn. Meanwhile, the Indian Government said the prospects for the country's booming economy remained "very bright" despite a "temporary aberration" this year. Its mid-year economic review forecasts growth of 6-6.5% in 2004, compared with 8.2% in 2003. Higher oil prices, the level of tax collections, and an unfavourable monsoon season affecting the farm sector had hurt the economy in April-September, it said.
7 changes: 7 additions & 0 deletions data/dev/10.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
McDonald's boss Bell dies aged 44

Charlie Bell, the straight-talking former head of fast-food giant McDonald's, has died of cancer aged 44.

Mr Bell was diagnosed with colorectal cancer in May last year, a month after taking over the top job. He resigned in November to fight the illness. Joining the company as a 15-year-old part-time worker, Mr Bell quickly moved through its ranks, becoming Australia's youngest store manager at 19. A popular go-getter, he is credited with helping revive McDonald's sales. Mr Bell leaves a wife and daughter. "As we mourn his passing, I ask you to keep Charlie's family in your hearts and prayers," chief executive James Skinner said in a statement. "And remember that in his abbreviated time on this earth, Charlie lived life to the fullest." "No matter what cards life dealt, Charlie stayed centred on his love for his family and for McDonald's."

After running the company's Australian business in the 1990s, Mr Bell moved to the US in 1999 to run operations in Asia, Africa and the Middle East. In 2001, he took over the reins in Europe, McDonald's second most important market. He became chief operating officer and president in 2002. Mr Bell took over as chief executive after his predecessor as CEO, Jim Cantalupo, died suddenly of a heart attack in April. Having worked closely with Mr Cantalupo, who came out of retirement to turn McDonald's around, Mr Bell focused on boosting demand at existing restaurants rather than follow a policy of rapid expansion. He had promised not to let the company get "fat, dumb and happy," and, according to Reuters, once told analysts that he would shove a fire hose down the throat of competitors if he saw them drowning. Mr Bell oversaw McDonald's "I'm lovin' it" advertising campaign and introduced successes such as McCafe, now the biggest coffee shop brand in Australia and New Zealand. Colleagues said that Mr Bell was proud of his humble beginnings, helping out behind cash tills and clearing tables when visiting restaurants.
Loading

0 comments on commit 3045da8

Please sign in to comment.