-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAlternativeMainScript.R
126 lines (97 loc) · 3.16 KB
/
AlternativeMainScript.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# ---- Libraries and helper scripts ----
# The relative source() paths below are resolved against this directory.
setwd("E:/Instagram-Crawler")
invisible(lapply(
  c("packages.R", "CommentExtraction.R", "URLExtractor.R"),
  source
))

# ---- Initial parameters ----
hashtagCounter <- 1
sysSleepTimer <- 5
index <- 1

# Placeholders for the connection handles; databaseConnections() replaces
# them with the real connection objects.
conComment <- conPost <- conTest <- ""

# ---- Database and collections used for storage ----
db_name <- "admin"
cursor_url_collection <- "test"
post_collection <- "post"
comment_collection <- "comment"
#' Open the collection handles used by the crawler.
#'
#' Creates one connection per storage collection with `mongo()` (presumably
#' mongolite, loaded through packages.R -- confirm) and stores them in the
#' global variables `conComment`, `conPost` and `conTest`. The database and
#' collection names are read from the globals `db_name`,
#' `comment_collection`, `post_collection` and `cursor_url_collection`.
#'
#' @param url Connection string passed to every `mongo()` call. Defaults to
#'   "mongodb://localhost", the value that used to be hard-coded three times.
#' @return Invisibly, the value of the last assignment (the `conTest`
#'   connection), exactly as before.
databaseConnections <- function(url = "mongodb://localhost") {
  # Single definition of how a collection handle is built, so the three
  # connections cannot drift apart.
  connect <- function(collection) {
    mongo(
      collection = collection,
      db = db_name,
      url = url,
      verbose = FALSE,
      options = ssl_options()
    )
  }

  # `<<-` is kept on purpose: the rest of the script reads these globals.
  conComment <<- connect(comment_collection)
  conPost <<- connect(post_collection)
  conTest <<- connect(cursor_url_collection)
}
# Open the connections; this fills conComment, conPost and conTest.
databaseConnections()

# Cursor/URL collection. It is read once and the result reused: the script
# previously ran the identical find("{}") three times in a row.
cursorDocuments <- conTest$find(query = "{}")

# Check for duplications in the mongo db collections.
# Note: the *Count variables hold de-duplicated data, not numeric counts.
testDocuments <- cursorDocuments
testDocumentsCount <- unique(testDocuments)
postCount <- cursorDocuments
postCount1 <- unique(postCount$Post_URL)

# Unique post_url values stored in the post collection.
postDocumentsCount <- unique(
  conPost$find(query = "{}", fields = '{"post_url": 1, "_id":0}')
)

commentDocuments <- conComment$find(query = "{}")
commentDocumentsCount <- unique(commentDocuments)

# distinct() already returns the unique values of the key as a plain vector,
# so there is no `post_Id` column to extract; `postDocuments$post_Id` failed
# with "$ operator is invalid for atomic vectors".
postDocuments <- conPost$distinct(key = "post_Id", query = "{}")
r <- postDocuments
# For every row of cursorDocuments, report whether the post collection
# already contains a document with the same post_url.
# seq_len() makes the loop a no-op for an empty result; `1:nrow(x)` would
# have iterated over c(1, 0) and queried with a missing URL.
for (row in seq_len(nrow(cursorDocuments))) {
  url <- cursorDocuments[row, ]$Post_URL
  print(row)

  # NOTE(review): the URL is pasted into the JSON query unescaped; a value
  # containing a double quote would produce an invalid query.
  quer <- paste0('{"post_url":"', url, '"}')
  countPost <- conPost$count(query = quer)

  if (countPost < 1) {
    # Printed seven times, as before, so new URLs stand out in the console.
    invisible(replicate(7L, print("NEW URLLLLLLLLLLLLLLLLLLLL")))
  } else {
    print("Already Present")
  }
}
#
# quer = paste('{"post_url":"',url,'"}',sep = "")
# countPost <- conPost$count(query = quer)
# Ad-hoc check of a single post URL against the stored collections.
postDocuments <- conTest$distinct(key = "Post_URL", query = "{}")
url <- "http://instagram.com/p/B_Qd6rWJwfU"

# The query built below is:
#   {"post_url":"http://instagram.com/p/B_Qd6rWJwfU"}
# It is kept as a comment only. As a bare statement R evaluated it as
# `"post_url":"..."` (the sequence operator on two strings) and stopped the
# script with "NA/NaN argument".
quer <- paste0('{"post_url":"', url, '"}')

countPost <- conPost$count(query = quer)
# NOTE(review): this overwrites the post-collection count above. The line
# before queries this same collection with the key `Post_URL`, so a
# `post_url` query may never match here -- confirm which count is intended.
countPost <- conTest$count(query = quer)
# Removed: conTest$run(command = 'count({...})'). The command was
# shell-style text rather than a JSON command document, and its result would
# have replaced the numeric count tested below.

if (countPost > 0) {
  print("Post Data Already Exisits")
} else {
  # alternativeCode and postDataFrame are not defined in this script;
  # presumably they come from the sourced helper scripts -- confirm.
  if (alternativeCode != 1) {
    extractPostCommentData(url, conPost, conComment)
  }
  # Both original branches ended with the same insert and pause.
  conTest$insert(postDataFrame)
  Sys.sleep(0.15)
}