-
-
Notifications
You must be signed in to change notification settings - Fork 388
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Community learning discussion draft #2715
base: master
Are you sure you want to change the base?
Changes from all commits
7a8de57
0cef67e
c7e0ed3
d19ab03
e55e0a3
c0c1467
9d537a6
85bfd6b
86432ed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -173,6 +173,14 @@ | |
"message": "Learn in Private/Incognito windows", | ||
"description": "Checkbox label on the general settings page" | ||
}, | ||
"options_community_learning_setting": { | ||
"message": "Enable community learning and share data about trackers", | ||
"description": "Checkbox label on the general settings page" | ||
}, | ||
"options_community_learning_warning": { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should the link that explains what community learning is (and the decision-making that led to it) be included in this message or the one above? |
||
"message": "When you enable community learning, your browser will share some information it collects about trackers with EFF. Specifically, each time your instance of Privacy Badger observes a particular tracker on a website that it has not seen before, it will share the origin (top-level domain +1) of both the tracker and the website, as well as the type of tracking action that it observed. EFF will only use this information for generating community learning lists, and will never share personal information with third parties. For more details, see our privacy policy: https://link.to.come", | ||
"description": "Checkbox label on the general settings page" | ||
}, | ||
"options_incognito_warning": { | ||
"message": "Enabling learning in Private/Incognito windows may leave traces of your private browsing history on your computer. By default, Privacy Badger will block trackers it already knows about in Private/Incognito windows, but it won't learn about new trackers. You might want to enable this option if a lot of your browsing happens in Private/Incognito windows.", | ||
"description": "Tooltip on the general settings page" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,6 +40,17 @@ var exports = { | |
TRACKING_THRESHOLD: 3, | ||
MAX_COOKIE_ENTROPY: 12, | ||
|
||
// The max amount of time (in milliseconds) that PB will wait before sharing a | ||
// tracking action with EFF for community learning | ||
MAX_CL_WAIT_TIME: 5 * 60 * 1000, // five minutes | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good call on reducing network load with the reporting timeouts, but I'm curious: why 5 minutes? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Completely arbitrary! |
||
|
||
// The probability that any given tracking action will be logged to the | ||
// community server, as a float from 0.0 to 1.0 | ||
CL_PROBABILITY: 1.0, | ||
|
||
// size of the in-memory community learning cache | ||
CL_CACHE_SIZE: 5000, | ||
|
||
DNT_POLICY_CHECK_INTERVAL: 1000, // one second | ||
}; | ||
|
||
|
@@ -50,5 +61,12 @@ exports.BLOCKED_ACTIONS = new Set([ | |
exports.USER_COOKIEBLOCK, | ||
]); | ||
|
||
exports.TRACKER_TYPES = Object.freeze({ | ||
COOKIE: "cookie", | ||
COOKIE_SHARE: "cookie_share", | ||
SUPERCOOKIE: "supercookie", | ||
FINGERPRINT: "fingerprint", | ||
}) | ||
|
||
return exports; | ||
})(); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,9 @@ function HeuristicBlocker(pbStorage) { | |
// impossible to attribute to a tab. | ||
this.tabOrigins = {}; | ||
this.tabUrls = {}; | ||
|
||
// in-memory cache for community learning | ||
this.previouslySharedTrackers = new Set(); | ||
} | ||
|
||
HeuristicBlocker.prototype = { | ||
|
@@ -104,8 +107,10 @@ HeuristicBlocker.prototype = { | |
* @param {Boolean} check_for_cookie_share whether to check for cookie sharing | ||
*/ | ||
heuristicBlockingAccounting: function (details, check_for_cookie_share) { | ||
let tab_id = details.tabId; | ||
|
||
// ignore requests that are outside a tabbed window | ||
if (details.tabId < 0 || !badger.isLearningEnabled(details.tabId)) { | ||
if (tab_id < 0 || !badger.isLearningEnabled(tab_id)) { | ||
return {}; | ||
} | ||
|
||
|
@@ -115,12 +120,12 @@ HeuristicBlocker.prototype = { | |
|
||
// if this is a main window request, update tab data and quit | ||
if (details.type == "main_frame") { | ||
self.tabOrigins[details.tabId] = request_origin; | ||
self.tabUrls[details.tabId] = details.url; | ||
self.tabOrigins[tab_id] = request_origin; | ||
self.tabUrls[tab_id] = details.url; | ||
return {}; | ||
} | ||
|
||
let tab_origin = self.tabOrigins[details.tabId]; | ||
let tab_origin = self.tabOrigins[tab_id]; | ||
|
||
// ignore first-party requests | ||
if (!tab_origin || !utils.isThirdPartyDomain(request_origin, tab_origin)) { | ||
|
@@ -141,15 +146,16 @@ HeuristicBlocker.prototype = { | |
|
||
// check if there are tracking cookies | ||
if (hasCookieTracking(details, request_origin)) { | ||
self._recordPrevalence(request_host, request_origin, tab_origin); | ||
self._recordPrevalence(request_host, request_origin, tab_origin, tab_id, | ||
constants.TRACKER_TYPES.COOKIE); | ||
return {}; | ||
} | ||
|
||
// check for cookie sharing iff this is an image in the top-level frame, and the request URL has parameters | ||
if (check_for_cookie_share && details.type == 'image' && details.frameId === 0 && details.url.indexOf('?') > -1) { | ||
if (false && details.type == 'image' && details.frameId === 0 && details.url.indexOf('?') > -1) { | ||
// get all non-HttpOnly cookies for the top-level frame | ||
// and pass those to the cookie-share accounting function | ||
let tab_url = self.tabUrls[details.tabId]; | ||
let tab_url = self.tabUrls[tab_id]; | ||
|
||
let config = { | ||
url: tab_url | ||
|
@@ -161,7 +167,7 @@ HeuristicBlocker.prototype = { | |
chrome.cookies.getAll(config, function (cookies) { | ||
cookies = cookies.filter(cookie => !cookie.httpOnly); | ||
if (cookies.length >= 1) { | ||
self.pixelCookieShareAccounting(tab_url, tab_origin, details.url, request_host, request_origin, cookies); | ||
self.pixelCookieShareAccounting(tab_id, details.url, request_host, request_origin, cookies); | ||
} | ||
}); | ||
} | ||
|
@@ -179,8 +185,10 @@ HeuristicBlocker.prototype = { | |
* @param cookies are the result of chrome.cookies.getAll() | ||
* @returns {*} | ||
*/ | ||
pixelCookieShareAccounting: function (tab_url, tab_origin, request_url, request_host, request_origin, cookies) { | ||
pixelCookieShareAccounting: function (tab_id, request_url, request_host, request_origin, cookies) { | ||
let params = (new URL(request_url)).searchParams, | ||
tab_origin = self.tabOrigins[tab_id], | ||
tab_url = self.tabUrls[tab_id], | ||
TRACKER_ENTROPY_THRESHOLD = 33, | ||
MIN_STR_LEN = 8; | ||
|
||
|
@@ -251,7 +259,10 @@ HeuristicBlocker.prototype = { | |
log("Found high-entropy cookie share from", tab_origin, "to", request_host, | ||
":", entropy, "bits\n cookie:", cookie.name, '=', cookie.value, | ||
"\n arg:", key, "=", value, "\n substring:", s); | ||
this._recordPrevalence(request_host, request_origin, tab_origin); | ||
this._recordPrevalence( | ||
request_host, request_origin, tab_origin, tab_id, | ||
constants.TRACKER_TYPES.COOKIE_SHARE | ||
); | ||
return; | ||
} | ||
} | ||
|
@@ -265,8 +276,11 @@ HeuristicBlocker.prototype = { | |
* @param {String} tracker_fqdn The fully qualified domain name of the tracker | ||
* @param {String} tracker_origin Base domain of the third party tracker | ||
* @param {String} page_origin Base domain of page where tracking occurred | ||
* @param {Integer} tab_id the ID of the tab the user is in | ||
* @param {String} tracker_type the kind of tracking action that was observed | ||
*/ | ||
updateTrackerPrevalence: function (tracker_fqdn, tracker_origin, page_origin) { | ||
updateTrackerPrevalence: function (tracker_fqdn, tracker_origin, page_origin, | ||
tab_id, tracker_type) { | ||
// abort if we already made a decision for this fqdn | ||
let action = this.storage.getAction(tracker_fqdn); | ||
if (action != constants.NO_TRACKING && action != constants.ALLOW) { | ||
|
@@ -276,7 +290,9 @@ HeuristicBlocker.prototype = { | |
this._recordPrevalence( | ||
tracker_fqdn, | ||
tracker_origin, | ||
page_origin | ||
page_origin, | ||
tab_id, | ||
tracker_type | ||
); | ||
}, | ||
|
||
|
@@ -292,8 +308,9 @@ HeuristicBlocker.prototype = { | |
* @param {String} tracker_fqdn The FQDN of the third party tracker | ||
* @param {String} tracker_origin Base domain of the third party tracker | ||
* @param {String} page_origin Base domain of page where tracking occurred | ||
* @param {String} tracker_type the kind of tracking action that was observed | ||
*/ | ||
_recordPrevalence: function (tracker_fqdn, tracker_origin, page_origin) { | ||
_recordPrevalence: function (tracker_fqdn, tracker_origin, page_origin, tab_id, tracker_type) { | ||
var snitchMap = this.storage.getStore('snitch_map'); | ||
var firstParties = []; | ||
if (snitchMap.hasItem(tracker_origin)) { | ||
|
@@ -310,23 +327,78 @@ HeuristicBlocker.prototype = { | |
return; // We already know about the presence of this tracker on the given domain | ||
} | ||
|
||
// record that we've seen this tracker on this domain (in snitch map) | ||
firstParties.push(page_origin); | ||
snitchMap.setItem(tracker_origin, firstParties); | ||
// If community learning is enabled, queue up a request to the EFF server | ||
if (badger.isCommunityLearningEnabled(tab_id)) { | ||
let page_fqdn = (new URI(this.tabUrls[tab_id])).host; | ||
this.shareTrackerInfo(page_fqdn, tracker_fqdn, tracker_type); | ||
} | ||
|
||
// ALLOW indicates this is a tracker still below TRACKING_THRESHOLD | ||
// (vs. NO_TRACKING for resources we haven't seen perform tracking yet). | ||
// see https://github.com/EFForg/privacybadger/pull/1145#discussion_r96676710 | ||
this.storage.setupHeuristicAction(tracker_fqdn, constants.ALLOW); | ||
this.storage.setupHeuristicAction(tracker_origin, constants.ALLOW); | ||
// If local learning is enabled, record that we've seen this tracker on this | ||
// domain (in snitch map) | ||
if (badger.isLocalLearningEnabled(tab_id)) { | ||
firstParties.push(page_origin); | ||
snitchMap.setItem(tracker_origin, firstParties); | ||
|
||
// ALLOW indicates this is a tracker still below TRACKING_THRESHOLD | ||
// (vs. NO_TRACKING for resources we haven't seen perform tracking yet). | ||
// see https://github.com/EFForg/privacybadger/pull/1145#discussion_r96676710 | ||
this.storage.setupHeuristicAction(tracker_fqdn, constants.ALLOW); | ||
this.storage.setupHeuristicAction(tracker_origin, constants.ALLOW); | ||
|
||
// block the origin if it has been seen on multiple first party domains | ||
if (firstParties.length >= constants.TRACKING_THRESHOLD) { | ||
log('blocklisting origin', tracker_fqdn); | ||
this.blocklistOrigin(tracker_origin, tracker_fqdn); | ||
} | ||
} | ||
}, | ||
|
||
// Blocking based on outbound cookies | ||
var httpRequestPrevalence = firstParties.length; | ||
/** | ||
* Share information about a tracker for community learning | ||
*/ | ||
shareTrackerInfo: function(page_host, tracker_host, tracker_type) { | ||
// Share a random sample of trackers we observe | ||
if (Math.random() < constants.CL_PROBABILITY) { | ||
// check if we've shared this tracker recently | ||
// note that this check comes after checking against the snitch map | ||
let tr_str = page_host + '+' + tracker_host + '+' + tracker_type; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Adding some string sanitization and/or sanity-checking of the input values in this method wouldn't hurt, since they're about to get launched off in a POST. |
||
if (this.previouslySharedTrackers.has(tr_str)) { | ||
return; | ||
} | ||
|
||
// add this entry to the cache | ||
this.previouslySharedTrackers.add(tr_str); | ||
|
||
// if the cache gets too big, cut it in half | ||
if (this.previouslySharedTrackers.size > constants.CL_CACHE_SIZE) { | ||
this.previouslySharedTrackers = new Set( | ||
// An array created from the set will have all of its entries ordered | ||
// by when they were added | ||
Array.from(this.previouslySharedTrackers).slice( | ||
// keep the most recent half of the cache entries | ||
Math.floor(constants.CL_CACHE_SIZE / 2) | ||
) | ||
); | ||
} | ||
|
||
// block the origin if it has been seen on multiple first party domains | ||
if (httpRequestPrevalence >= constants.TRACKING_THRESHOLD) { | ||
log('blocklisting origin', tracker_fqdn); | ||
this.blocklistOrigin(tracker_origin, tracker_fqdn); | ||
// now make the request to the database server | ||
setTimeout(function() { | ||
fetch("http://localhost:8080", { | ||
method: "POST", | ||
body: JSON.stringify({ | ||
tracker_data: { | ||
page_host: page_host, | ||
tracker_host: tracker_host, | ||
tracker_type: tracker_type, | ||
} | ||
}) | ||
}).then(res => { | ||
if (!res.ok) { | ||
console.log("tracking action logging failed:", res); | ||
} | ||
}); | ||
// share info after a random delay, to reduce network load on browser | ||
}, Math.floor(Math.random() * constants.MAX_CL_WAIT_TIME)); | ||
} | ||
} | ||
}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think changing this message to "Enable community learning and share data about trackers" would answer some questions that users might have without them having to read the lengthier warning message (which is great and should still be included on top of this).