Skip to content

Commit

Permalink
Merge branch 'main' of github.com:HTTPArchive/almanac.httparchive.org…
Browse files Browse the repository at this point in the history
… into production
  • Loading branch information
tunetheweb committed Dec 2, 2024
2 parents f79361b + 91885c1 commit a58c704
Show file tree
Hide file tree
Showing 96 changed files with 1,498 additions and 250 deletions.
30 changes: 12 additions & 18 deletions sql/2024/seo/robots-txt-size-2024.sql
Original file line number Diff line number Diff line change
@@ -1,37 +1,31 @@
#standardSQL
# Robots.txt size
# Extracts the robots.txt size from a page's JSON payload.
#
# payload: JSON string whose `_robots_txt` field holds JSON text of its own
#          (it is parsed a second time) carrying a `size` value.
# Returns: `size` divided by 1024 (presumably bytes -> KiB; confirm against the
#          custom metric that produces `_robots_txt`), or 0 when the payload
#          cannot be parsed or no finite size can be derived from it.
CREATE TEMPORARY FUNCTION getRobotsSize(payload STRING)
RETURNS FLOAT64 LANGUAGE js AS '''
try {
  var $ = JSON.parse(payload);
  var robots = JSON.parse($._robots_txt);
  var sizeKib = robots['size'] / 1024;
  // A missing or non-numeric size divides to NaN without throwing, so the
  // catch below never sees it. Report it as 0 like every other failure so
  // callers comparing the result to 0 still treat the row as "missing".
  return isFinite(sizeKib) ? sizeKib : 0;
} catch (e) {
  return 0;
}
''';

SELECT
client,
COUNT(DISTINCT(site)) AS sites,
SAFE_DIVIDE(COUNTIF(robots_size = 0), COUNT(DISTINCT(site))) AS pct_0,
SAFE_DIVIDE(COUNTIF(robots_size > 0 AND robots_size <= 100), COUNT(DISTINCT(site))) AS pct_0_100,
SAFE_DIVIDE(COUNTIF(robots_size > 100 AND robots_size <= 200), COUNT(DISTINCT(site))) AS pct_100_200,
SAFE_DIVIDE(COUNTIF(robots_size > 200 AND robots_size <= 300), COUNT(DISTINCT(site))) AS pct_200_300,
SAFE_DIVIDE(COUNTIF(robots_size > 300 AND robots_size <= 400), COUNT(DISTINCT(site))) AS pct_300_400,
SAFE_DIVIDE(COUNTIF(robots_size > 400 AND robots_size <= 500), COUNT(DISTINCT(site))) AS pct_400_500,
SAFE_DIVIDE(COUNTIF(robots_size > 500), COUNT(DISTINCT(site))) AS pct_gt500,
SAFE_DIVIDE(COUNTIF(robots_size = 0), COUNT(DISTINCT(site))) AS pct_missing,
SAFE_DIVIDE(COUNTIF(robots_size IS NULL), COUNT(DISTINCT(site))) AS pct_missing,
COUNTIF(robots_size > 500) AS count_gt500,
COUNTIF(robots_size = 0) AS count_missing
COUNTIF(robots_size IS NULL) AS count_missing
FROM (
SELECT
client,
page AS site,
getRobotsSize(payload) AS robots_size
root_page AS site,
custom_metrics.robots_txt,
FLOAT64(custom_metrics.robots_txt.size_kib) AS robots_size
FROM
`httparchive.all.pages`
WHERE date = '2024-06-01'
) -- noqa: L062
`httparchive.crawl.pages`
WHERE
date = '2024-06-01' AND
is_root_page AND -- no need to crawl inner pages for this one
custom_metrics.robots_txt.status IS NOT NULL
)
GROUP BY
client
ORDER BY
Expand Down
3 changes: 1 addition & 2 deletions src/config/2024.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,7 @@
"part": "II",
"chapter_number": "9",
"title": "SEO",
"slug": "seo",
"todo": true
"slug": "seo"
},
{
"part": "II",
Expand Down
61 changes: 59 additions & 2 deletions src/config/contributors.json
Original file line number Diff line number Diff line change
Expand Up @@ -1191,7 +1191,10 @@
},
"dwsmart": {
"avatar_url": "11179452",
"bluesky": "tamethebots.com",
"github": "dwsmart",
"linkedin": "davewsmart",
"mastodon": "https://seocommunity.social/@dwsmart",
"name": "Dave Smart",
"teams": {
"2020": [
Expand All @@ -1203,9 +1206,11 @@
"2022": [
"authors",
"reviewers"
],
"2024": [
"authors"
]
},
"twitter": "davewsmart",
"website": "https://tamethebots.com"
},
"dsottimano": {
Expand Down Expand Up @@ -2044,7 +2049,9 @@
},
"fellowhuman1101": {
"avatar_url": "52051775",
"bluesky": "not-a-robot.com",
"github": "fellowhuman1101",
"linkedin": "jamie-indigo",
"name": "Jamie Indigo",
"teams": {
"2020": [
Expand All @@ -2056,6 +2063,10 @@
],
"2022": [
"authors"
],
"2024": [
"authors",
"leads"
]
},
"twitter": "Jammer_Volts",
Expand Down Expand Up @@ -3023,13 +3034,22 @@
},
"MichaelLewittes": {
"avatar_url": "96250205",
"bluesky": "michaellewittes.bsky.social",
"github": "MichaelLewittes",
"linkedin": "michael-lewittes-a22b831",
"mastodon": "https://seocommunity.social/@MichaelLewittes",
"name": "Michael Lewittes",
"teams": {
"2022": [
"editors"
],
"2024": [
"authors",
"editors"
]
}
},
"twitter": "MichaelLewittes",
"website": "https://www.ranktify.com/team"
},
"MichaelSolati": {
"avatar_url": "11811422",
Expand Down Expand Up @@ -4871,5 +4891,42 @@
"reviewers"
]
}
},
"mikaelaraujo": {
"avatar_url": "4764075",
"bluesky": "mikaelaraujo.bsky.social",
"github": "mikaelaraujo",
"linkedin": "mikael-araujo",
"name": "Mikael Araújo",
"teams": {
"2024": [
"authors"
]
},
"threads": "@mikaelaraujo",
"twitter": "miknaraujo",
"website": "https://www.mikaelaraujo.com"
},
"henryp25": {
"avatar_url": "62102954",
"github": "henryp25",
"linkedin": "henry-price-9ab362b4",
"name": "Henry Price",
"teams": {
"2024": [
"analysts"
]
}
},
"cnichols013": {
"avatar_url": "73146375",
"github": "cnichols013",
"linkedin": "chris-nichols",
"name": "Chris Nichols",
"teams": {
"2024": [
"analysts"
]
}
}
}
Loading

0 comments on commit a58c704

Please sign in to comment.