Commit

predicted-outputs
ericciarla committed Nov 5, 2024
1 parent 75b48dc commit 71e512b
Showing 2 changed files with 95 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -32,3 +32,4 @@ apps/js-sdk/firecrawl/dist
/examples/claude_web_crawler/firecrawl_env
/examples/haiku_web_crawler/firecrawl_env
/examples/sonnet_web_crawler/firecrawl_env
/examples/internal_link_assitant/firecrawl_env
94 changes: 94 additions & 0 deletions examples/internal_link_assitant/internal_link_assitant.py
@@ -0,0 +1,94 @@
import os
import json
import re
from firecrawl import FirecrawlApp
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables
load_dotenv()

# Retrieve API keys from environment variables
firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Initialize the Firecrawl app and the OpenAI client
app = FirecrawlApp(api_key=firecrawl_api_key)
client = OpenAI(api_key=openai_api_key)

def main():
    # Get user input, falling back to a default blog post if none is given
    blog_url = input("Enter the blog URL: ")

    if not blog_url.strip():
        blog_url = "https://www.firecrawl.dev/blog/how-to-use-openai-o1-reasoning-models-in-applications"

    # Scrape the blog content
    print("Scraping the blog content...")
    blog_scrape_result = app.scrape_url(blog_url, params={'formats': ['markdown']})

    # Get the blog content in markdown format
    blog_content = blog_scrape_result.get('markdown', '')

    # Turn the blog URL into a top-level domain
    top_level_domain = '/'.join(blog_url.split('/')[:3])

    # Map the website to get all links
    print("Mapping the website to get all links...")
    site_map = app.map_url(top_level_domain)

    # Get the list of URLs from the site map
    site_links = site_map.get('links', [])

    # Build the prompt asking the model to weave internal links into the post
    prompt = f"""
You are an AI assistant helping to improve a blog post.
Here is the original blog post content:
{blog_content}
Here is a list of other pages on the website:
{json.dumps(site_links, indent=2)}
Please revise the blog post to include internal links to some of these pages where appropriate. Make sure the internal links are relevant and enhance the content.
Only return the revised blog post in markdown format.
"""

    # Count markdown links of the form [text](url) in a piece of content
    def count_links(markdown_content):
        return len(re.findall(r'\[.*?\]\(.*?\)', markdown_content))

    # Use the OpenAI API to get the revised blog post. Passing the original
    # markdown as a prediction enables Predicted Outputs, which speeds up
    # generation when most of the output is expected to match the prediction.
    print("Generating the revised blog post with internal links...")
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": prompt
            }
        ],
        prediction={
            "type": "content",
            "content": blog_content
        }
    )

    revised_blog_post = completion.choices[0].message.content

    # Count links in the original and revised blog posts
    original_links_count = count_links(blog_content)
    revised_links_count = count_links(revised_blog_post)

    # Output a portion of the revised blog post and the link counts
    print("\nRevised blog post (first 500 characters):")
    print(revised_blog_post[:500])
    print(f"\nNumber of links in the original blog post: {original_links_count}")
    print(f"Number of links in the revised blog post: {revised_links_count}")

if __name__ == "__main__":
    main()
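
The `prediction` argument is what the commit name refers to: OpenAI's Predicted Outputs feature. Because the revised post is expected to match the original markdown almost everywhere, passing the original as the prediction lets the model reuse those expected spans and can return the rewrite noticeably faster. Below is a minimal sketch of how one might measure that effect; it assumes the same `client`, `prompt`, and `blog_content` as in `main()` above, and the `timed_completion` helper is illustrative, not part of the commit.

import time

def timed_completion(prediction=None):
    # Run the same request with or without a prediction and time it (illustrative helper)
    start = time.perf_counter()
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        # Only pass `prediction` when one is supplied; omitting it disables Predicted Outputs
        **({"prediction": prediction} if prediction else {}),
    )
    return completion.choices[0].message.content, time.perf_counter() - start

# Baseline request vs. a request that predicts the output will resemble the original post
_, baseline_seconds = timed_completion()
_, predicted_seconds = timed_completion({"type": "content", "content": blog_content})
print(f"Without prediction: {baseline_seconds:.1f}s; with prediction: {predicted_seconds:.1f}s")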
