predicted-outputs

poggiolabs · Nov 5, 2024 · 71e512b · 71e512b
1 parent 75b48dc
commit 71e512b
Show file tree

Hide file tree

Showing 2 changed files with 95 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -32,3 +32,4 @@ apps/js-sdk/firecrawl/dist
 /examples/claude_web_crawler/firecrawl_env
 /examples/haiku_web_crawler/firecrawl_env
 /examples/sonnet_web_crawler/firecrawl_env
+/examples/internal_link_assitant/firecrawl_env
diff --git a/examples/internal_link_assitant/internal_link_assitant.py b/examples/internal_link_assitant/internal_link_assitant.py
@@ -0,0 +1,94 @@
+import os
+import json
+from firecrawl import FirecrawlApp
+from dotenv import load_dotenv
+from openai import OpenAI
+
+# Load environment variables
+load_dotenv()
+
+# Retrieve API keys from environment variables
+firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
+openai_api_key = os.getenv("OPENAI_API_KEY")
+
+# Initialize the FirecrawlApp and set OpenAI API key
+app = FirecrawlApp(api_key=firecrawl_api_key)
+client = OpenAI(api_key=openai_api_key)
+
+def main():
+    # Get user input
+    blog_url = input("Enter the blog URL: ")
+
+    if not blog_url.strip():
+        blog_url = "https://www.firecrawl.dev/blog/how-to-use-openai-o1-reasoning-models-in-applications"
+
+    # Scrape the blog content
+    print("Scraping the blog content...")
+    blog_scrape_result = app.scrape_url(blog_url, params={'formats': ['markdown']})
+
+    # Get the blog content in markdown format
+    blog_content = blog_scrape_result.get('markdown', '')
+
+    # Turn the blog URL into a top-level domain
+    top_level_domain = '/'.join(blog_url.split('/')[:3])
+
+    # Map the website to get all links
+    print("Mapping the website to get all links...")
+    site_map = app.map_url(top_level_domain)
+
+    # Get the list of URLs from the site map
+    site_links = site_map.get('links', [])
+
+
+    prompt = f"""
+You are an AI assistant helping to improve a blog post.
+
+Here is the original blog post content:
+
+{blog_content}
+
+Here is a list of other pages on the website:
+
+{json.dumps(site_links, indent=2)}
+
+Please revise the blog post to include internal links to some of these pages where appropriate. Make sure the internal links are relevant and enhance the content.
+
+Only return the revised blog post in markdown format.
+"""
+
+    import re
+
+    # Function to count links in a markdown content
+    def count_links(markdown_content):
+        return len(re.findall(r'\[.*?\]\(.*?\)', markdown_content))
+
+    # Use OpenAI API to get the revised blog post
+    print("Generating the revised blog post with internal links...")
+    completion = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ],
+        prediction={
+            "type": "content",
+            "content": blog_content
+        }
+    );
+
+    revised_blog_post = completion.choices[0].message.content
+
+    # Count links in the original and revised blog post
+    original_links_count = count_links(blog_content)
+    revised_links_count = count_links(revised_blog_post)
+
+    # Output a portion of the revised blog post and link counts
+    print("\nRevised blog post (first 500 characters):")
+    print(revised_blog_post[:500])
+    print(f"\nNumber of links in the original blog post: {original_links_count}")
+    print(f"Number of links in the revised blog post: {revised_links_count}")
+
+if __name__ == "__main__":  # run main() only when executed as a script
+    main()