Skip to content

Commit 89340a5

Browse files
author
naman-msft
committed
added docs
1 parent e14c82e commit 89340a5

File tree

2 files changed

+1244
-0
lines changed

2 files changed

+1244
-0
lines changed

proc.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33
import re
44
import shutil
5+
import json
56
from pathlib import Path
67
import yaml
78
from openai import AzureOpenAI
@@ -48,6 +49,85 @@ def extract_title_from_markdown(file_path):
4849
# Fallback to filename
4950
return Path(file_path).stem
5051

52+
def extract_description_from_markdown(file_path):
    """Extract the description from a markdown file's YAML frontmatter.

    Args:
        file_path: Path (``str`` or ``Path``) of the markdown file to read.

    Returns:
        The frontmatter ``description`` value when it is present and
        non-empty (strings are returned with surrounding whitespace
        stripped); otherwise the result of
        ``extract_title_from_markdown(file_path)``.
    """
    # 'utf-8-sig' transparently drops a leading BOM, which would otherwise
    # prevent the '^---' frontmatter pattern from matching at position 0.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    # Try to extract YAML frontmatter (the block between the leading '---' lines)
    yaml_match = re.match(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
    if yaml_match:
        try:
            metadata = yaml.safe_load(yaml_match.group(1))
        except yaml.YAMLError:
            # Malformed frontmatter is not fatal; fall back to the title below.
            metadata = None

        # safe_load may return a scalar or a list for unusual frontmatter;
        # only a mapping can carry a 'description' key.
        if isinstance(metadata, dict):
            description = metadata.get('description')
            if isinstance(description, str):
                description = description.strip()
            if description:
                return description

    # Fallback to title
    return extract_title_from_markdown(file_path)
69+
70+
def extract_next_steps(client, file_content):
    """Use Azure OpenAI to extract next steps from a document.

    Args:
        client: Object exposing ``chat.completions.create``; ``None`` is
            accepted and short-circuits to an empty result.
        file_content: Markdown text of the document. Only the first 3000
            characters are included in the prompt.

    Returns:
        A list of dicts parsed from the model's JSON reply (the prompt asks
        for ``title`` and ``url`` fields). An empty list is returned when
        there is no client, the request fails, or the reply does not
        contain a parseable JSON array.
    """
    if client is None:
        return []

    prompt = f"""From this document content, extract any "Next Steps" or related tutorial links mentioned.
Return as a JSON array of objects with 'title' and 'url' fields.
If no next steps are found, return an empty array.

Document content:
{file_content[:3000]}

Return ONLY valid JSON array, nothing else."""

    try:
        response = client.chat.completions.create(
            model=AZURE_OPENAI_DEPLOYMENT,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that extracts structured data from documents."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=500
        )
        reply = (response.choices[0].message.content or "").strip()
    except Exception as exc:
        # Best-effort enrichment: report the failure and continue without
        # next steps. Unlike a bare ``except``, this lets Ctrl-C and
        # SystemExit propagate.
        print(f"Warning: could not extract next steps: {exc}")
        return []

    # The reply may be wrapped in a Markdown code fence or surrounded by
    # prose; keep only the outermost [...] span before parsing.
    start = reply.find("[")
    end = reply.rfind("]")
    if start == -1 or end < start:
        return []

    try:
        parsed = json.loads(reply[start:end + 1])
    except json.JSONDecodeError:
        return []

    # Callers expect a list of objects; drop anything else the model emitted.
    return [item for item in parsed if isinstance(item, dict)]
95+
96+
def extract_env_variables(client, file_content):
    """Use Azure OpenAI to extract environment variables from a document.

    Args:
        client: Object exposing ``chat.completions.create``; ``None`` is
            accepted and short-circuits to an empty result.
        file_content: Markdown text of the document. Only the first 3000
            characters are included in the prompt.

    Returns:
        A list of dicts parsed from the model's JSON reply (the prompt asks
        for ``inputType``, ``commandKey``, ``title`` and ``defaultValue``
        fields). An empty list is returned when there is no client, the
        request fails, or the reply does not contain a parseable JSON array.
    """
    if client is None:
        return []

    # Doubled braces in the f-string render as a literal ${VARIABLE_NAME}.
    prompt = f"""From this document content, identify all environment variables that need to be set before running the commands.
Look for patterns like:
- export VARIABLE_NAME=
- $VARIABLE_NAME or ${{VARIABLE_NAME}}
- Variables mentioned in instructions

For each variable, provide a user-friendly title.
Return as a JSON array of objects with these fields:
- inputType: "textInput"
- commandKey: the exact variable name
- title: user-friendly title in Title Case (e.g., RESOURCE_GROUP -> "Resource Group Name")
- defaultValue: ""

Document content:
{file_content[:3000]}

Return ONLY valid JSON array, nothing else."""

    try:
        response = client.chat.completions.create(
            model=AZURE_OPENAI_DEPLOYMENT,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that extracts environment variables from technical documents."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=500
        )
        reply = (response.choices[0].message.content or "").strip()
    except Exception as exc:
        # Best-effort enrichment: report the failure and continue without
        # configurable parameters. Unlike a bare ``except``, this lets
        # Ctrl-C and SystemExit propagate.
        print(f"Warning: could not extract environment variables: {exc}")
        return []

    # The reply may be wrapped in a Markdown code fence or surrounded by
    # prose; keep only the outermost [...] span before parsing.
    start = reply.find("[")
    end = reply.rfind("]")
    if start == -1 or end < start:
        return []

    try:
        parsed = json.loads(reply[start:end + 1])
    except json.JSONDecodeError:
        return []

    # Callers expect a list of objects; drop anything else the model emitted.
    return [item for item in parsed if isinstance(item, dict)]
130+
51131
def generate_folder_name(client, title, file_content_snippet):
52132
"""Use Azure OpenAI to generate an intuitive folder name"""
53133
prompt = f"""Given this document title: "{title}"
@@ -87,6 +167,96 @@ def pascal_to_kebab(name):
87167
tokens = re.findall(r'[A-Z](?:[a-z]+|[A-Z]*(?=[A-Z]|$))', name)
88168
return '-'.join(t.lower() for t in tokens)
89169

170+
def update_metadata_json(scenarios_dir, client):
    """Update metadata.json with missing scenario entries.

    Scans every immediate sub-directory of ``scenarios_dir`` (except
    ``__pycache__``) for ``*.md`` files and appends a metadata entry for each
    file whose ``"<folder>/<file>"`` key is not already listed in
    ``<scenarios_dir>/metadata.json``. The file is rewritten only when at
    least one entry was added.

    Args:
        scenarios_dir: Directory containing the scenario folders and
            ``metadata.json``.
        client: Client passed through to ``extract_next_steps`` and
            ``extract_env_variables``.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        FileNotFoundError: If ``scenarios_dir`` does not exist.
        json.JSONDecodeError: If an existing ``metadata.json`` is not valid JSON.
    """
    scenarios_path = Path(scenarios_dir)
    metadata_file = scenarios_path / "metadata.json"

    # Load existing metadata
    if metadata_file.exists():
        with open(metadata_file, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
    else:
        metadata = []

    # Get all existing keys; entries without a usable 'key' are ignored
    # instead of aborting the whole run with a KeyError.
    existing_keys = {
        entry['key']
        for entry in metadata
        if isinstance(entry, dict) and entry.get('key')
    }

    def is_known(candidate):
        """Return True if ``candidate`` is listed, exactly or as a path suffix."""
        # Matching on a '/' boundary still tolerates keys stored with extra
        # leading path components, but no longer treats 'demo/guide.md' as
        # present merely because 'xdemo/guide.md' contains it as a substring.
        suffix = "/" + candidate
        return candidate in existing_keys or any(
            known.endswith(suffix) for known in existing_keys
        )

    new_entries = []

    # Sorted so the order of appended entries does not depend on the
    # filesystem's directory listing order.
    for folder in sorted(scenarios_path.iterdir()):
        if not folder.is_dir() or folder.name == "__pycache__":
            continue

        for md_file in sorted(folder.glob("*.md")):
            key = f"{folder.name}/{md_file.name}"
            if is_known(key):
                continue

            print(f"\nProcessing new metadata entry for: {key}")

            # Read file content
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()

            # Extract information
            title = extract_title_from_markdown(md_file)
            description = extract_description_from_markdown(md_file)

            # Use AI to extract next steps and env variables
            next_steps = extract_next_steps(client, content)
            configurable_params = extract_env_variables(client, content)

            new_entries.append({
                "status": "active",
                "key": key,
                "title": title,
                "description": description,
                "stackDetails": "",
                "sourceUrl": f"https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/{key}",
                "documentationUrl": "",
                "nextSteps": next_steps,
                "configurations": {
                    "permissions": [],
                    "configurableParams": configurable_params
                }
            })
            print(f"  Added metadata for: {key}")

    if not new_entries:
        print("\nNo new entries needed for metadata.json")
        return

    # Append new entries and write the updated metadata back to file
    metadata.extend(new_entries)
    with open(metadata_file, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=4, ensure_ascii=False)

    print(f"\nAdded {len(new_entries)} new entries to metadata.json")
243+
244+
# Metadata-only run.
# NOTE(review): these statements appear to sit at module level, so they run
# as soon as the file is executed or imported, and the sys.exit(0) at the end
# stops the interpreter before any later statement in the file runs — confirm
# this is intended and not leftover debugging code.
source_dir = "tools/success"
target_dir = "scenarios"

# Setup Azure OpenAI; on failure the run continues with no client.
try:
    client = setup_azure_openai()
    print("Azure OpenAI client initialized successfully")
except Exception as err:
    client = None
    print(f"Warning: Could not initialize Azure OpenAI: {err}")
    print("Will use fallback naming method")

separator = "\n" + "=" * 60
print(separator)
print("Updating metadata.json...")
update_metadata_json(target_dir, client)

import sys
sys.exit(0)
259+
90260
def process_success_files(source_dir, target_dir, dry_run=False):
91261
"""Process all markdown files with 'success' in filename"""
92262
source_path = Path(source_dir)

0 commit comments

Comments
 (0)