|
2 | 2 | import os
|
3 | 3 | import re
|
4 | 4 | import shutil
|
| 5 | +import json |
5 | 6 | from pathlib import Path
|
6 | 7 | import yaml
|
7 | 8 | from openai import AzureOpenAI
|
@@ -48,6 +49,85 @@ def extract_title_from_markdown(file_path):
|
48 | 49 | # Fallback to filename
|
49 | 50 | return Path(file_path).stem
|
50 | 51 |
|
def extract_description_from_markdown(file_path):
    """Extract a description from a markdown file's YAML frontmatter.

    Args:
        file_path: Path of the markdown file; it is read as UTF-8.

    Returns:
        The ``description`` value from the frontmatter when the frontmatter
        parses to a mapping holding a non-empty ``description``. Otherwise
        the result of ``extract_title_from_markdown(file_path)``.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Frontmatter is the block enclosed by '---' lines at the very start of
    # the file.
    yaml_match = re.match(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
    if yaml_match:
        try:
            metadata = yaml.safe_load(yaml_match.group(1))
        except yaml.YAMLError:
            # Malformed frontmatter is not fatal; use the title fallback.
            metadata = None

        # safe_load can return a scalar or a list for unusual frontmatter;
        # only a mapping can carry a description.
        if isinstance(metadata, dict):
            description = metadata.get('description')
            # An empty or null description is treated as missing so the
            # caller never receives a blank value.
            if description:
                return description

    # Fallback to title
    return extract_title_from_markdown(file_path)
| 69 | + |
def extract_next_steps(client, file_content):
    """Use Azure OpenAI to extract "Next Steps" links from a document.

    Args:
        client: Object exposing ``chat.completions.create``, or ``None`` when
            no client could be initialized.
        file_content: Document text. Only the first 3000 characters are
            included in the prompt.

    Returns:
        The list decoded from the model's JSON reply. An empty list is
        returned when ``client`` is ``None``, when the request fails, or when
        the reply is not a JSON array. This extraction is best-effort and
        never raises for those cases.
    """
    if client is None:
        # No model available: there is nothing to extract with.
        return []

    prompt = f"""From this document content, extract any "Next Steps" or related tutorial links mentioned.
Return as a JSON array of objects with 'title' and 'url' fields.
If no next steps are found, return an empty array.

Document content:
{file_content[:3000]}

Return ONLY valid JSON array, nothing else."""

    try:
        response = client.chat.completions.create(
            model=AZURE_OPENAI_DEPLOYMENT,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that extracts structured data from documents."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=500
        )
        # The message content can be None (e.g. a filtered reply).
        result = (response.choices[0].message.content or "").strip()

        # Models frequently wrap JSON in a Markdown code fence even when
        # told not to; unwrap it before parsing.
        fenced = re.match(r"^```[A-Za-z]*\s*(.*?)\s*```$", result, re.DOTALL)
        if fenced:
            result = fenced.group(1)

        parsed = json.loads(result)
    except Exception as e:
        # Best-effort boundary around a remote call and untrusted output:
        # report the problem instead of hiding it, but do not abort the run.
        print(f"  Warning: could not extract next steps: {e}")
        return []

    # Callers store this value as a list; discard any other JSON shape.
    return parsed if isinstance(parsed, list) else []
| 95 | + |
def extract_env_variables(client, file_content):
    """Use Azure OpenAI to list the environment variables a document needs.

    Args:
        client: Object exposing ``chat.completions.create``, or ``None`` when
            no client could be initialized.
        file_content: Document text. Only the first 3000 characters are
            included in the prompt.

    Returns:
        The list decoded from the model's JSON reply; the prompt asks for
        objects with ``inputType``, ``commandKey``, ``title`` and
        ``defaultValue`` fields. An empty list is returned when ``client`` is
        ``None``, when the request fails, or when the reply is not a JSON
        array.
    """
    if client is None:
        # No model available: there is nothing to extract with.
        return []

    # '${{' and '}}' are escaped braces; the prompt text contains a literal
    # ${VARIABLE_NAME}.
    prompt = f"""From this document content, identify all environment variables that need to be set before running the commands.
Look for patterns like:
- export VARIABLE_NAME=
- $VARIABLE_NAME or ${{VARIABLE_NAME}}
- Variables mentioned in instructions

For each variable, provide a user-friendly title.
Return as a JSON array of objects with these fields:
- inputType: "textInput"
- commandKey: the exact variable name
- title: user-friendly title in Title Case (e.g., RESOURCE_GROUP -> "Resource Group Name")
- defaultValue: ""

Document content:
{file_content[:3000]}

Return ONLY valid JSON array, nothing else."""

    try:
        response = client.chat.completions.create(
            model=AZURE_OPENAI_DEPLOYMENT,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that extracts environment variables from technical documents."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=500
        )
        # The message content can be None (e.g. a filtered reply).
        result = (response.choices[0].message.content or "").strip()

        # Models frequently wrap JSON in a Markdown code fence even when
        # told not to; unwrap it before parsing.
        fenced = re.match(r"^```[A-Za-z]*\s*(.*?)\s*```$", result, re.DOTALL)
        if fenced:
            result = fenced.group(1)

        parsed = json.loads(result)
    except Exception as e:
        # Best-effort boundary around a remote call and untrusted output:
        # report the problem instead of hiding it, but do not abort the run.
        print(f"  Warning: could not extract environment variables: {e}")
        return []

    # Callers store this value as a list; discard any other JSON shape.
    return parsed if isinstance(parsed, list) else []
| 130 | + |
51 | 131 | def generate_folder_name(client, title, file_content_snippet):
|
52 | 132 | """Use Azure OpenAI to generate an intuitive folder name"""
|
53 | 133 | prompt = f"""Given this document title: "{title}"
|
@@ -87,6 +167,96 @@ def pascal_to_kebab(name):
|
87 | 167 | tokens = re.findall(r'[A-Z](?:[a-z]+|[A-Z]*(?=[A-Z]|$))', name)
|
88 | 168 | return '-'.join(t.lower() for t in tokens)
|
89 | 169 |
|
def update_metadata_json(scenarios_dir, client):
    """Add entries to metadata.json for scenario markdown files it lacks.

    Every ``*.md`` file found directly inside a subdirectory of
    ``scenarios_dir`` (``__pycache__`` excluded) gets the key
    ``"<folder>/<file>"``. Files whose key is not yet listed are described
    with the title/description extractors and the two model-backed
    extractors, and the resulting entries are appended to the file.

    Args:
        scenarios_dir: Directory that holds the scenario folders and
            ``metadata.json``.
        client: Client handed to ``extract_next_steps`` and
            ``extract_env_variables``.

    Returns:
        None. ``metadata.json`` is rewritten only when at least one entry
        was added.

    Raises:
        TypeError: If an existing ``metadata.json`` does not contain a JSON
            array.
    """
    scenarios_path = Path(scenarios_dir)
    metadata_file = scenarios_path / "metadata.json"

    # Load existing metadata
    if metadata_file.exists():
        with open(metadata_file, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
    else:
        metadata = []

    if not isinstance(metadata, list):
        raise TypeError(
            f"{metadata_file} must contain a JSON array, "
            f"got {type(metadata).__name__}"
        )

    # Entries without a usable string key are ignored instead of crashing.
    existing_keys = {
        entry['key']
        for entry in metadata
        if isinstance(entry, dict) and isinstance(entry.get('key'), str)
    }

    def is_listed(key):
        # A key counts as present on an exact match, or when a stored key
        # ends with it at a path boundary (stored keys may carry a
        # directory prefix). A plain substring test would confuse
        # "aks/README.md" with "create-aks/README.md".
        suffix = "/" + key
        return any(
            existing == key or existing.endswith(suffix)
            for existing in existing_keys
        )

    new_entries = []

    # Sorted so the order of appended entries does not depend on the order
    # in which the filesystem happens to list directories and files.
    for folder in sorted(scenarios_path.iterdir()):
        if not folder.is_dir() or folder.name == "__pycache__":
            continue

        for md_file in sorted(folder.glob("*.md")):
            key = f"{folder.name}/{md_file.name}"
            if is_listed(key):
                continue

            print(f"\nProcessing new metadata entry for: {key}")

            # Read file content
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()

            # Extract information
            title = extract_title_from_markdown(md_file)
            description = extract_description_from_markdown(md_file)

            # Use AI to extract next steps and env variables
            next_steps = extract_next_steps(client, content)
            configurable_params = extract_env_variables(client, content)

            new_entries.append({
                "status": "active",
                "key": key,
                "title": title,
                "description": description,
                "stackDetails": "",
                "sourceUrl": f"https://raw.githubusercontent.com/MicrosoftDocs/executable-docs/main/scenarios/{key}",
                "documentationUrl": "",
                "nextSteps": next_steps,
                "configurations": {
                    "permissions": [],
                    "configurableParams": configurable_params
                }
            })
            print(f"  Added metadata for: {key}")

    if not new_entries:
        print("\nNo new entries needed for metadata.json")
        return

    metadata.extend(new_entries)

    # Write updated metadata back to file
    with open(metadata_file, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=4, ensure_ascii=False)

    print(f"\nAdded {len(new_entries)} new entries to metadata.json")
| 243 | + |
source_dir = "tools/success"
target_dir = "scenarios"

# Run the metadata refresh only when this file is executed as a script.
# Without the guard, merely importing the module would contact Azure OpenAI,
# rewrite metadata.json and terminate the importing process via sys.exit.
# NOTE(review): when run as a script the exit below still happens before the
# definitions that follow in the file are reached; this looks like a
# temporary shortcut. Confirm whether it should stay.
if __name__ == "__main__":
    import sys

    # Setup Azure OpenAI
    try:
        client = setup_azure_openai()
        print("Azure OpenAI client initialized successfully")
    except Exception as e:
        print(f"Warning: Could not initialize Azure OpenAI: {e}")
        print("Will use fallback naming method")
        client = None

    print("\n" + "="*60)
    print("Updating metadata.json...")
    update_metadata_json(target_dir, client)
    sys.exit(0)
| 259 | + |
90 | 260 | def process_success_files(source_dir, target_dir, dry_run=False):
|
91 | 261 | """Process all markdown files with 'success' in filename"""
|
92 | 262 | source_path = Path(source_dir)
|
|
0 commit comments