Notes
Notes - notes.io |
import networkx as nx
# Location of the triplet file handed to read_triplets() below.
# NOTE(review): the literal "C:U" looks truncated or redacted and is unlikely to
# be a usable path — set it to the real triplet file before running.
TRIPLET_FILE_PATH = r"C:U"
def read_triplets(file_path):
    """Load (subject, predicate, object) triplets from a tab-separated text file.

    Each line is stripped of surrounding whitespace and split on TAB characters;
    only lines that yield exactly three fields are kept, every other line is
    silently skipped.

    Args:
        file_path: Path of the UTF-8 encoded triplet file.

    Returns:
        A list of 3-tuples of strings. If the file cannot be opened or decoded,
        the error is printed and whatever was parsed before the failure (possibly
        an empty list) is returned instead of raising.
    """
    triplets = []
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                # Fields are separated by a real TAB; splitting on the letter
                # "t" would shred any line that contains that letter.
                parts = line.strip().split('\t')
                if len(parts) == 3:
                    triplets.append(tuple(parts))
        print(f"✅ Loaded {len(triplets)} triplets from {file_path}")
    except (OSError, UnicodeError, TypeError, ValueError) as e:
        # Best-effort loader: report unreadable/undecodable files and invalid
        # path arguments, then fall through to return what we have.
        print(f"❌ Error reading triplet file: {e}")
    return triplets
# Read the triplets once at import time; every later step works on this list.
triplets = read_triplets(TRIPLET_FILE_PATH)

# Build NetworkX graph
print("Step 1: Building NetworkX graph from triplets...")
graph = nx.MultiDiGraph()
for s, p, o in triplets:
    # Register both endpoints, then connect them with an edge that carries the
    # predicate in its 'relation' attribute.
    graph.add_node(s)
    graph.add_node(o)
    graph.add_edge(s, o, relation=p)
print(f"Step 1 completed! Nodes: {graph.number_of_nodes()}, Edges: {graph.number_of_edges()}")
# === ADVANCED UNIVERSAL QUESTION ANSWERING ENGINE ===
import ast
import os
import re
import traceback
from difflib import get_close_matches

import openai
# Set your OpenAI API key via the OPENAI_API_KEY environment variable so the
# real secret never has to live in source control. The literal below is only a
# placeholder fallback used when the variable is not set.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', 'A')
openai.api_key = OPENAI_API_KEY
def split_multi_questions(question):
    """Split a complex question into multiple sub-questions using the LLM.

    A cheap textual pre-filter runs first: if the question contains none of the
    indicator substrings, it is returned unchanged and no API call is made.
    Otherwise the model is asked for a Python list literal of sub-questions.

    Args:
        question: The raw user question.

    Returns:
        A list of question strings. Falls back to ``[question]`` whenever the
        API call fails, the reply is not a list literal, or the list contains no
        usable (non-empty string) entries.
    """
    # Only split if there are clear indicators of multiple questions
    question_indicators = ['?', ' and what ', ' and which ', ' also what ', ' also which ']
    lowered = question.lower()
    if not any(indicator in lowered for indicator in question_indicators):
        return [question]  # Don't split single questions

    prompt = f'''
You are a question analysis expert. Given a user question, determine if it contains multiple DISTINCT sub-questions and split them ONLY if they are clearly separate queries.
Rules:
1. ONLY split if the question contains multiple DISTINCT queries with clear separators like "And what is...", "Also which...", multiple "?" marks
2. DO NOT split single complex questions - keep them as one item
3. Each sub-question should be complete and self-contained
4. Return as a Python list of strings, one per sub-question
5. If unsure, keep as single question
User Question: {question}
Return only a Python list, nothing else.
'''
    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a question parsing expert. Return only the requested Python list."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,
        )
        result = response.choices[0].message.content.strip()
    except Exception as e:
        # Any API/transport/shape problem degrades to "treat as one question".
        print(f"Question splitting failed: {e}")
        return [question]

    if not (result.startswith('[') and result.endswith(']')):
        return [question]

    try:
        # literal_eval only accepts Python literals, so model output can never
        # execute code here (unlike eval()).
        parsed = ast.literal_eval(result)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return [question]

    if not isinstance(parsed, list):
        return [question]

    # Keep only real, non-blank strings; anything else cannot be asked downstream.
    questions = [item.strip() for item in parsed if isinstance(item, str) and item.strip()]
    return questions if questions else [question]
def ask_any_question(question, graph):
    """Answer a user question, handling several questions in one input.

    The input is first passed to split_multi_questions(). When more than one
    sub-question comes back, each is answered independently with
    process_single_question() and the results are joined into one numbered,
    Markdown-style report; otherwise the original question is answered directly.

    Args:
        question: The raw user input.
        graph: The NetworkX graph to query.

    Returns:
        The answer text as a string.
    """
    # First, check if this is a multi-part question
    sub_questions = split_multi_questions(question)
    if len(sub_questions) <= 1:
        # Handle single question
        return process_single_question(question, graph)

    # Handle multiple questions
    print(f"Detected {len(sub_questions)} questions. Processing each...")
    answers = []
    for i, sub_q in enumerate(sub_questions, 1):
        print(f" Processing question {i}: {sub_q}")
        answer = process_single_question(sub_q, graph)
        # Real newlines separate the question from its answer and the entries
        # from each other (the previous literals emitted the letter "n").
        answers.append(f"**Question {i}:** {sub_q}\n**Answer {i}:** {answer}")
    return "\n\n" + "\n\n".join(answers)
# Synonym groups used by search_programs_offering() to widen a keyword search.
_RELATED_TERMS = {
    "design": ["design", "custom", "engineering", "development"],
    "demo": ["demo", "demonstration", "trial", "sample"],
    "discount": ["discount", "rebate", "incentive", "reduction"],
    "credit": ["credit", "payment", "fee", "charge"],
    "annual": ["annual", "yearly", "year", "12 month"],
    "group": ["group", "bulk", "volume", "multiple"],
}


def _is_vendor_like(node):
    """Return True for nodes the helpers treat as vendor names (strings longer than 5 chars)."""
    return isinstance(node, str) and len(node) > 5


def _closest_vendors(query, graph, n, cutoff):
    """Return up to `n` vendor-like node names closest to `query`, best match first."""
    if not isinstance(query, str):
        return []
    # Map lower-cased name -> original name once, instead of rebuilding the
    # lower-cased list and calling .index() for every match.
    by_lower = {}
    for node in graph.nodes:
        if _is_vendor_like(node):
            by_lower.setdefault(node.lower(), node)
    matches = get_close_matches(query.lower(), list(by_lower), n=n, cutoff=cutoff)
    return [by_lower[match] for match in matches]


def _fuzzy_find_vendor(query, graph):
    """Find vendor using fuzzy matching; returns the best node name or None."""
    matches = _closest_vendors(query, graph, n=3, cutoff=0.4)
    return matches[0] if matches else None


def _get_vendor_info(vendor, relation_type, graph):
    """Return the first target linked from `vendor` by `relation_type`, or None."""
    # out_edges(None) means "all nodes" in NetworkX, which would silently return
    # some other vendor's data when the lookup failed upstream.
    if vendor is None or vendor not in graph:
        return None
    for _, target, data in graph.out_edges(vendor, data=True):
        if data.get('relation') == relation_type:
            return target
    return None


def _explore_vendor_relations(vendor, graph):
    """Return every (relation, target) pair on the outgoing edges of `vendor`."""
    if vendor is None or vendor not in graph:
        return []
    return [(data.get('relation'), target)
            for _, target, data in graph.out_edges(vendor, data=True)]


def _get_all_vendors_with_relation(relation_type, graph):
    """Return (source, target) for every edge whose relation equals `relation_type`."""
    return [(source, target)
            for source, target, data in graph.edges(data=True)
            if data.get('relation') == relation_type]


def _search_vendors_by_content(search_term, graph):
    """Return (vendor, relation, target) for edges whose string target contains `search_term`."""
    needle = search_term.lower()
    return [(source, data.get('relation'), target)
            for source, target, data in graph.edges(data=True)
            if _is_vendor_like(source) and isinstance(target, str) and needle in target.lower()]


def _get_all_relations(graph):
    """Return the sorted list of distinct, non-empty relation values in the graph."""
    relations = {data.get('relation') for _, _, data in graph.edges(data=True)}
    return sorted(rel for rel in relations if rel)


def _find_vendors_with_attribute(attribute_keywords, graph):
    """Return (vendor, relation, target) for edges whose target contains any keyword."""
    # A bare string would otherwise be iterated character by character and
    # match almost every edge.
    if isinstance(attribute_keywords, str):
        attribute_keywords = [attribute_keywords]
    keywords = [keyword.lower() for keyword in attribute_keywords]
    results = []
    for source, target, data in graph.edges(data=True):
        if not (_is_vendor_like(source) and isinstance(target, str)):
            continue
        target_lower = target.lower()
        if any(keyword in target_lower for keyword in keywords):
            results.append((source, data.get('relation'), target))
    return results


def _format_pricing_info(pricing_text):
    """Condense long pricing text into its percentages and dollar amounts."""
    if not pricing_text or pricing_text == "None":
        return "No pricing information found"
    pricing_text = str(pricing_text)
    percentages = re.findall(r'(\d+(?:\.\d+)?%)', pricing_text)
    # `{2}` is a plain quantifier here so "$1,234.56" keeps its cents.
    dollars = re.findall(r'\$[\d,]+(?:\.\d{2})?', pricing_text)
    formatted = []
    if percentages:
        formatted.append("Rates: " + ", ".join(percentages))
    if dollars:
        formatted.append("Amounts: " + ", ".join(dollars))
    if not formatted:
        return pricing_text[:100] + "..." if len(pricing_text) > 100 else pricing_text
    return " | ".join(formatted)


def _get_vendor_alternatives(vendor_query, graph):
    """Return up to five similar vendor names for a query that had no exact match."""
    return _closest_vendors(vendor_query, graph, n=5, cutoff=0.3)


def _search_programs_offering(service_description, graph):
    """Search for vendors offering a service via phrase, keyword and synonym matching."""
    # Method 1: Direct content search
    results = list(_search_vendors_by_content(service_description, graph))
    # Method 2: Keyword-based search
    keywords = service_description.lower().split()
    results.extend(_find_vendors_with_attribute(keywords, graph))
    # Method 3: Related terms search
    for keyword in keywords:
        for term in _RELATED_TERMS.get(keyword, ()):
            results.extend(_search_vendors_by_content(term, graph))
    # Remove duplicates, keeping the first occurrence of each (vendor, content)
    unique_results = []
    seen = set()
    for vendor, relation, content in results:
        key = (vendor, content)
        if key not in seen:
            seen.add(key)
            unique_results.append((vendor, relation, content))
    return unique_results


def _extract_discount_percentage(text):
    """Return the largest percentage (or "N off" amount) found in `text`, else 0."""
    if not text:
        return 0
    lowered = text.lower()
    # Look for percentage patterns
    percentages = re.findall(r'(\d+(?:\.\d+)?)%', lowered)
    if percentages:
        return max(float(p) for p in percentages)
    # Look for "off" patterns
    off_matches = re.findall(r'(\d+(?:\.\d+)?)%?\s*off', lowered)
    if off_matches:
        return max(float(m) for m in off_matches)
    return 0


def _get_top_vendors_by_discount(graph, limit=10):
    """Return up to `limit` (vendor, pct, content, relation) tuples, highest discount first."""
    # Keep each vendor's BEST discount rather than whichever edge happened to be
    # seen first, so the ranking reflects the strongest offer per vendor.
    best = {}
    for term in ("discount", "off", "%", "rebate", "allowance"):
        for vendor, relation, content in _search_vendors_by_content(term, graph):
            discount_pct = _extract_discount_percentage(content)
            if discount_pct > 0 and (vendor not in best or discount_pct > best[vendor][1]):
                best[vendor] = (vendor, discount_pct, content, relation)
    ranked = sorted(best.values(), key=lambda entry: entry[1], reverse=True)
    return ranked[:limit]


def _sample_relations(graph, max_edges=50, max_relations=20):
    """Return an alphabetical sample of relation names to show the LLM.

    Scans edges in graph order until more than `max_edges` relation-bearing
    edges have been seen, then returns the first `max_relations` names in
    sorted order (this is a sample, not a frequency ranking).
    """
    seen = set()
    examined = 0
    for _, _, data in graph.edges(data=True):
        if 'relation' in data:
            seen.add(data['relation'])
            examined += 1
        if examined > max_edges:  # Limit sample size
            break
    return sorted(seen)[:max_relations]


def _build_exec_namespace(question, graph):
    """Build the globals dict in which the LLM-generated code is executed."""
    return {
        "graph": graph,
        "get_close_matches": get_close_matches,
        "re": re,
        "nx": nx,
        "question": question,
        # Helper functions advertised to the model in the prompt.
        "fuzzy_find_vendor": _fuzzy_find_vendor,
        "get_vendor_info": _get_vendor_info,
        "explore_vendor_relations": _explore_vendor_relations,
        "get_all_vendors_with_relation": _get_all_vendors_with_relation,
        "search_vendors_by_content": _search_vendors_by_content,
        "get_all_relations": _get_all_relations,
        "find_vendors_with_attribute": _find_vendors_with_attribute,
        "format_pricing_info": _format_pricing_info,
        "get_vendor_alternatives": _get_vendor_alternatives,
        "search_programs_offering": _search_programs_offering,
        "extract_discount_percentage": _extract_discount_percentage,
        "get_top_vendors_by_discount": _get_top_vendors_by_discount,
    }


def _build_prompt(question, relations_list):
    """Render the code-generation prompt for one question.

    The embedded code samples are indented and their backslashes / literal
    braces are escaped so the model is shown valid Python (``\\d``, ``{{2}}``
    and ``"\\n".join`` render as ``\d``, ``{2}`` and ``"\n".join``).
    """
    return f'''
You are an expert data analyst with access to a NetworkX MultiDiGraph containing business vendor data.
GRAPH STRUCTURE:
- Nodes: Vendor names and data values (contact info, pricing, etc.)
- Edges: Each edge has a 'relation' attribute
- The graph variable is named `graph`
ACTUAL RELATIONS IN THE GRAPH:
Here are the actual relation types that exist in your data:
{relations_list}
IMPORTANT RELATION MAPPINGS:
- Contact person name: HAS_CONTACT
- Contact phone: HAS_CONTACT_PHONE
- Contact email: HAS_CONTACT_EMAIL
- Contact title: HAS_CONTACT_TITLE
- Pricing info: HAS_PRICING
- DFI programs: DFI_PROGRAM
- Products: OFFERS_PRODUCT
- Address: HAS_ADDRESS
- Website: HAS_WEBSITE
AVAILABLE HELPER FUNCTIONS:
```python
from difflib import get_close_matches
import re
import networkx as nx
def fuzzy_find_vendor(query, graph):
    """Find vendor using fuzzy matching"""
    vendors = [n for n in graph.nodes if isinstance(n, str) and len(n) > 5]
    matches = get_close_matches(query.lower(), [v.lower() for v in vendors], n=3, cutoff=0.4)
    if matches:
        idx = [v.lower() for v in vendors].index(matches[0])
        return vendors[idx]
    return None
def get_vendor_info(vendor, relation_type, graph):
    """Get specific info for a vendor - REQUIRES ALL 3 PARAMETERS"""
    for _, target, data in graph.out_edges(vendor, data=True):
        if data.get('relation') == relation_type:
            return target
    return None
def explore_vendor_relations(vendor, graph):
    """Get all relations and values for a vendor"""
    relations = []
    for _, target, data in graph.out_edges(vendor, data=True):
        relations.append((data.get('relation'), target))
    return relations
def get_all_vendors_with_relation(relation_type, graph):
    """Get all vendors that have a specific relation"""
    vendors = []
    for node in graph.nodes():
        for _, target, data in graph.out_edges(node, data=True):
            if data.get('relation') == relation_type:
                vendors.append((node, target))
    return vendors
def search_vendors_by_content(search_term, graph):
    """Search for vendors that have any relation containing specific content"""
    results = []
    search_lower = search_term.lower()
    for node in graph.nodes():
        if isinstance(node, str) and len(node) > 5:
            for _, target, data in graph.out_edges(node, data=True):
                if isinstance(target, str) and search_lower in target.lower():
                    results.append((node, data.get('relation'), target))
    return results
def get_all_relations(graph):
    """Get all unique relations in the graph"""
    relations = set()
    for _, _, data in graph.edges(data=True):
        rel = data.get('relation')
        if rel:
            relations.add(rel)
    return sorted(list(relations))
def find_vendors_with_attribute(attribute_keywords, graph):
    """Find vendors that have attributes containing any of the keywords"""
    results = []
    for node in graph.nodes():
        if isinstance(node, str) and len(node) > 5:
            for _, target, data in graph.out_edges(node, data=True):
                if isinstance(target, str):
                    target_lower = target.lower()
                    if any(keyword.lower() in target_lower for keyword in attribute_keywords):
                        results.append((node, data.get('relation'), target))
    return results
def format_pricing_info(pricing_text):
    """Extract and format key pricing information from long text"""
    if not pricing_text or pricing_text == "None":
        return "No pricing information found"
    import re
    percentages = re.findall(r'(\\d+(?:\\.\\d+)?%)', pricing_text)
    dollars = re.findall(r'\\$[\\d,]+(?:\\.\\d{{2}})?', pricing_text)
    formatted = []
    if percentages:
        formatted.append(f"Rates: {{', '.join(percentages)}}")
    if dollars:
        formatted.append(f"Amounts: {{', '.join(dollars)}}")
    if not formatted:
        return pricing_text[:100] + "..." if len(pricing_text) > 100 else pricing_text
    return " | ".join(formatted)
def get_vendor_alternatives(vendor_query, graph):
    """Get alternative vendor suggestions when exact match fails"""
    vendors = [n for n in graph.nodes if isinstance(n, str) and len(n) > 5]
    matches = get_close_matches(vendor_query.lower(), [v.lower() for v in vendors], n=5, cutoff=0.3)
    if matches:
        suggestions = []
        for match in matches:
            idx = [v.lower() for v in vendors].index(match)
            suggestions.append(vendors[idx])
        return suggestions
    return []
def search_programs_offering(service_description, graph):
    """Comprehensive search for programs/vendors offering specific services"""
    results = []
    content_results = search_vendors_by_content(service_description, graph)
    results.extend(content_results)
    keywords = service_description.lower().split()
    keyword_results = find_vendors_with_attribute(keywords, graph)
    results.extend(keyword_results)
    unique_results = []
    seen = set()
    for vendor, relation, content in results:
        key = (vendor, content)
        if key not in seen:
            unique_results.append((vendor, relation, content))
            seen.add(key)
    return unique_results
```
IMPORTANT FUNCTION USAGE EXAMPLES:
- pricing = get_vendor_info(vendor_name, 'HAS_PRICING', graph) # Note: 3 parameters!
- contact = get_vendor_info(vendor_name, 'HAS_CONTACT_PHONE', graph)
- all_pricing = get_all_vendors_with_relation('HAS_PRICING', graph)
- vendor = fuzzy_find_vendor("access network", graph)
- relations = explore_vendor_relations(vendor_name, graph)
- formatted_price = format_pricing_info(pricing_text) # Clean up long pricing text
- alternatives = get_vendor_alternatives("ADI", graph) # Get similar vendors when exact match fails
CONTENT-BASED SEARCH EXAMPLES:
- programs = search_programs_offering("design services", graph) # BEST for "programs that offer X"
- programs = search_programs_offering("demo discounts", graph)
- programs = search_programs_offering("annual rebates", graph)
- programs = search_programs_offering("no credit card fees", graph)
- top_discounts = get_top_vendors_by_discount(graph, 10) # BEST for "top X by discount"
- design_services = search_vendors_by_content("design services", graph)
- demo_programs = search_vendors_by_content("demo discount", graph)
- rebate_programs = search_vendors_by_content("annual rebate", graph)
- credit_card = search_vendors_by_content("no credit card fees", graph)
- keyword_search = find_vendors_with_attribute(["design", "services"], graph)
- multi_keyword = find_vendors_with_attribute(["demo", "discount", "rebate"], graph)
- fee_search = find_vendors_with_attribute(["credit card", "no fees"], graph)
EXAMPLE PROPER ANSWER STRUCTURE:
```python
# For "top X vendors by discount" queries, use the ranking function
if "top" in question.lower() and ("discount" in question.lower() or "rebate" in question.lower()):
    # Extract number (default to 10)
    import re
    numbers = re.findall(r'\\d+', question)
    limit = int(numbers[0]) if numbers else 10
    top_vendors = get_top_vendors_by_discount(graph, limit)
    if top_vendors:
        answer_parts = []
        for i, (vendor, discount_pct, content, relation) in enumerate(top_vendors, 1):
            clean_content = content[:80] + "..." if len(content) > 80 else content
            answer_parts.append(f"{{i}}. ✅ **{{vendor}}**: {{discount_pct}}% - {{clean_content}}")
        answer = "\\n".join(answer_parts)
    else:
        answer = "❌ No vendors found with measurable discount percentages"
# For regular "programs that offer X" queries
else:
    search_term = "design services" # Extract from user question
    results = search_programs_offering(search_term, graph)
    if results:
        answer_parts = []
        for vendor, relation, content in results[:10]:
            answer_parts.append(f"✅ {{vendor}}: {{content}} ({{relation}})")
        answer = "\\n".join(answer_parts)
    else:
        answer = f"❌ No programs found offering {{search_term}} in the database"
```
TASK INSTRUCTIONS:
1. ALWAYS use fuzzy_find_vendor() for finding vendors (handles typos, partial names)
2. Use explore_vendor_relations() to see all available info for a vendor
3. Use the EXACT relation names from the list above
4. For contact person, use 'HAS_CONTACT' relation
5. Be comprehensive - if asked about pricing, include all relevant pricing info
6. For percentage/rate questions, look for numeric values in pricing fields
7. For "best" or "top" questions, compare values across multiple vendors
8. Use get_all_vendors_with_relation() to find ALL vendors with specific attributes
9. Always set the variable `answer` to your final result as a string
10. If vendor not found, suggest similar vendors using fuzzy matching
11. When searching for DFI or pricing percentages, look in HAS_PRICING fields for numbers followed by %
12. For "find vendors that offer X" queries, use search_vendors_by_content("X", graph)
13. For "vendors with free shipping/freight" queries, use find_vendors_with_attribute(["free", "freight"], graph)
14. Use get_all_relations(graph) to discover available relation types if needed
15. For content-based searches, always check multiple keywords and variations
CRITICAL ERROR HANDLING:
- ALWAYS check if variables are None before using them in regex or string operations
- Use format_pricing_info() function instead of manual regex on pricing data
- Example: pricing = get_vendor_info(vendor, 'HAS_PRICING', graph)
if pricing: formatted = format_pricing_info(pricing)
- NEVER use re.search() directly on variables that might be None
SPECIAL HANDLING FOR "PROGRAMS THAT OFFER X" QUERIES:
- For "Show me programs that offer X" queries, use BOTH search methods:
1. search_vendors_by_content("X", graph) - searches for exact content match
2. find_vendors_with_attribute(["key", "words"], graph) - searches for keywords
- Examples:
* "programs that offer design services" → search_vendors_by_content("design services", graph) AND find_vendors_with_attribute(["design", "services"], graph)
* "programs that offer demo discounts" → search_vendors_by_content("demo discounts", graph) AND find_vendors_with_attribute(["demo", "discount"], graph)
* "programs with no credit card fees" → search_vendors_by_content("no credit card fees", graph) AND find_vendors_with_attribute(["credit card", "no fees", "fee"], graph)
SPECIAL HANDLING FOR "TOP X" RANKING QUERIES:
- For "Top X vendors by discount/rebate/incentive" queries, use get_top_vendors_by_discount(graph, limit)
- This function extracts numerical percentages and ranks vendors by highest discount
- Examples:
* "Top 10 vendors by strongest discounts" → get_top_vendors_by_discount(graph, 10)
* "Best discount programs" → get_top_vendors_by_discount(graph, 10)
* "Highest rebates available" → get_top_vendors_by_discount(graph, 10)
- Format results showing: "1. ✅ **Vendor Name**: 25% - Description..."
ANSWER FORMATTING RULES:
- Always provide CLEAR, STRUCTURED answers
- If information not found, explain what you searched for and suggest alternatives
- Extract key numbers (percentages, rates) from long text and present them clearly
- Use bullet points or numbered lists for multiple pieces of information
- For "None" results, investigate further using explore_vendor_relations() to see what IS available
- Format like: "✅ VENDOR NAME: Key info here" or "❌ VENDOR NAME: Not found, but here are similar vendors..."
QUESTION: {question}
Write Python code to answer this question. Always assign your final answer to the variable `answer`.
'''


def process_single_question(question, graph):
    """Answer one question by having the LLM write graph-query code and running it.

    Steps: sample relation names from the graph for context, ask the model for
    Python code, extract the first ```python fenced block if one is present,
    execute the code in a namespace holding the graph and the helper functions,
    and return whatever the code stored in the variable ``answer``.

    Args:
        question: A single user question.
        graph: The NetworkX graph to query; its edges are expected to carry a
            'relation' attribute.

    Returns:
        The answer as a string. Failures are reported in the returned text
        rather than raised: execution errors include the generated code, API
        errors include the traceback.
    """
    # Get sample of actual relations in the graph for LLM context
    relations_list = _sample_relations(graph)
    prompt = _build_prompt(question, relations_list)
    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an expert Python developer and data analyst specializing in graph queries."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.1,
        )
        code = response.choices[0].message.content
        # Extract code block if present
        match = re.search(r"```python(.*?)```", code, re.DOTALL)
        if match:
            code = match.group(1)
        local_vars = _build_exec_namespace(question, graph)
        try:
            # SECURITY: this executes model-generated code with full interpreter
            # privileges. Only run against trusted input, or sandbox it.
            exec(code, local_vars, local_vars)
            answer = local_vars.get("answer", "No answer variable was set by the generated code.")
            return str(answer)
        except Exception as e:
            # Enhanced error handling with code debugging
            error_msg = f"Code execution error: {e}"
            if "not found" in str(e).lower() or "nonetype" in str(e).lower():
                error_msg += "\n\nThis might be due to vendor name not matching exactly. Try using fuzzy_find_vendor() function."
            return f"{error_msg}\n\nGenerated code:\n```python\n{code}\n```"
    except Exception as e:
        return f"LLM API error: {e}\n{traceback.format_exc()}"
# === ENHANCED INTERACTIVE QUESTION LOOP ===
def interactive_qa_loop(graph):
    """Run a console question/answer loop against `graph`.

    Prompts repeatedly, forwards each line to ask_any_question() and prints the
    answer. Typing "exit" or "quit" (any case), closing stdin, or pressing
    Ctrl-C at the prompt ends the loop. Errors raised while answering are
    printed and the loop continues.

    Args:
        graph: The NetworkX graph passed through to ask_any_question().
    """
    while True:
        try:
            q = input("\n Ask anything: ")
        except (EOFError, KeyboardInterrupt):
            # No more input (piped stdin ended) or the user interrupted.
            print("\nGoodbye!")
            break
        if q.strip().lower() in ("exit", "quit"):
            print("Goodbye!")
            break
        print("\n [Processing...]")
        try:
            answer = ask_any_question(q, graph)
            print(f"\n Answer:\n{answer}")
        except Exception as e:
            # Keep the session alive; one bad question should not end it.
            print(f"\n Error: {e}")
        print("\n" + "-"*40)
# Start the interactive session immediately when this module is executed,
# using the graph built from the triplet file above.
interactive_qa_loop(graph)
![]() |
Notes is a web-based application for taking notes online. You can take notes and share them with other people. If you like taking long notes, notes.io is designed for you. To date, over 8,000,000,000 notes have been created, and the number keeps growing.
With notes.io;
- * You can take a note from anywhere and any device with internet connection.
- * You can share the notes in social platforms (YouTube, Facebook, Twitter, instagram etc.).
- * You can quickly share your contents without website, blog and e-mail.
- * You don't need to create an account to share a note. If you wish, you can share quick, easy, shortened note links via SMS, websites, e-mail, or messaging services (WhatsApp, iMessage, Telegram, Signal).
- * Notes.io has fabulous infrastructure design for a short link and allows you to share the note as an easy and understandable link.
Fast: Notes.io is built for speed and performance. You can take notes quickly and browse your archive.
Easy: Notes.io doesn’t require installation. Just write and share note!
Short: Notes.io’s URL is just 8 characters long. You’ll get a shortened link to your note when you want to share it. (Ex: notes.io/q )
Free: Notes.io has been running for 14 years and has been free since the day it started.
You immediately create your first note and start sharing with the ones you wish. If you want to contact us, you can use the following communication channels;
Email: [email protected]
Twitter: http://twitter.com/notesio
Instagram: http://instagram.com/notes.io
Facebook: http://facebook.com/notesio
Regards;
Notes.io Team
