import os
import re
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
from wordcloud import WordCloud

from langchain.agents import AgentType, Tool, initialize_agent
from langchain_community.tools.pubmed.tool import PubmedQueryRun
from langchain_google_genai import ChatGoogleGenerativeAI


class AdvancedPubMedResearcher:
    """Advanced PubMed research assistant with analysis capabilities"""
    def __init__(self, gemini_api_key=None):
        """Initialize the researcher with optional Gemini integration"""
        self.pubmed_tool = PubmedQueryRun()
        self.research_cache = {}

        if gemini_api_key:
            os.environ["GOOGLE_API_KEY"] = gemini_api_key
            self.llm = ChatGoogleGenerativeAI(
                model="gemini-1.5-flash",
                temperature=0,
                convert_system_message_to_human=True
            )
            self.agent = self._create_agent()
        else:
            self.llm = None
            self.agent = None
    def _create_agent(self):
        """Create a LangChain agent wired to the PubMed tool"""
        tools = [
            Tool(
                name="PubMed Search",
                func=self.pubmed_tool.invoke,
                description="Search PubMed for biomedical literature. Use specific terms."
            )
        ]

        return initialize_agent(
            tools,
            self.llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True
        )
    def search_papers(self, query, max_results=5):
        """Search PubMed and parse the results"""
        # NOTE: max_results is accepted for API symmetry; the underlying
        # tool controls how many results actually come back.
        print(f"🔍 Searching PubMed for: '{query}'")

        try:
            results = self.pubmed_tool.invoke(query)
            papers = self._parse_pubmed_results(results)

            self.research_cache[query] = {
                'papers': papers,
                'timestamp': datetime.now(),
                'query': query
            }

            print(f"✅ Found {len(papers)} papers")
            return papers

        except Exception as e:
            print(f"❌ Error searching PubMed: {str(e)}")
            return []
    def _parse_pubmed_results(self, results):
        """Parse PubMed search results into structured data"""
        papers = []

        # The PubMed tool returns one text blob; individual entries are
        # separated by "Published: " headers.
        publications = results.split('\n\nPublished: ')[1:]

        for pub in publications:
            try:
                lines = pub.strip().split('\n')

                pub_date = lines[0] if lines else "Unknown"

                title_line = next((line for line in lines if line.startswith('Title: ')), '')
                title = title_line.replace('Title: ', '') if title_line else "Unknown Title"

                # The abstract follows the "Summary::" marker, possibly spanning several lines
                summary_start = None
                for i, line in enumerate(lines):
                    if 'Summary::' in line:
                        summary_start = i + 1
                        break

                summary = ""
                if summary_start:
                    summary = ' '.join(lines[summary_start:])

                papers.append({
                    'date': pub_date,
                    'title': title,
                    'summary': summary,
                    'word_count': len(summary.split()) if summary else 0
                })

            except Exception as e:
                print(f"⚠️ Error parsing paper: {str(e)}")
                continue

        return papers
    def analyze_research_trends(self, queries):
        """Analyze trends across multiple research topics"""
        print("📊 Analyzing research trends...")

        all_papers = []
        topic_counts = {}

        for query in queries:
            papers = self.search_papers(query, max_results=3)
            topic_counts[query] = len(papers)

            for paper in papers:
                paper['topic'] = query
                all_papers.append(paper)

        df = pd.DataFrame(all_papers)

        if df.empty:
            print("❌ No papers found for analysis")
            return None

        self._create_visualizations(df, topic_counts)
        return df
    def _create_visualizations(self, df, topic_counts):
        """Create research trend visualizations"""
        plt.style.use('seaborn-v0_8')
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle('PubMed Research Analysis Dashboard', fontsize=16, fontweight='bold')

        # Papers found per topic
        topics = list(topic_counts.keys())
        counts = list(topic_counts.values())

        axes[0, 0].bar(range(len(topics)), counts, color='skyblue', alpha=0.7)
        axes[0, 0].set_xlabel('Research Topics')
        axes[0, 0].set_ylabel('Number of Papers')
        axes[0, 0].set_title('Papers Found by Topic')
        axes[0, 0].set_xticks(range(len(topics)))
        axes[0, 0].set_xticklabels([t[:20] + '...' if len(t) > 20 else t for t in topics],
                                   rotation=45, ha='right')

        # Distribution of abstract lengths
        if 'word_count' in df.columns and not df['word_count'].empty:
            axes[0, 1].hist(df['word_count'], bins=10, color='lightcoral', alpha=0.7)
            axes[0, 1].set_xlabel('Abstract Word Count')
            axes[0, 1].set_ylabel('Frequency')
            axes[0, 1].set_title('Distribution of Abstract Lengths')

        # Publication timeline (dates may not always parse cleanly)
        try:
            dates = pd.to_datetime(df['date'], errors='coerce')
            valid_dates = dates.dropna()
            if not valid_dates.empty:
                axes[1, 0].hist(valid_dates, bins=10, color='lightgreen', alpha=0.7)
                axes[1, 0].set_xlabel('Publication Date')
                axes[1, 0].set_ylabel('Number of Papers')
                axes[1, 0].set_title('Publication Timeline')
                plt.setp(axes[1, 0].xaxis.get_majorticklabels(), rotation=45)
        except Exception:
            axes[1, 0].text(0.5, 0.5, 'Date parsing unavailable',
                            ha='center', va='center', transform=axes[1, 0].transAxes)

        # Word cloud of common title terms
        all_titles = ' '.join(df['title'].fillna('').astype(str))
        if all_titles.strip():
            clean_titles = re.sub(r'[^a-zA-Z\s]', '', all_titles.lower())

            try:
                wordcloud = WordCloud(width=400, height=300, background_color='white',
                                      max_words=50, colormap='viridis').generate(clean_titles)
                axes[1, 1].imshow(wordcloud, interpolation='bilinear')
                axes[1, 1].axis('off')
                axes[1, 1].set_title('Common Words in Titles')
            except Exception:
                axes[1, 1].text(0.5, 0.5, 'Word cloud unavailable',
                                ha='center', va='center', transform=axes[1, 1].transAxes)

        plt.tight_layout()
        plt.show()
    def comparative_analysis(self, topic1, topic2):
        """Compare two research topics"""
        print(f"🔬 Comparing '{topic1}' vs '{topic2}'")

        papers1 = self.search_papers(topic1)
        papers2 = self.search_papers(topic2)

        avg_length1 = sum(p['word_count'] for p in papers1) / len(papers1) if papers1 else 0
        avg_length2 = sum(p['word_count'] for p in papers2) / len(papers2) if papers2 else 0

        print("\n📈 Comparison Results:")
        print(f"Topic 1 ({topic1}):")
        print(f"  - Papers found: {len(papers1)}")
        print(f"  - Avg abstract length: {avg_length1:.1f} words")
        print(f"\nTopic 2 ({topic2}):")
        print(f"  - Papers found: {len(papers2)}")
        print(f"  - Avg abstract length: {avg_length2:.1f} words")

        return papers1, papers2
    def intelligent_query(self, question):
        """Use the AI agent to answer research questions (requires Gemini API)"""
        if not self.agent:
            print("❌ AI agent not available. Please provide a Gemini API key.")
            print("💡 Get a free API key at: https://makersuite.google.com/app/apikey")
            return None

        print(f"🤖 Processing intelligent query with Gemini: '{question}'")

        try:
            response = self.agent.run(question)
            return response
        except Exception as e:
            print(f"❌ Error with AI query: {str(e)}")
            return None

