import os
import re
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
from wordcloud import WordCloud

from langchain.agents import AgentType, Tool, initialize_agent
from langchain_community.tools.pubmed.tool import PubmedQueryRun
from langchain_google_genai import ChatGoogleGenerativeAI


class AdvancedPubMedResearcher:
    """Advanced PubMed research assistant with analysis capabilities"""
    def __init__(self, gemini_api_key=None):
        """Initialize the researcher with optional Gemini integration"""
        self.pubmed_tool = PubmedQueryRun()
        self.research_cache = {}

        if gemini_api_key:
            os.environ["GOOGLE_API_KEY"] = gemini_api_key
            self.llm = ChatGoogleGenerativeAI(
                model="gemini-1.5-flash",
                temperature=0,
                convert_system_message_to_human=True
            )
            self.agent = self._create_agent()
        else:
            self.llm = None
            self.agent = None
    def _create_agent(self):
        """Create a LangChain agent wired to the PubMed tool"""
        tools = [
            Tool(
                name="PubMed Search",
                func=self.pubmed_tool.invoke,
                description="Search PubMed for biomedical literature. Use specific terms."
            )
        ]

        return initialize_agent(
            tools,
            self.llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True
        )
    def search_papers(self, query, max_results=5):
        """Search PubMed and parse the results"""
        # NOTE: max_results is accepted for API symmetry; the underlying
        # tool controls how many results actually come back.
        print(f"🔍 Searching PubMed for: '{query}'")

        try:
            results = self.pubmed_tool.invoke(query)
            papers = self._parse_pubmed_results(results)

            self.research_cache[query] = {
                'papers': papers,
                'timestamp': datetime.now(),
                'query': query
            }

            print(f"✅ Found {len(papers)} papers")
            return papers

        except Exception as e:
            print(f"❌ Error searching PubMed: {str(e)}")
            return []
    def _parse_pubmed_results(self, results):
        """Parse PubMed search results into structured data"""
        papers = []

        # The PubMed tool returns one text blob; individual entries are
        # separated by "Published: " headers.
        publications = results.split('\n\nPublished: ')[1:]

        for pub in publications:
            try:
                lines = pub.strip().split('\n')

                pub_date = lines[0] if lines else "Unknown"

                title_line = next((line for line in lines if line.startswith('Title: ')), '')
                title = title_line.replace('Title: ', '') if title_line else "Unknown Title"

                # The abstract follows the "Summary::" marker, possibly spanning several lines
                summary_start = None
                for i, line in enumerate(lines):
                    if 'Summary::' in line:
                        summary_start = i + 1
                        break

                summary = ""
                if summary_start:
                    summary = ' '.join(lines[summary_start:])

                papers.append({
                    'date': pub_date,
                    'title': title,
                    'summary': summary,
                    'word_count': len(summary.split()) if summary else 0
                })

            except Exception as e:
                print(f"⚠️ Error parsing paper: {str(e)}")
                continue

        return papers
    def analyze_research_trends(self, queries):
        """Analyze trends across multiple research topics"""
        print("📊 Analyzing research trends...")

        all_papers = []
        topic_counts = {}

        for query in queries:
            papers = self.search_papers(query, max_results=3)
            topic_counts[query] = len(papers)

            for paper in papers:
                paper['topic'] = query
                all_papers.append(paper)

        df = pd.DataFrame(all_papers)

        if df.empty:
            print("❌ No papers found for analysis")
            return None

        self._create_visualizations(df, topic_counts)
        return df
    def _create_visualizations(self, df, topic_counts):
        """Create research trend visualizations"""
        plt.style.use('seaborn-v0_8')
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle('PubMed Research Analysis Dashboard', fontsize=16, fontweight='bold')

        # Papers found per topic
        topics = list(topic_counts.keys())
        counts = list(topic_counts.values())

        axes[0, 0].bar(range(len(topics)), counts, color='skyblue', alpha=0.7)
        axes[0, 0].set_xlabel('Research Topics')
        axes[0, 0].set_ylabel('Number of Papers')
        axes[0, 0].set_title('Papers Found by Topic')
        axes[0, 0].set_xticks(range(len(topics)))
        axes[0, 0].set_xticklabels([t[:20] + '...' if len(t) > 20 else t for t in topics],
                                   rotation=45, ha='right')

        # Distribution of abstract lengths
        if 'word_count' in df.columns and not df['word_count'].empty:
            axes[0, 1].hist(df['word_count'], bins=10, color='lightcoral', alpha=0.7)
            axes[0, 1].set_xlabel('Abstract Word Count')
            axes[0, 1].set_ylabel('Frequency')
            axes[0, 1].set_title('Distribution of Abstract Lengths')

        # Publication timeline (dates may not always parse cleanly)
        try:
            dates = pd.to_datetime(df['date'], errors='coerce')
            valid_dates = dates.dropna()
            if not valid_dates.empty:
                axes[1, 0].hist(valid_dates, bins=10, color='lightgreen', alpha=0.7)
                axes[1, 0].set_xlabel('Publication Date')
                axes[1, 0].set_ylabel('Number of Papers')
                axes[1, 0].set_title('Publication Timeline')
                plt.setp(axes[1, 0].xaxis.get_majorticklabels(), rotation=45)
        except Exception:
            axes[1, 0].text(0.5, 0.5, 'Date parsing unavailable',
                            ha='center', va='center', transform=axes[1, 0].transAxes)

        # Word cloud of common title terms
        all_titles = ' '.join(df['title'].fillna('').astype(str))
        if all_titles.strip():
            clean_titles = re.sub(r'[^a-zA-Z\s]', '', all_titles.lower())

            try:
                wordcloud = WordCloud(width=400, height=300, background_color='white',
                                      max_words=50, colormap='viridis').generate(clean_titles)
                axes[1, 1].imshow(wordcloud, interpolation='bilinear')
                axes[1, 1].axis('off')
                axes[1, 1].set_title('Common Words in Titles')
            except Exception:
                axes[1, 1].text(0.5, 0.5, 'Word cloud unavailable',
                                ha='center', va='center', transform=axes[1, 1].transAxes)

        plt.tight_layout()
        plt.show()
    def comparative_analysis(self, topic1, topic2):
        """Compare two research topics"""
        print(f"🔬 Comparing '{topic1}' vs '{topic2}'")

        papers1 = self.search_papers(topic1)
        papers2 = self.search_papers(topic2)

        avg_length1 = sum(p['word_count'] for p in papers1) / len(papers1) if papers1 else 0
        avg_length2 = sum(p['word_count'] for p in papers2) / len(papers2) if papers2 else 0

        print("\n📈 Comparison Results:")
        print(f"Topic 1 ({topic1}):")
        print(f"  - Papers found: {len(papers1)}")
        print(f"  - Avg abstract length: {avg_length1:.1f} words")
        print(f"\nTopic 2 ({topic2}):")
        print(f"  - Papers found: {len(papers2)}")
        print(f"  - Avg abstract length: {avg_length2:.1f} words")

        return papers1, papers2
    def intelligent_query(self, question):
        """Use the AI agent to answer research questions (requires Gemini API)"""
        if not self.agent:
            print("❌ AI agent not available. Please provide a Gemini API key.")
            print("💡 Get a free API key at: https://makersuite.google.com/app/apikey")
            return None

        print(f"🤖 Processing intelligent query with Gemini: '{question}'")

        try:
            response = self.agent.run(question)
            return response
        except Exception as e:
            print(f"❌ Error with AI query: {str(e)}")
            return None

