def replace_wildcards(templates, wildcards, replacements):
    if len(wildcards) != len(replacements):
        raise ValueError(
            "The number of wildcards must match the number of replacements."
        )

    new_templates = []
    for tmp in templates:
        tmp_text = tmp["source"]
        for wildcard, replacement in zip(wildcards, replacements):
            tmp_text = tmp_text.replace(wildcard, replacement)
        new_templates.append({"cell_type": tmp["cell_type"], "source": tmp_text})

    return new_templates


rag_cells = [
    {
        "cell_type": "markdown",
        "source": "# Retrieval-Augmented Generation (RAG) System Notebook",
    },
    {"cell_type": "code", "source": ""},
]

embeggins_cells = [
    {
        "cell_type": "markdown",
        "source": "# Embeddings Generation Notebook",
    },
    {"cell_type": "code", "source": ""},
]

eda_cells = [
    {
        "cell_type": "markdown",
        "source": "# Exploratory Data Analysis (EDA) Notebook for {dataset_name} dataset",
    },
    {
        "cell_type": "code",
        "source": """
from IPython.display import HTML
display(HTML("{html_code}"))
""",
    },
    {
        "cell_type": "code",
        "source": """
# 1. Install and import necessary libraries.
!pip install pandas matplotlib seaborn
""",
    },
    {
        "cell_type": "code",
        "source": """
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
""",
    },
    {
        "cell_type": "code",
        "source": """
# 2. Load the dataset as a DataFrame using the provided code
{first_code}
""",
    },
    {
        "cell_type": "code",
        "source": """
# 3. Understand the dataset structure
print(df.head())
print(df.info())
print(df.describe())
""",
    },
    {
        "cell_type": "code",
        "source": """
# 4. Check for missing values
print(df.isnull().sum())
""",
    },
    {
        "cell_type": "code",
        "source": """
# 5. Identify data types of each column
print(df.dtypes)
""",
    },
    {
        "cell_type": "code",
        "source": """
# 6. Detect duplicated rows
print(df.duplicated().sum())
""",
    },
    {
        "cell_type": "code",
        "source": """
# 7. Generate descriptive statistics
print(df.describe())
""",
    },
    {
        "cell_type": "code",
        "source": """
# 8. Visualize the distribution of each column.
# TODO: Add code to visualize the distribution of each column.
# 9. Explore relationships between columns.
# TODO: Add code to explore relationships between columns.
# 10. Perform correlation analysis.
# TODO: Add code to perform correlation analysis.
""",
    },
]


def generate_embedding_system_prompt():
    """You are an expert data scientist tasked with creating a Jupyter notebook to generate embeddings for a specific dataset.
    Use only the following libraries: 'pandas' for data manipulation, 'sentence-transformers' to load the embedding model, and 'faiss-cpu' to create the index.

    The notebook should include:

    1. Install necessary libraries with !pip install.
    2. Import libraries.
    3. Load the dataset as a DataFrame using the provided code.
    4. Select the column to generate embeddings.
    5. Remove duplicate data.
    6. Convert the selected column to a list.
    7. Load the sentence-transformers model.
    8. Create a FAISS index.
    9. Encode a query sample.
    10. Search for similar documents using the FAISS index.

    Ensure the notebook is well-organized with explanations for each step.
    The output should be Markdown content with Python code snippets enclosed in "```python" and "```".

    The user will provide dataset information in the following format:

    ## Columns and Data Types

    ## Sample Data

    ## Loading Data code

    Use the provided code to load the dataset; do not use any other method.
    """


def generate_rag_system_prompt():
    """You are an expert machine learning engineer tasked with creating a Jupyter notebook to demonstrate a Retrieval-Augmented Generation (RAG) system using a specific dataset.
    The dataset is provided as a pandas DataFrame.

    Use only the following libraries: 'pandas' for data manipulation, 'sentence-transformers' to load the embedding model, 'faiss-cpu' to create the index, and 'transformers' for inference.

    The RAG notebook should include:

    1. Install necessary libraries.
    2. Import libraries.
    3. Load the dataset as a DataFrame using the provided code.
    4. Select the column for generating embeddings.
    5. Remove duplicate data.
    6. Convert the selected column to a list.
    7. Load the sentence-transformers model.
    8. Create a FAISS index.
    9. Encode a query sample.
    10. Search for similar documents using the FAISS index.
    11. Load the 'HuggingFaceH4/zephyr-7b-beta' model from the transformers library and create a pipeline.
    12. Create a prompt with two parts: 'system' for instructions based on a 'context' from the retrieved documents, and 'user' for the query.
    13. Send the prompt to the pipeline and display the answer.

    Ensure the notebook is well-organized with explanations for each step.
    The output should be Markdown content with Python code snippets enclosed in "```python" and "```".

    The user will provide the dataset information in the following format:

    ## Columns and Data Types

    ## Sample Data

    ## Loading Data code

    Use the provided code to load the dataset; do not use any other method.
    """