Spaces:
Runtime error
Runtime error
# Generated by CodiumAI
# Dependencies:
# pip install pytest-mock
import pytest

from autogpt.commands.web_requests import scrape_links
""" | |
Code Analysis

Objective:
The objective of the 'scrape_links' function is to scrape hyperlinks from a
given URL and return them in a formatted way.

Inputs:
- url: a string representing the URL to be scraped.

Flow:
1. Send a GET request to the given URL using the requests library and the user agent header from the config file.
2. Check if the response contains an HTTP error. If it does, return an error message (a string containing "Error:").
3. Parse the HTML content of the response using the BeautifulSoup library.
4. Remove any script and style tags from the parsed HTML.
5. Extract all hyperlinks from the parsed HTML using the 'extract_hyperlinks' function.
6. Format the extracted hyperlinks using the 'format_hyperlinks' function.
7. Return the formatted hyperlinks.

Outputs:
- A list of formatted hyperlinks.

Additional aspects:
- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP
  requests and parse HTML content, respectively.
- The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML.
- The 'format_hyperlinks' function is called to format the extracted hyperlinks.
- The function checks for HTTP errors and returns an error message (a string
  containing "Error:") if any are found.
"""
class TestScrapeLinks:
    """Tests for ``scrape_links``.

    Every test except ``test_valid_url_with_hyperlinks`` patches
    ``requests.Session.get`` with a canned mock response, so the result
    depends only on the status code and HTML supplied by the test.
    """

    @staticmethod
    def _patch_session_get(mocker, status_code, text=None):
        """Patch ``requests.Session.get`` to return a canned mock response.

        Args:
            mocker: The ``pytest-mock`` fixture used to build and install the mock.
            status_code: Value assigned to the response's ``status_code``.
            text: Value assigned to the response's ``text``. When ``None`` the
                attribute is not set and stays the mock's auto-created default.

        Returns:
            The mock response object that the patched ``get`` returns.
        """
        mock_response = mocker.Mock()
        mock_response.status_code = status_code
        if text is not None:
            mock_response.text = text
        mocker.patch("requests.Session.get", return_value=mock_response)
        return mock_response

    def test_valid_url_with_hyperlinks(self) -> None:
        """A valid URL whose page has hyperlinks yields a non-empty list of strings.

        NOTE(review): no mock is installed here, so this presumably performs a
        real HTTP request and needs network access -- confirm this is intended.
        """
        url = "https://www.google.com"
        result = scrape_links(url)
        # Check the type first: a plain string result would also satisfy
        # ``len(result) > 0`` and hide the real reason for the failure.
        assert isinstance(result, list)
        assert len(result) > 0
        assert isinstance(result[0], str)

    def test_valid_url(self, mocker) -> None:
        """A page with one hyperlink is returned as ``"<text> (<href>)"``."""
        self._patch_session_get(
            mocker,
            status_code=200,
            text="<html><body><a href='https://www.google.com'>Google</a></body></html>",
        )

        result = scrape_links("https://www.example.com")

        assert result == ["Google (https://www.google.com)"]

    def test_invalid_url(self, mocker) -> None:
        """An HTTP 404 response produces a result containing ``"Error:"``."""
        self._patch_session_get(mocker, status_code=404)

        result = scrape_links("https://www.invalidurl.com")

        assert "Error:" in result

    def test_no_hyperlinks(self, mocker) -> None:
        """HTML without any hyperlinks yields an empty list."""
        self._patch_session_get(
            mocker,
            status_code=200,
            text="<html><body><p>No hyperlinks here</p></body></html>",
        )

        result = scrape_links("https://www.example.com")

        assert result == []

    def test_scrape_links_with_few_hyperlinks(self, mocker) -> None:
        """Several hyperlinks are extracted and formatted in document order."""
        self._patch_session_get(
            mocker,
            status_code=200,
            text="""
            <html>
            <body>
            <div id="google-link"><a href="https://www.google.com">Google</a></div>
            <div id="github"><a href="https://github.com">GitHub</a></div>
            <div id="CodiumAI"><a href="https://www.codium.ai">CodiumAI</a></div>
            </body>
            </html>
            """,
        )

        result = scrape_links("https://www.example.com")

        # One equality covers type, length and order, and gives a full diff
        # on failure.
        assert result == [
            "Google (https://www.google.com)",
            "GitHub (https://github.com)",
            "CodiumAI (https://www.codium.ai)",
        ]