You can download this code by clicking the button below.
This code is now available for download.
This function extracts all links from the given HTML content and converts them to absolute URLs. It uses the BeautifulSoup library to parse the HTML, and relative links are resolved against the given base URL.
Technology Stack : BeautifulSoup, requests
Code Type : Python Function
Code Difficulty : Intermediate
def extract_links_from_html(html_content, base_url):
    """Return the absolute URL of every usable anchor link in *html_content*.

    Every ``<a>`` tag is inspected. Its ``href`` is resolved against
    *base_url* with ``urllib.parse.urljoin``, so root-relative (``/a``),
    document-relative (``a.html``, ``../a``), query-only (``?q=1``) and
    protocol-relative (``//host/a``) references all become absolute.
    Hrefs that already carry a scheme (``https:``, ``mailto:``, ...) are
    returned unchanged.

    Resolution is purely textual: no network request is made, so HTTP
    redirects are not followed.

    Args:
        html_content: HTML markup to parse.
        base_url: URL of the document the markup came from; relative
            links are resolved against it.

    Returns:
        A list of URL strings in document order. Duplicates are kept.
        Anchors with a missing or empty ``href``, or whose ``href`` is a
        same-page fragment (starts with ``#``), are skipped.
    """
    from urllib.parse import urljoin

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_content, 'html.parser')

    absolute_urls = []
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if not href:
            continue
        # Surrounding whitespace is not part of the URL and would otherwise
        # defeat the fragment check and end up inside the joined result.
        href = href.strip()
        if not href or href.startswith('#'):
            continue
        # urljoin follows standard reference resolution; plain string
        # concatenation breaks as soon as base_url has a path component.
        absolute_urls.append(urljoin(base_url, href))
    return absolute_urls