Add note that the UTF-8 handling is not working

This commit is contained in:
zramsay 2024-11-05 08:43:18 -05:00
parent c72a9c4f14
commit 89569a687c

View File

@@ -12,6 +12,7 @@ class WebflowScraper:
self.output_dir = output_dir self.output_dir = output_dir
self.visited_urls = set() self.visited_urls = set()
self.session = requests.Session() self.session = requests.Session()
# NOTE: setting the encoding here doesn't seem to work; see https://stackoverflow.com/a/70647581
self.encoding='utf-8' self.encoding='utf-8'
# Set up logging # Set up logging
@@ -146,6 +147,7 @@ class WebflowScraper:
form['action'] = relative_path form['action'] = relative_path
# Handle UTF-8 encoding issues # Handle UTF-8 encoding issues
# NOTE: this doesn't seem to work; the website still contains non-standard characters
html_content = str(soup) html_content = str(soup)
html_content = html_content.encode('utf-8', 'replace').decode('utf-8') html_content = html_content.encode('utf-8', 'replace').decode('utf-8')