diff --git a/scrape.py b/scrape.py index 884555e..c709801 100644 --- a/scrape.py +++ b/scrape.py @@ -12,6 +12,7 @@ class WebflowScraper: self.output_dir = output_dir self.visited_urls = set() self.session = requests.Session() + # NOTE: setting the encoding below doesn't seem to work; see https://stackoverflow.com/a/70647581 self.encoding='utf-8' # Set up logging @@ -146,6 +147,7 @@ class WebflowScraper: form['action'] = relative_path # Handle UTF-8 encoding issues + # NOTE: the round trip below doesn't seem to work; the website still has non-standard characters html_content = str(soup) html_content = html_content.encode('utf-8', 'replace').decode('utf-8')