Add note that the UTF-8 handling is not working
This commit is contained in:
parent
c72a9c4f14
commit
89569a687c
@ -12,6 +12,7 @@ class WebflowScraper:
|
|||||||
self.output_dir = output_dir
|
self.output_dir = output_dir
|
||||||
self.visited_urls = set()
|
self.visited_urls = set()
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
|
# NOTE: this doesn't seem to work; see https://stackoverflow.com/a/70647581
|
||||||
self.encoding='utf-8'
|
self.encoding='utf-8'
|
||||||
|
|
||||||
# Set up logging
|
# Set up logging
|
||||||
@ -146,6 +147,7 @@ class WebflowScraper:
|
|||||||
form['action'] = relative_path
|
form['action'] = relative_path
|
||||||
|
|
||||||
# Handle UTF-8 encoding issues
|
# Handle UTF-8 encoding issues
|
||||||
|
# NOTE: this doesn't seem to work; the website still has non-standard characters
|
||||||
html_content = str(soup)
|
html_content = str(soup)
|
||||||
html_content = html_content.encode('utf-8', 'replace').decode('utf-8')
|
html_content = html_content.encode('utf-8', 'replace').decode('utf-8')
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user