不能使用 ele.get_text(),需要使用 ele.contents。
# NOTE(review): this snippet's indentation appears to have been lost, and the
# enclosing function (implied by the `return 0` below) is not visible here.
# Read the whole input file as bytes and parse it with the "html.parser" parser.
soup = None
with open(filename, "rb") as f:
content = f.read()
# BeautifulSoup is presumably bs4.BeautifulSoup; its import is not shown -- TODO confirm.
soup = BeautifulSoup(content, "html.parser")
# If the parse result is falsy, report the failure on stderr and return 0.
if not soup:
print("parse file failed: {}".format(filename), file=sys.stderr)
return 0
# Print every child node of each <script> element, read through .contents
# (the text accompanying this snippet says get_text() cannot be used here).
for s in soup.find_all("script"):
for c in s.contents:
print(c)