import requests
# Fetch the newest index page of the PTT "movie" board.
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get("https://www.ptt.cc/bbs/movie/index.html", timeout=10)
print(r)
text = r.text
print(text)

# Peek at the first title container. `pos` must exist before `pos + 1` is
# evaluated; starting at -1 makes the first search begin at offset 0.
pos = -1
pos = text.find('<div class="title">', pos + 1)
print(pos)
print(text[pos:pos + 100])
# Print the first 500 characters of every post entry (<div class="r-ent">)
# on the page, each followed by a separator line.
pos = text.find('<div class="r-ent">')
while pos != -1:
    print(text[pos:pos + 500])
    print("======================")
    # Resume one character past the current match so it is not found again.
    pos = text.find('<div class="r-ent">', pos + 1)
# For every post entry, print the 100 characters starting at the first
# link that follows the entry start.
pos = 0
while True:
    pos = text.find('<div class="r-ent">', pos)
    if pos == -1:
        break
    pos = text.find('<a href=', pos)
    if pos == -1:
        # No link after this entry. Without this guard `pos += 1` would
        # reset pos to 0 and the scan would restart from the top forever.
        break
    print(text[pos:pos + 100])
    print("======================")
    pos += 1
# For every post entry, print the complete anchor element
# (<a href=...>...</a>) that follows the entry start.
pos = 0
while True:
    pos = text.find('<div class="r-ent">', pos)
    if pos == -1:
        break
    pos = text.find('<a href=', pos)
    if pos == -1:
        # No link left on the page; continuing would wrap pos back to 0
        # and loop forever.
        break
    pos2 = text.find('</a>', pos)
    if pos2 == -1:
        # Unterminated anchor: nothing sensible to print.
        break
    # +4 keeps the closing '</a>' in the printed slice.
    print(text[pos:pos2 + 4])
    pos += 1
# For every post entry, print only the link text (the post title).
pos = 0
while True:
    pos = text.find('<div class="r-ent">', pos)
    if pos == -1:
        break
    pos = text.find('<a href=', pos)
    if pos == -1:
        # No link left on the page; continuing would wrap pos back to 0
        # and loop forever.
        break
    # Skip to the end of the opening <a ...> tag.
    pos = text.find('>', pos)
    if pos == -1:
        break
    pos2 = text.find('</a>', pos)
    if pos2 == -1:
        break
    print(text[pos + 1:pos2])
    pos += 1
# For every post entry, print its like count followed by its title.
# An empty line is printed for a value the entry does not have.
pos = 0
while True:
    pos = text.find('<div class="r-ent">', pos)
    if pos == -1:
        break

    # Everything up to the next entry (or the end of the page) is treated as
    # belonging to this entry, so a missing value is never taken from the
    # following post.
    # NOTE(review): for the last entry this range runs to the end of the
    # page - confirm nothing after the list matches these patterns.
    entry_end = text.find('<div class="r-ent">', pos + 1)
    if entry_end == -1:
        entry_end = len(text)

    # Like count, e.g.
    # <div class="nrec"><span class="hl f1">爆</span></div>
    like_pos = text.find('<span class="hl', pos, entry_end)
    if like_pos == -1:
        print("")
    else:
        like_pos = text.find('>', like_pos)
        like_pos2 = text.find('<', like_pos)
        print(text[like_pos + 1:like_pos2])

    # Title: the text of the entry's link.
    link_pos = text.find('<a href=', pos, entry_end)
    if link_pos == -1:
        # Entry without a link: print a blank and move on.
        print("")
        pos = entry_end
        continue
    pos = text.find('>', link_pos)
    if pos == -1:
        break
    pos2 = text.find('</a>', pos)
    if pos2 == -1:
        break
    print(text[pos + 1:pos2])
    pos += 1
def get_titles_from_html(html):
    """Extract the post titles from the HTML of a board index page.

    Every post is located by its ``<div class="r-ent">`` marker; the title
    is the text of the first ``<a href=...>`` link found before the next
    marker (or before the end of the document for the last entry). Entries
    without a complete link are skipped. The like count is not extracted.

    Args:
        html: HTML source of the index page.

    Returns:
        A list of title strings in page order.
    """
    entry_marker = '<div class="r-ent">'
    res = []
    entry_start = html.find(entry_marker)
    while entry_start != -1:
        # Bound every search to the current entry so that an entry without
        # a link does not pick up the link of a later entry.
        next_entry = html.find(entry_marker, entry_start + 1)
        entry_end = len(html) if next_entry == -1 else next_entry

        link_start = html.find('<a href=', entry_start, entry_end)
        if link_start != -1:
            # The title text sits between the end of the opening tag and
            # the closing </a>.
            text_start = html.find('>', link_start, entry_end)
            if text_start != -1:
                text_end = html.find('</a>', text_start, entry_end)
                if text_end != -1:
                    # Slice the `html` argument, not a module-level variable.
                    res.append(html[text_start + 1:text_end])

        entry_start = next_entry
    return res
# Collect the titles of the page currently held in `text`.
titles = get_titles_from_html(text)
print(titles)

# Try several anchors to locate the paging links in the page source.
# Results are printed explicitly: a bare expression shows nothing when the
# file runs as a script.
pos = text.find('href="/bbs/movie/index')
print(text[pos:pos + 100])

pos = text.find('<a class="btn wide"')
print(text[pos:pos + 100])

pos = text.find('最舊</a>')
print(text[pos:pos + 100])

# The first board path after the '最舊' link.
pos = text.find('最舊</a>')
pos = text.find('/bbs/movie', pos)
print(text[pos:pos + 50])

# Same as above, cut at the closing quote of the href value.
pos = text.find('最舊</a>')
pos = text.find('/bbs/movie', pos)
pos2 = text.find('"', pos)
print(text[pos:pos2])
def get_prev_url(html):
    """Return the absolute URL of the link that follows the '最舊' link.

    The page is searched for the text ``最舊</a>``; the first ``/bbs/movie``
    path inside the anchor that follows it is joined onto
    ``https://www.ptt.cc``.

    Args:
        html: HTML source of a board index page.

    Returns:
        The absolute URL as a string.

    Raises:
        ValueError: if the '最舊' link, or a ``/bbs/movie`` path in the
            anchor right after it, cannot be found. Previously this case
            silently produced a malformed URL.
    """
    oldest_marker = '最舊</a>'
    marker_pos = html.find(oldest_marker)
    if marker_pos == -1:
        raise ValueError("'最舊' link not found in page")
    search_from = marker_pos + len(oldest_marker)

    # Only look inside the anchor immediately after '最舊', so that an
    # anchor without an href does not fall through to a later link.
    anchor_end = html.find('</a>', search_from)
    if anchor_end == -1:
        anchor_end = len(html)

    pos = html.find('/bbs/movie', search_from, anchor_end)
    if pos == -1:
        raise ValueError("no /bbs/movie link found after '最舊'")
    pos2 = html.find('"', pos, anchor_end)
    if pos2 == -1:
        raise ValueError("unterminated href value after '最舊'")

    url = "https://www.ptt.cc" + html[pos:pos2]
    return url
# Resolve the URL returned by get_prev_url once, show it, then fetch that
# page and add its titles to the running list.
url = get_prev_url(text)
print(url)
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(url, timeout=10)
text = r.text
titles += get_titles_from_html(text)
def get_titles_from_html(html):
    """Extract the post titles from the HTML of a board index page.

    Every post is located by its ``<div class="r-ent">`` marker; the title
    is the text of the first ``<a href=...>`` link found before the next
    marker (or before the end of the document for the last entry). Entries
    without a complete link are skipped. The like count is not extracted.

    Args:
        html: HTML source of the index page.

    Returns:
        A list of title strings in page order.
    """
    entry_marker = '<div class="r-ent">'
    res = []
    entry_start = html.find(entry_marker)
    while entry_start != -1:
        # Bound every search to the current entry so that an entry without
        # a link does not pick up the link of a later entry, and a failed
        # search can never send the scan back to the start of the page.
        next_entry = html.find(entry_marker, entry_start + 1)
        entry_end = len(html) if next_entry == -1 else next_entry

        link_start = html.find('<a href=', entry_start, entry_end)
        if link_start != -1:
            # The title text sits between the end of the opening tag and
            # the closing </a>.
            text_start = html.find('>', link_start, entry_end)
            if text_start != -1:
                text_end = html.find('</a>', text_start, entry_end)
                if text_end != -1:
                    # Slice the `html` argument, not a module-level variable.
                    res.append(html[text_start + 1:text_end])

        entry_start = next_entry
    return res
def get_prev_url(html):
    """Return the absolute URL of the link that follows the '最舊' link.

    The page is searched for the text ``最舊</a>``; the first ``/bbs/movie``
    path inside the anchor that follows it is joined onto
    ``https://www.ptt.cc``.

    Args:
        html: HTML source of a board index page.

    Returns:
        The absolute URL as a string.

    Raises:
        ValueError: if the '最舊' link, or a ``/bbs/movie`` path in the
            anchor right after it, cannot be found. Previously this case
            silently produced a malformed URL.
    """
    oldest_marker = '最舊</a>'
    marker_pos = html.find(oldest_marker)
    if marker_pos == -1:
        raise ValueError("'最舊' link not found in page")
    search_from = marker_pos + len(oldest_marker)

    # Only look inside the anchor immediately after '最舊', so that an
    # anchor without an href does not fall through to a later link.
    anchor_end = html.find('</a>', search_from)
    if anchor_end == -1:
        anchor_end = len(html)

    pos = html.find('/bbs/movie', search_from, anchor_end)
    if pos == -1:
        raise ValueError("no /bbs/movie link found after '最舊'")
    pos2 = html.find('"', pos, anchor_end)
    if pos2 == -1:
        raise ValueError("unterminated href value after '最舊'")

    url = "https://www.ptt.cc" + html[pos:pos2]
    return url
import time
# Crawl 200 index pages, starting at the newest one and following the URL
# returned by get_prev_url after each page, collecting every post title.
titles = []
url = "https://www.ptt.cc/bbs/movie/index.html"
for _ in range(200):
    # A timeout keeps one stalled connection from blocking the whole crawl.
    r = requests.get(url, timeout=10)
    text = r.text
    titles += get_titles_from_html(text)
    url = get_prev_url(text)
    # Short pause between requests (presumably to go easy on the server).
    time.sleep(0.1)

# Show only the titles that contain the character '負'.
for title in titles:
    if '負' in title:
        print(title)

# Total number of titles collected; printed explicitly because a bare
# expression shows nothing when the file runs as a script.
print(len(titles))