{"id":261941,"date":"2023-02-24T09:48:20","date_gmt":"2023-02-24T01:48:20","guid":{"rendered":"https:\/\/gulass.cn\/?p=261941"},"modified":"2023-01-19T11:50:15","modified_gmt":"2023-01-19T03:50:15","slug":"quickly-complete-crawler","status":"publish","type":"post","link":"https:\/\/gulass.cn\/quickly-complete-crawler.html","title":{"rendered":"\u7b80\u5355\u51e0\u6b65\u5feb\u901f\u5b8c\u6210\u722c\u866b\u4efb\u52a1"},"content":{"rendered":"
\r\nfrom selenium import webdriver\r\nfrom time import sleep\r\n#from selenium.webdriver.chrome.options import Options\r\n#import xlrd\r\nimport csv\r\nimport os\r\n#\u56fa\u5b9acsv\u4fdd\u5b58\u5728\u684c\u9762\r\nos.chdir(r'C:\\Users\\Administrator\\Desktop')\r\n<\/pre>\n\u4e8c\u3001\u6253\u5f00\u7f51\u9875<\/strong><\/div>\n\r\n# \u4f7f\u7528Firefox\u6d4f\u89c8\u5668\r\n# \u5982\u679c\u8def\u5f84\u4e3a exe \u542f\u52a8\u7a0b\u5e8f\u7684\u8def\u5f84\uff0c\u90a3\u4e48\u8be5\u8def\u5f84\u9700\u8981\u52a0\u4e00\u4e2a r\r\ndriver =webdriver.Firefox()\r\n# \u83b7\u53d6\u6307\u5b9a\u7f51\u9875\u7684\u6570\u636e start_urls\r\ndriver.get('https:\/\/movie.douban.com\/top250')\r\ndriver.implicitly_wait(20)\r\n<\/pre>\n\u4e09\u3001\u7ffb\u9875\u3001\u83b7\u53d6\u5185\u5bb9\u3001\u5199\u5165CSV<\/strong><\/div>\n\r\n#\u904d\u5386\u5faa\u73af12\u6b21\r\nfor o in range (1,13):\r\n #\u904d\u5386\u5faa\u73af25\u6b21\r\n sleep(0.5)\r\n for i in range (1,26):\r\n #\u83b7\u53d6\u6807\u9898\u548c\u65f6\u95f4\r\n #\u62fc\u63a5\u5b57\u7b26\u4e32\r\n data1 = driver.find_element_by_css_selector('.grid_view > li:nth-child('+str(i)+') > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > a:nth-child(1) > span:nth-child(1)').text\r\n data2 = driver.find_element_by_css_selector('.grid_view > li:nth-child('+str(i)+') > div:nth-child(1) > div:nth-child(2) > div:nth-child(2) > p:nth-child(3) > span:nth-child(1)').text\r\n print(data1, data2)\r\n #\u5199\u5165csv\r\n with open('\u8c46\u74e3.csv', 'a+', newline = '\\n')as f:\r\n w = csv.writer(f)\r\n w.writerow([data1, data2]) \r\n sleep(0.5)\r\n #\u5e76\u8fdb\u884c\u70b9\u51fb\u7ffb\u9875\r\n driver.find_element_by_css_selector('.next > a:nth-child(2)').click() \r\n \r\n\r\ndriver.quit()\r\n<\/pre>\n","protected":false},"excerpt":{"rendered":"from selenium import webdriver from time import sleep # 
[…]<\/p>\n","protected":false},"author":1903,"featured_media":262249,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_acf_changed":false,"footnotes":""},"categories":[55],"tags":[],"class_list":["post-261941","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-thread"],"acf":[],"_links":{"self":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts\/261941","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/users\/1903"}],"replies":[{"embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/comments?post=261941"}],"version-history":[{"count":2,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts\/261941\/revisions"}],"predecessor-version":[{"id":261947,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts\/261941\/revisions\/261947"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/media\/262249"}],"wp:attachment":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/media?parent=261941"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/categories?post=261941"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/tags?post=261941"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}