# Baris
# New member
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
import os
from slugify import slugify
def deleteTarih(text):
    """Cut *text* right after the last '.' that is directly followed by a numeric character.

    The name suggests ("tarih" is Turkish for "date") that this is meant to
    strip a date glued onto the end of a sentence, e.g.
    ``"Some sentence.12 Mar 2021"`` -> ``"Some sentence."``.

    Args:
        text: The string to trim.

    Returns:
        ``text`` up to and including the last '.' that is immediately followed
        by a numeric character, or ``text`` unchanged when no such '.' exists.
    """
    cut = None
    # Stop one short of the end: a trailing '.' has no following character.
    for i in range(len(text) - 1):
        # Compare the character at position i (not the whole string) to '.'.
        if text[i] == '.' and text[i + 1].isnumeric():
            # No break on purpose: the last matching position wins.
            cut = i + 1
    if cut is None:
        return text
    return text[:cut]
##### SEARCH #####
# For every line of yeni2.txt: type it into Google's search box, click up to
# four of the divs marked jsname='yEVEwb', then parse the page and write one
# "<header>\n<content>" text file per such div into txtFolder. Once a line has
# been processed it is removed from yeni2.txt.
text = "Arı kuşu"  # NOTE(review): not referenced anywhere in this block
txtFolder = "başlık_txtler"
print("----- Bot Başarıyla Başlatıldı -----")
options = webdriver.ChromeOptions()
options.add_argument("headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
try:
    driver.maximize_window()
    # The original set the implicit wait to 30 and then straight to 20;
    # only the last call has any effect, so only that one is kept.
    driver.implicitly_wait(20)
    wait = WebDriverWait(driver, 30)  # NOTE(review): created but not used in this block
    driver.get("https://www.google.com/")
    os.makedirs(txtFolder, exist_ok=True)
    with open("yeni2.txt", "r", encoding="utf-8") as f:
        lines = f.readlines()
    for line in lines:
        query = line.strip()
        if not query:
            # A blank line would submit an empty search; the driver.back()
            # below would then navigate away from Google and the next lookup
            # of the search box would fail.
            continue
        m = driver.find_element("name", "q")
        m.send_keys(query)
        m.send_keys(Keys.ENTER)
        time.sleep(2)
        for i in range(4):
            try:
                # NOTE(review): in XPath, "//div[...][n]" selects the n-th match
                # among siblings of each parent, not the n-th match in the
                # document ("(//div[...])[n]" would) - confirm which is intended.
                test = driver.find_element(By.XPATH, "//div[@jsname='yEVEwb'][" + str(i + 1) + "]")
                test.click()
                time.sleep(2)
            except WebDriverException:
                # Best effort: a div that cannot be found or clicked is skipped.
                print("Hata: Sayfa yüklenirken beklenmeyen bir hata oluştu. Devam ediliyor...")
                continue
        soup = BeautifulSoup(driver.page_source, "html.parser")
        for link in soup.find_all("div", {"jsname": "yEVEwb"}):
            header = link.find_all_next("div", {"jsaction": "AWEk5c"})[0].find_all_next("div")[0].find_all_next("span")[
                0].text
            content = link.find_all_next("div", {"jsname": "oQYOj"})[0].find_all_next("div")
            # Only keep entries whose content has at least 8 words.
            if len(content) > 0 and len(content[0].text.split()) >= 8:
                print(header)
                contentW = deleteTarih(content[0].text)
                if contentW == "":
                    contentW = deleteTarih(content[0].find_next_siblings("div")[0].text)
                # Derive the output file name from the header, with "_" as separator.
                file_name = slugify(header, separator="_", remove=None)
                with open(os.path.join(txtFolder, file_name + ".txt"), 'w', encoding='utf-8') as f:
                    f.write(header)
                    f.write('\n')
                    f.write(contentW)
        driver.back()
        time.sleep(2)
        # Remove the processed query from yeni2.txt.
        with open('yeni2.txt', 'r+', encoding='utf-8') as f:
            lines_temp = f.readlines()
            f.seek(0)  # rewind to the start of the file
            f.writelines(kept for kept in lines_temp if kept.strip() != query)
            f.truncate()  # drop whatever is left over past the rewritten part
finally:
    # Always shut the browser down, even when the run aborts with an error.
    driver.quit()
print("***** Bot Başarıyla Tamamlandı Ve Başlık TXT Dosyaları Oluşturuldu. *****")
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
import os
from slugify import slugify
def deleteTarih(text):
    """Cut *text* right after the last '.' that is directly followed by a numeric character.

    The name suggests ("tarih" is Turkish for "date") that this is meant to
    strip a date glued onto the end of a sentence, e.g.
    ``"Some sentence.12 Mar 2021"`` -> ``"Some sentence."``.

    Args:
        text: The string to trim.

    Returns:
        ``text`` up to and including the last '.' that is immediately followed
        by a numeric character, or ``text`` unchanged when no such '.' exists.
    """
    cut = None
    # Stop one short of the end: a trailing '.' has no following character.
    for i in range(len(text) - 1):
        # Compare the character at position i (not the whole string) to '.'.
        if text[i] == '.' and text[i + 1].isnumeric():
            # No break on purpose: the last matching position wins.
            cut = i + 1
    if cut is None:
        return text
    return text[:cut]
##### SEARCH #####
# For every line of yeni2.txt: type it into Google's search box, click up to
# four of the divs marked jsname='yEVEwb', then parse the page and write one
# "<header>\n<content>" text file per such div into txtFolder. Once a line has
# been processed it is removed from yeni2.txt.
text = "Arı kuşu"  # NOTE(review): not referenced anywhere in this block
txtFolder = "başlık_txtler"
print("----- Bot Başarıyla Başlatıldı -----")
options = webdriver.ChromeOptions()
options.add_argument("headless")
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
try:
    driver.maximize_window()
    # The original set the implicit wait to 30 and then straight to 20;
    # only the last call has any effect, so only that one is kept.
    driver.implicitly_wait(20)
    wait = WebDriverWait(driver, 30)  # NOTE(review): created but not used in this block
    driver.get("https://www.google.com/")
    os.makedirs(txtFolder, exist_ok=True)
    with open("yeni2.txt", "r", encoding="utf-8") as f:
        lines = f.readlines()
    for line in lines:
        query = line.strip()
        if not query:
            # A blank line would submit an empty search; the driver.back()
            # below would then navigate away from Google and the next lookup
            # of the search box would fail.
            continue
        m = driver.find_element("name", "q")
        m.send_keys(query)
        m.send_keys(Keys.ENTER)
        time.sleep(2)
        for i in range(4):
            try:
                # NOTE(review): in XPath, "//div[...][n]" selects the n-th match
                # among siblings of each parent, not the n-th match in the
                # document ("(//div[...])[n]" would) - confirm which is intended.
                test = driver.find_element(By.XPATH, "//div[@jsname='yEVEwb'][" + str(i + 1) + "]")
                test.click()
                time.sleep(2)
            except WebDriverException:
                # Best effort: a div that cannot be found or clicked is skipped.
                print("Hata: Sayfa yüklenirken beklenmeyen bir hata oluştu. Devam ediliyor...")
                continue
        soup = BeautifulSoup(driver.page_source, "html.parser")
        for link in soup.find_all("div", {"jsname": "yEVEwb"}):
            header = link.find_all_next("div", {"jsaction": "AWEk5c"})[0].find_all_next("div")[0].find_all_next("span")[
                0].text
            content = link.find_all_next("div", {"jsname": "oQYOj"})[0].find_all_next("div")
            # Only keep entries whose content has at least 8 words.
            if len(content) > 0 and len(content[0].text.split()) >= 8:
                print(header)
                contentW = deleteTarih(content[0].text)
                if contentW == "":
                    contentW = deleteTarih(content[0].find_next_siblings("div")[0].text)
                # Derive the output file name from the header, with "_" as separator.
                file_name = slugify(header, separator="_", remove=None)
                with open(os.path.join(txtFolder, file_name + ".txt"), 'w', encoding='utf-8') as f:
                    f.write(header)
                    f.write('\n')
                    f.write(contentW)
        driver.back()
        time.sleep(2)
        # Remove the processed query from yeni2.txt.
        with open('yeni2.txt', 'r+', encoding='utf-8') as f:
            lines_temp = f.readlines()
            f.seek(0)  # rewind to the start of the file
            f.writelines(kept for kept in lines_temp if kept.strip() != query)
            f.truncate()  # drop whatever is left over past the rewritten part
finally:
    # Always shut the browser down, even when the run aborts with an error.
    driver.quit()
print("***** Bot Başarıyla Tamamlandı Ve Başlık TXT Dosyaları Oluşturuldu. *****")