Koreliler Okula Kaç Yaşında Başlar?

Baris

New member
from selenium.common.exceptions import WebDriverException

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.chrome.service import Service

from webdriver_manager.chrome import ChromeDriverManager

from selenium import webdriver

from selenium.webdriver.common.keys import Keys

from selenium.webdriver.common.by import By

from bs4 import BeautifulSoup

import time

import os

from slugify import slugify

def deleteTarih(text):
    """Cut ``text`` right after the last period that is followed by a digit.

    The name ("tarih" = date) suggests this is meant to drop a date that is
    glued onto the end of a snippet, e.g. ``"Some answer.12 Mar 2021"``
    becomes ``"Some answer."``.

    Args:
        text: The snippet to trim.

    Returns:
        ``text`` up to and including the last ``.`` whose next character
        satisfies ``str.isnumeric()``; ``text`` unchanged when there is no
        such period (including the empty string).
    """
    # Scan backwards so the first hit is the *last* matching period, which is
    # what the original forward loop ended up using.  Starting at len - 2
    # guarantees text[i + 1] always exists.
    for i in range(len(text) - 2, -1, -1):
        # Compare the character at position i, not the whole string.
        if text[i] == "." and text[i + 1].isnumeric():
            return text[: i + 1]
    return text

##### SEARCH ######
#
# Reads search queries from yeni2.txt (one per line), submits each one on
# google.com with headless Chrome, clicks up to four div[@jsname='yEVEwb']
# elements, then writes one "<heading>\n<content>" text file per extracted
# entry into txtFolder.  A query is removed from yeni2.txt once processed.
#
# NOTE(review): this paste had lost all indentation; the nesting below is
# reconstructed from the statement order -- confirm against the original file.

# Not referenced by the code below (queries come from yeni2.txt); kept because
# it is a module-level name.
text = "Arı kuşu"

# Output directory for the generated heading files.
txtFolder = "başlık_txtler"

print("----- Bot Başarıyla Başlatıldı -----")

options = webdriver.ChromeOptions()
options.add_argument("headless")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

try:
    driver.maximize_window()

    # The original set the implicit wait to 30 s and then straight away to
    # 20 s; only the last call takes effect, so a single call is kept.
    driver.implicitly_wait(20)

    # Not used below; kept because it is a module-level name.
    wait = WebDriverWait(driver, 30)

    driver.get("https://www.google.com/")

    os.makedirs(txtFolder, exist_ok=True)

    with open("yeni2.txt", "r", encoding="utf-8") as f:
        lines = f.readlines()

    for line in lines:
        query = line.strip()
        if not query:
            # A blank line would submit an empty search and leave the browser
            # on the start page, so skip it.
            continue

        m = driver.find_element(By.NAME, "q")
        # Make sure text restored by the browser after back() does not end up
        # in front of the new query.
        m.clear()
        m.send_keys(query)
        m.send_keys(Keys.ENTER)
        time.sleep(2)

        # Click the first four matching elements; a failure on one of them is
        # reported and the next one is tried.
        for i in range(4):
            try:
                test = driver.find_element(By.XPATH, f"//div[@jsname='yEVEwb'][{i + 1}]")
                test.click()
                time.sleep(2)
            except WebDriverException:
                print("Hata: Sayfa yüklenirken beklenmeyen bir hata oluştu. Devam ediliyor...")

        soup = BeautifulSoup(driver.page_source, "html.parser")

        for link in soup.find_all("div", {"jsname": "yEVEwb"}):
            # Heading: first <span> after the first <div> after the first
            # div[jsaction="AWEk5c"] that follows this element.
            header = (
                link.find_all_next("div", {"jsaction": "AWEk5c"})[0]
                .find_all_next("div")[0]
                .find_all_next("span")[0]
                .text
            )
            content = link.find_all_next("div", {"jsname": "oQYOj"})[0].find_all_next("div")

            # Only keep entries whose content has at least 8 words.
            if len(content) > 0 and len(content[0].text.split()) >= 8:
                print(header)

                contentW = deleteTarih(content[0].text)
                if contentW == "":
                    # Fall back to the next sibling <div> when nothing is left.
                    contentW = deleteTarih(content[0].find_next_siblings("div")[0].text)

                # Build a file name from the heading, using "_" as separator.
                # NOTE(review): whether slugify() accepts `remove=` depends on
                # which "slugify" package is installed -- confirm.
                file_name = slugify(header, separator="_", remove=None)

                with open(os.path.join(txtFolder, file_name + ".txt"), "w", encoding="utf-8") as f:
                    f.write(header)
                    f.write('\n')
                    f.write(contentW)

        driver.back()
        time.sleep(2)

        # Remove the processed query from the input file.
        with open("yeni2.txt", "r+", encoding="utf-8") as f:
            remaining = [entry for entry in f.readlines() if entry.strip() != query]
            f.seek(0)  # rewind, then overwrite with the surviving lines
            f.writelines(remaining)
            f.truncate()  # drop whatever is left over from the longer old content
finally:
    # Always shut the browser down, even when a query fails part-way through.
    driver.quit()

print("***** Bot Başarıyla Tamamlandı Ve Başlık TXT Dosyaları Oluşturuldu. *****")