# ---------------------------------------------------------------------------
# alpha_modules/top_games.py
# ---------------------------------------------------------------------------
class TopGamesUpdater:
    """Pull entries out of the Steam top-sellers page source.

    The page source is scanned for ``marker``.  The text that starts
    ``TITLE_OFFSET`` characters after a match and runs up to the next ``<``
    is taken as one entry (presumably a game title).

    Args:
        marker: Non-empty substring that precedes every entry in the page
            source.

    Raises:
        ValueError: If ``marker`` is empty.
    """

    # NOTE(review): the marker literal in the reviewed text held nothing but
    # a line break, which suggests the intended HTML snippet was lost along
    # the way.  The visible value is kept as the default; pass the real
    # snippet through ``marker`` once it is known.
    DEFAULT_MARKER = "\n"

    # Characters skipped past the previous match before searching again.
    SKIP = 100
    # The entry text starts this many characters after the marker position.
    TITLE_OFFSET = 80
    # The entry text is looked for no further than this many characters
    # after the marker position.
    WINDOW_END = 200

    def __init__(self, marker=DEFAULT_MARKER):
        if not marker:
            raise ValueError("marker must be a non-empty string")
        self.__marker = marker
        self.__get_source = ""
        # Absolute position of the most recent match in the page source.
        self.__index = 0
        self.__output = ""
        # Last character inspected while reading an entry; kept because it
        # is a public attribute of the original class.
        self.letter = ""
        # 1 while further entries may exist, 0 once the marker ran out.
        self.__go = 1

    def updater(self):
        """Download the page source and print the first entry found."""
        # Imported here so that importing this module neither needs the
        # project package on the path nor touches the network.
        import bin.lib.website_source_grabber

        self.__get_source = (
            bin.lib.website_source_grabber.WebsiteSourceGrabber().grabber()
        )
        print("ok")
        self.list_generator()

    def list_generator(self):
        """Extract and print the next entry from the downloaded source.

        Each call resumes ``SKIP`` characters after the previous match, so
        calling it repeatedly walks through the page.

        Returns:
            The extracted text, or ``None`` when the marker does not occur
            again (the internal "go" flag is then set to 0).
        """
        try:
            # Searching the full source with a start offset keeps the stored
            # position absolute.  The earlier code stored an index relative
            # to a slice and then reused it as an absolute offset, so
            # repeated calls did not advance correctly.
            position = self.__get_source.index(
                self.__marker, self.__index + self.SKIP
            )
        except ValueError:
            self.__go = 0
            return None

        self.__index = position
        window = self.__get_source[
            position + self.TITLE_OFFSET:position + self.WINDOW_END
        ]
        title, stop, _ = window.partition("<")
        if window:
            # Mirrors what the character loop left behind: "<" when the
            # terminator was hit, otherwise the last character read.
            self.letter = stop or window[-1]

        # One entry per call; nothing is carried over from earlier calls.
        self.__output = title
        print(self.__output)
        return title


if __name__ == "__main__":
    TopGamesUpdater().updater()


# ---------------------------------------------------------------------------
# bin/lib/website_source_grabber.py
# ---------------------------------------------------------------------------
class WebsiteSourceGrabber:
    """Download the HTML source of a web page.

    Args:
        website: URL to fetch; defaults to the Steam top-sellers search page.
        timeout: Seconds to wait for the server before giving up.
    """

    DEFAULT_WEBSITE = "https://store.steampowered.com/search/?filter=topsellers"

    def __init__(self, website=DEFAULT_WEBSITE, timeout=10.0):
        self.__website = website
        self.__timeout = timeout

    def grabber(self):
        """Fetch the configured page and return its body as text.

        Raises:
            requests.RequestException: On connection problems, timeouts or
                an HTTP error status.
        """
        import requests

        # Without a timeout requests can wait indefinitely on a stalled
        # connection.
        response = requests.get(self.__website, timeout=self.__timeout)
        # Fail loudly instead of handing an error page to the parser.
        response.raise_for_status()
        return response.text
b/dev_tools/notes.txt @@ -0,0 +1 @@ +
# dev_tools/notes.txt held a single line: "--> Search term".
# NOTE(review): whatever preceded the arrow in that note is not present in
# the reviewed text.


# ---------------------------------------------------------------------------
# dev_tools/testfile.py
# ---------------------------------------------------------------------------
def index_probe():
    """Scratch check: ``str.index`` on a substring that is absent.

    ``"testtest"`` contains no ``"a"``, so the call raises ``ValueError``.
    Wrapped in a function so the error is not raised merely by importing
    this module.

    Raises:
        ValueError: Always, because the substring is not found.
    """
    a = "testtest"
    return a.index("a")


# ---------------------------------------------------------------------------
# dev_tools/website-requestor.py
# ---------------------------------------------------------------------------
# Seconds to wait for a server before giving up on a request.
REQUEST_TIMEOUT = 10.0

# NOTE(review): whitespace in the reviewed text was collapsed, so the
# indentation in front of "COPIED" is presumed.
COPIED_BANNER = """

--------------------------

          COPIED

--------------------------
"""


def _fetch_and_copy(website, emit):
    """Fetch ``website``, emit its source and try to copy it to the clipboard."""
    # Imported lazily so that quitting right away needs neither package.
    import requests
    import pyperclip as pc

    try:
        res = requests.get(website, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        # A bad URL (e.g. missing scheme) or a network error should not end
        # the interactive session.
        emit("Request failed: {}".format(err))
        return

    emit(res.text)
    try:
        pc.copy(res.text)
    except pc.PyperclipException as err:
        # Copying is best effort (no clipboard mechanism on some systems),
        # but say so instead of failing silently.
        emit("Could not copy to clipboard: {}".format(err))
    else:
        emit(COPIED_BANNER)


def main(prompt=input, emit=print):
    """Ask for URLs in a loop, print each page source and copy it.

    Entering ``q`` (or end-of-input) leaves the loop.

    Args:
        prompt: Callable taking the prompt text and returning the entered
            line; defaults to ``input``.
        emit: Callable receiving every piece of output; defaults to
            ``print``.
    """
    while True:
        try:
            website = prompt("Website to get the sourcecode from: ")
        except EOFError:
            emit("Leaving")
            break
        emit(website)

        if website == "q":
            emit("Leaving")
            break
        _fetch_and_copy(website, emit)


if __name__ == "__main__":
    main()