Added a new web scraper

This commit is contained in:
janis
2022-05-02 15:39:09 +02:00
parent 0c46453064
commit be2b059b1d
8 changed files with 83 additions and 8 deletions

View File

@@ -4,7 +4,7 @@
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@@ -0,0 +1,36 @@
import bin.lib.website_source_grabber
class TopGamesUpdater:
    """Extract game titles from the page source returned by WebsiteSourceGrabber.

    ``updater`` downloads the page once; ``list_generator`` then extracts one
    title per call, resuming behind the previous match each time.
    """

    # Opening tag that precedes every title in the page markup.
    _MARKER = '<div class="col search_name ellipsis">'
    # Characters skipped before each search. On the first call this skips the
    # first 100 characters of the page; on later calls it moves the search
    # past the marker that was matched previously.
    _SEARCH_SKIP = 100
    # Window, measured from the start of the marker, that holds the title text.
    _TITLE_START = 80
    _TITLE_END = 200

    def __init__(self):
        self.__get_source = ""  # full page source as returned by the grabber
        self.__index = 0        # absolute offset of the most recent marker match
        self.__output = ""      # title extracted by the most recent call
        self.__go = 1           # set to 0 once no further marker can be found
        self.letter = ""        # no longer used internally; kept for existing readers

    def updater(self):
        """Download the page source and print the first title found in it."""
        self.__get_source = bin.lib.website_source_grabber.WebsiteSourceGrabber().grabber()
        # A fresh page invalidates any offset left over from an earlier run.
        self.__index = 0
        self.__go = 1
        print("ok")
        self.list_generator()
        # NOTE: only one title is extracted per run. Calling list_generator()
        # repeatedly while self.__go == 1 would walk through every title.

    def list_generator(self):
        """Extract the next title from the page source and print it.

        The search starts ``_SEARCH_SKIP`` characters behind the previous
        match, so repeated calls advance through the page instead of
        returning the same title again. The extracted title replaces the
        previous output rather than being appended to it.

        When no further marker exists, ``self.__go`` is set to 0, the last
        output is left untouched and nothing is printed.
        """
        search_from = self.__index + self._SEARCH_SKIP
        try:
            # Searching with a start offset keeps the match position absolute
            # and avoids copying the remainder of the page on every call.
            match_at = self.__get_source.index(self._MARKER, search_from)
        except ValueError:
            self.__go = 0
            return
        self.__index = match_at
        window = self.__get_source[match_at + self._TITLE_START:match_at + self._TITLE_END]
        # The title runs up to the next tag; without a "<" the whole window is used.
        self.__output = window.partition("<")[0]
        print(self.__output)
# Runs the updater once whenever this module is executed or imported
# (there is no ``if __name__ == "__main__"`` guard around the call).
TopGamesUpdater().updater()

View File

@@ -0,0 +1,14 @@
import requests
class WebsiteSourceGrabber:
    """Download the source text of a web page with ``requests``."""

    # Page fetched when ``grabber`` is called without an explicit URL.
    DEFAULT_URL = "https://store.steampowered.com/search/?filter=topsellers"

    def __init__(self):
        self.__website = ""       # URL of the most recent request
        self.__res = ""           # response object of the most recent request
        self.__request_done = ""  # body text of the most recent response

    def grabber(self, url=None, timeout=30):
        """Fetch a page and return its body as text.

        Args:
            url: Address to download. Defaults to ``DEFAULT_URL``.
            timeout: Seconds to wait for the server. Without a timeout
                ``requests.get`` can block indefinitely on a stalled
                connection.

        Returns:
            The response body as decoded by ``Response.text``.

        Raises:
            requests.RequestException: If the request fails or times out.
        """
        self.__website = self.DEFAULT_URL if url is None else url
        self.__res = requests.get(self.__website, timeout=timeout)
        self.__request_done = self.__res.text
        return self.__request_done

View File

@@ -1,5 +1 @@
version from,2022-02-26 11:06:52.832345
1,495.0
2,290.0
3,9.3
4,359.0
version from,2022-04-25 16:23:46.891576
1 version from 2022-02-26 11:06:52.832345 2022-04-25 16:23:46.891576
1 495.0
2 290.0
3 9.3
4 359.0

View File

@@ -1,3 +1,3 @@
simplePCBuilding-Motherboard-data-Library
ID,SOCKET,CHIPSET,SIZE,RAM_SUPPORT,MaxRAMCapacity,PCIeSLOTS(x16),NVMeSLOTS,FAN_HEADERS,ARGB_HEADERS,RGB12V_Headers,WiFi,S-ATA-CONNECTORS,F_USB_3.x,F_USB_2.x,F_USB_C,EPS12V_Pins
1,AM4,Z590,ATX,DDR4-3200,128,2,2,4,2,2,y,6,2,3,0,12
ID,SOCKET,CHIPSET,SIZE,RAM_SUPPORT,RAM_Slots,MaxRAMCapacity,PCIeSLOTS(x16),NVMeSLOTS,FAN_HEADERS,ARGB_HEADERS,RGB12V_Headers,WiFi,S-ATA-CONNECTORS,F_USB_3.x,F_USB_2.x,F_USB_C,EPS12V_Pins,WiFi,Name
1,AM4,X570,ATX,DDR4-3200,4,128,2,2,4,2,2,y,6,2,3,0,12,y,MSI MPG X570 Gaming Edge WiFi
1 simplePCBuilding-Motherboard-data-Library
2 ID,SOCKET,CHIPSET,SIZE,RAM_SUPPORT,MaxRAMCapacity,PCIeSLOTS(x16),NVMeSLOTS,FAN_HEADERS,ARGB_HEADERS,RGB12V_Headers,WiFi,S-ATA-CONNECTORS,F_USB_3.x,F_USB_2.x,F_USB_C,EPS12V_Pins ID,SOCKET,CHIPSET,SIZE,RAM_SUPPORT,RAM_Slots,MaxRAMCapacity,PCIeSLOTS(x16),NVMeSLOTS,FAN_HEADERS,ARGB_HEADERS,RGB12V_Headers,WiFi,S-ATA-CONNECTORS,F_USB_3.x,F_USB_2.x,F_USB_C,EPS12V_Pins,WiFi,Name
3 1,AM4,Z590,ATX,DDR4-3200,128,2,2,4,2,2,y,6,2,3,0,12 1,AM4,X570,ATX,DDR4-3200,4,128,2,2,4,2,2,y,6,2,3,0,12,y,MSI MPG X570 Gaming Edge WiFi

1
dev_tools/notes.txt Normal file
View File

@@ -0,0 +1 @@
<div class="col search_name ellipsis"> --> Search term

3
dev_tools/testfile.py Normal file
View File

@@ -0,0 +1,3 @@
# NOTE(review): looks like a scratch check of how str.index behaves when the
# substring is missing — confirm before relying on this file.
a = "testtest"
# "testtest" contains no "a", so this call raises ValueError.
a.index("a")

View File

@@ -0,0 +1,25 @@
import requests
import pyperclip as pc
# Interactive loop: ask for a URL, print the page source and copy it to the
# clipboard. Entering "q" ends the session.
while True:
    website = input("Website to get the sourcecode from: ")
    print(website)
    if website == "q":
        print("Leaving")
        break
    try:
        # A timeout keeps an unresponsive server from hanging the prompt.
        res = requests.get(website, timeout=30)
    except requests.RequestException as error:
        # Malformed URLs and connection problems should not end the session.
        print(f"Request failed: {error}")
        continue
    print(res.text)
    try:
        pc.copy(res.text)
    except Exception as error:
        # Copying stays best effort, but the failure is reported instead of
        # being silently dropped, and Ctrl-C is no longer swallowed.
        print(f"Could not copy to clipboard: {error}")
    else:
        print("""
--------------------------
COPIED
--------------------------
""")