new file structure and some changes to the main file

2022-05-16 08:04:15 +02:00
parent 028b94c26c
commit e6634480c6
3 changed files with 9 additions and 3 deletions
--- a/bin/webscratching/price_extractor_objectoriented.py
+++ b/bin/webscratching/price_extractor_objectoriented.py
@@ -0,0 +1,82 @@
+"""@package docstring
+This package extracts prices from websites. Currently, the package only allows for extraction of prices from
+https://digitec.ch/"""
+
+import requests
+import csv
+import datetime
+
+
+class PriceExtractor:
+    """Fetch product prices from https://digitec.ch/ and store them in a CSV file.
+
+    The product list is a CSV file whose rows are ``product number, URL``. Every price that could be
+    extracted is appended to the prices file as a ``product number, price`` row.
+    """
+
+    # Start of the attribute that identifies the tag carrying the price in the page source.
+    _PRICE_MARKER = 'property="product:price:amount'
+    # Seconds to wait for the web server before a product is given up on.
+    _REQUEST_TIMEOUT = 30
+
+    def __init__(self, products_path="../../data/products.csv", prices_path="../../data/prices.csv"):
+        """Start a fresh prices file and load the product list.
+
+        Arguments:
+        products_path -- CSV file listing the products to look up.
+        prices_path -- CSV file that receives the prices. It is overwritten and starts with a
+        "version from" row holding the current local time.
+
+        Relative paths are resolved against the current working directory.
+        """
+        self.__prices_path = prices_path
+        # newline="" is what the csv module asks for on files it reads from or writes to.
+        with open(prices_path, "w", newline="") as pricedata:
+            writing = csv.writer(pricedata, delimiter=',', quoting=csv.QUOTE_MINIMAL)
+            writing.writerow(["version from", datetime.datetime.now()])
+        self.__raw_list = []
+        self.__productcount = 0
+        self.__productnumber = 0
+        self._load_products(products_path)
+
+    def _load_products(self, path):
+        """Replace the pending product rows with the rows of the CSV file at path."""
+        with open(path, "r", newline="") as products:
+            self.__raw_list = list(csv.reader(products, delimiter=','))
+        self.__productcount = len(self.__raw_list)
+        print("needing to update", self.__productcount, "prices")
+        self.__productnumber = 0
+
+    @staticmethod
+    def _parse_price(page_source):
+        """Return the price found in page_source as a float, or None when there is none.
+
+        The price is the value of the content attribute that follows the price marker inside the
+        same tag.
+        """
+        _, marker, rest = page_source.partition(PriceExtractor._PRICE_MARKER)
+        if not marker:
+            return None
+        # Stay inside the tag so that a content attribute of a later tag is never picked up.
+        tag_rest = rest.partition('>')[0]
+        _, attribute, value = tag_rest.partition('content="')
+        if not attribute:
+            return None
+        try:
+            return float(value.partition('"')[0])
+        except ValueError:
+            return None
+
+    def readfile(self, filename):
+        """Load a different product list. Arguments:
+        Filename. Either specify the full path (e.g. /home/[username]/webscratching/products.csv) or a path
+        relative to the current working directory (e.g. ../pricedata/products3.csv).
+        Returns the content of the file inside of a list (one list of columns per row).
+
+        Example: products = extractor.readfile("../pricedata/products2.csv") (NOTE: products is a list!)"""
+        # The file is only read here: opening it for writing would wipe the list before it is loaded.
+        self._load_products(filename)
+        return self.__raw_list
+
+    def digitec_extractor(self):
+        """Fetch the price of every pending product and append it to the prices file.
+
+        Every row loaded by the constructor or by the method "readfile" is processed once. Blank or
+        malformed rows, failed requests, error responses and pages without a readable price are
+        reported on stdout and skipped. This method takes no arguments and returns nothing."""
+        # Take the rows over so that a second call does not fetch the same products again.
+        pending, self.__raw_list = self.__raw_list, []
+        for row in pending:
+            if not row:
+                # csv.reader yields an empty list for a blank line.
+                continue
+            try:
+                productnumber = int(row[0])
+                website = row[1]
+            except (ValueError, IndexError):
+                print("Malformed product row, skipping..", row)
+                continue
+            self.__productnumber = productnumber
+            print("fetching data... This step might take a couple of seconds")
+            try:
+                response = requests.get(website, timeout=self._REQUEST_TIMEOUT)
+            except requests.RequestException as error:
+                print("Request failed, skipping..", error)
+                continue
+            print("recieved data from", website)
+            # Any error status (not only 404) means there is no product page to read.
+            if not response.ok:
+                print("Ressource unavailable, skipping..")
+                continue
+            price = self._parse_price(response.text)
+            if price is None:
+                print("No price found, skipping..")
+                continue
+            print("The price is following: ", price, "CHF\n")
+            # Append per product so that prices already fetched survive a later failure.
+            with open(self.__prices_path, "a", newline="") as pricedata:
+                writing = csv.writer(pricedata, delimiter=',', quoting=csv.QUOTE_MINIMAL)
+                writing.writerow([productnumber, price])
+
+
+# Executed whenever this module is loaded (there is no __main__ guard): constructing the extractor
+# rewrites the prices file header and loads the product list, then every listed price is fetched.
+# NOTE(review): consider an `if __name__ == "__main__":` guard - confirm nothing imports this module
+# for its side effects first.
+digitec_ext = PriceExtractor()
+digitec_ext.digitec_extractor()
--- a/bin/webscratching/top_games.py
+++ b/bin/webscratching/top_games.py
@@ -0,0 +1,41 @@
+import bin.lib.website_source_grabber
+
+
+class TopGamesUpdater:
+    """Collect game titles from the Steam top-seller search page.
+
+    Titles are located by plain string search in the page source, not by an HTML parser.
+    """
+
+    _TOP_SELLERS_URL = "https://store.steampowered.com/search/?filter=topsellers"
+    # Opening tag that precedes every title in the page source.
+    _NAME_MARKER = "<div class=\"col search_name ellipsis\">"
+    # Characters skipped from the start of the marker before a title is read.
+    # NOTE(review): presumably the marker plus the markup in front of the title - verify against the live page.
+    _TITLE_OFFSET = 80
+    # Maximum number of characters inspected for one title.
+    _TITLE_WINDOW = 120
+
+    def __init__(self):
+        """Create an updater with no page loaded and an empty result list."""
+        self.__source = ""
+        self.__location = 0
+        self.__return_value = []
+        # Public attributes kept so existing attribute access keeps working; the class does not read them.
+        self.letter = ""
+        self.err = ""
+
+    def updater(self):
+        """Download the top-seller page and return the list of titles found in it.
+
+        The page is fetched through WebsiteSourceGrabber().grabber(), whose result is searched as a
+        string (assumed to be the page source - confirm against that helper).
+        """
+        self.__source = bin.lib.website_source_grabber.WebsiteSourceGrabber().grabber(self._TOP_SELLERS_URL)
+        self.list_generator()
+        return self.__return_value
+
+    def list_generator(self):
+        """Rebuild the title list from the currently loaded page source.
+
+        Every occurrence of the name marker contributes one entry: the text that starts
+        _TITLE_OFFSET characters after the marker and runs up to the next "<" (at most
+        _TITLE_WINDOW characters). The search state is reset on every call, so repeated calls give
+        a fresh list instead of a stale or duplicated one. Returns nothing; the result is handed
+        out by the method "updater".
+        """
+        self.__return_value = []
+        self.__location = 0
+        while True:
+            # str.find with a start index avoids copying the rest of the page for every title.
+            hit = self.__source.find(self._NAME_MARKER, self.__location)
+            if hit == -1:
+                break
+            self.__location = hit + self._TITLE_OFFSET
+            snippet = self.__source[self.__location:self.__location + self._TITLE_WINDOW]
+            self.__return_value.append(snippet.partition("<")[0])
+
+
+# Executed whenever this module is loaded (there is no __main__ guard); the list returned by
+# updater() is not stored or printed here.
+TopGamesUpdater().updater()