All Links from given Webpage

2024-05-10 23:05:21 +03:00
parent 25eaa39813
commit 4dc96bb5ea
4 changed files with 33 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -1,2 +1,14 @@
-# All_Links_from_given_Webpage
+# All Links from given Webpage
 Этот скрипт извлекает все ссылки с заданной веб-страницы и сохраняет их в виде текстового файла.
 > BeautifulSoup4
 > requests
 To install the dependencies and run the script:
 ```
 pip install -r req.pip
 python get_links.py
 ```
 Затем вас спросят, какую веб-страницу вы хотите проанализировать. После этого извлеченные ссылки будут сохранены в виде массива в myLinks.txt.
--- a/get_links.py
+++ b/get_links.py
@@ -0,0 +1,17 @@
 import requests as rq
 from bs4 import BeautifulSoup
 # Ask for the page to scan; surrounding whitespace is never part of a URL.
 url = input("Enter Link: ").strip()
 # Prepend a scheme only when the input does not already start with one.
 # Note that `("https" or "http") in url` would only ever test for "https",
 # turning "http://..." inputs into "https://http://...", so the prefix is
 # checked explicitly (and case-insensitively) here.
 if not url.lower().startswith(("http://", "https://")):
     url = "https://" + url
 # A timeout keeps the script from hanging forever on an unresponsive host.
 data = rq.get(url, timeout=30)
 soup = BeautifulSoup(data.text, "html.parser")
 # href=True skips <a> tags without an href attribute, which would otherwise
 # be collected as None.
 links = [link["href"] for link in soup.find_all("a", href=True)]
 # Writing the output to a file (myLinks.txt) instead of to stdout
 # You can change 'a' to 'w' to overwrite the file each time
 # NOTE(review): only the first 10 links are written — confirm this is intended.
 # An explicit encoding avoids UnicodeEncodeError on non-ASCII links when the
 # platform default encoding is not UTF-8.
 with open("myLinks.txt", 'a', encoding="utf-8") as saved:
     print(links[:10], file=saved)
--- a/myLinks.txt
+++ b/myLinks.txt
@@ -0,0 +1 @@
 ['https://trk.mail.ru/c/i02y74?mt_sub1=home', 'https://trk.mail.ru/c/cptmm9?mt_sub1=home', 'https://trk.mail.ru/c/psc3a5?mt_sub1=home', 'https://trk.mail.ru/c/jr23b4?mt_sub1=home', 'https://trk.mail.ru/c/gqhkg6?mt_sub1=mail.ru&mt_campaign=newpromomail&mt_sub2=navinew', 'https://trk.mail.ru/c/fvhzw7?mt_sub1=home', 'https://trk.mail.ru/c/t6ks72?mt_sub1=home', 'https://trk.mail.ru/c/bwzm48?mt_sub1=home', 'https://trk.mail.ru/c/yr65t1?mt_sub1=home', 'https://trk.mail.ru/c/bgt8c3?mt_sub1=home']
--- a/req.pip
+++ b/req.pip
@@ -0,0 +1,2 @@
 beautifulsoup4==4.12.3
 requests==2.31.0
		`@@ -0,0 +1 @@`
							`['https://trk.mail.ru/c/i02y74?mt_sub1=home', 'https://trk.mail.ru/c/cptmm9?mt_sub1=home', 'https://trk.mail.ru/c/psc3a5?mt_sub1=home', 'https://trk.mail.ru/c/jr23b4?mt_sub1=home', 'https://trk.mail.ru/c/gqhkg6?mt_sub1=mail.ru&mt_campaign=newpromomail&mt_sub2=navinew', 'https://trk.mail.ru/c/fvhzw7?mt_sub1=home', 'https://trk.mail.ru/c/t6ks72?mt_sub1=home', 'https://trk.mail.ru/c/bwzm48?mt_sub1=home', 'https://trk.mail.ru/c/yr65t1?mt_sub1=home', 'https://trk.mail.ru/c/bgt8c3?mt_sub1=home']`