The web page I need to scrape for data is behind a login page. I have tried numerous methods to accomplish this, but none of them seem to work. Can someone help? My code is below:
import requests
from bs4 import BeautifulSoup
# Log in to the site and print the raw response to the login POST.
#
# Long string values are written with implicit string-literal concatenation:
# a single-quoted Python string cannot span physical lines, so the original
# wrapped literals were a SyntaxError.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36(KHTML, like Gecko) '
        'Chrome/70.0.3538.110 Safari/537.36'
    ),
}

# Form fields sent with the login POST.
# NOTE(review): presumably these mirror the fields of the site's login form;
# verify the names and values against the actual form.
login_data = {
    'appname': 'unknown',
    'appversion': 'unknown',
    'ostype': (
        'mozilla/5.0 (windows nt 10.0; win64; x64) applewebkit/537.36 '
        '(khtml, like gecko) chrome/70.0.3538.110 safari/537.36'
    ),
    'type': 'null',
    'ssobypass': 'true',
    'dirlogin': 'true',
    'inch': 'true',
    'scrWidth': '1920',
    'scrHeight': '1040',
    'username': 'TA_KAITM_B_4a',
    'userpassword': '',
}

# The body of the `with` statement must be indented; a Session is used so that
# cookies set by the initial GET are sent again with the POST.
with requests.Session() as s:
    url = "http://cmis.ittdublin.ie"
    # Initial GET before posting the credentials.
    r = s.get(url, headers=headers)
    # Parsed copy of the landing page; not used further in this snippet.
    soup = BeautifulSoup(r.content, 'lxml')
    # Submit the login form fields to the same URL.
    r = s.post(url, data=login_data, headers=headers)
    print(r.content)
It will not let me add the HTML of the login screen here. The code below, if run, will return the HTML of the login page:
import requests
from lxml import html
# Fetch the login page, POST the username to it, print the response body,
# then request the same page once more, all through one shared session.
login_url = "http://cmis.ittdublin.ie/eportal/index.jsp"
session_requests = requests.session()

# Initial GET of the login page.
result = session_requests.get(login_url)

# Only the username is submitted; the referer header points at the login page.
payload = {"username": "TA_KAITM_B_4a"}
result = session_requests.post(
    login_url, data=payload, headers={"referer": login_url}
)
print(result.text)

# Second GET of the same address, again with a matching referer header.
url = 'http://cmis.ittdublin.ie/eportal/index.jsp'
result = session_requests.get(url, headers={"referer": url})