edx-oauth2-wordpress-backend/wp_oauth_backend/wp_oauth.py

302 lines
14 KiB
Python
Raw Normal View History

"""
written by: Lawrence McDaniel
https://lawrencemcdaniel.com
date: oct-2022
2022-10-04 17:58:18 +03:00
usage: subclass of BaseOAuth2 Third Party Authentication client to
handle the field mapping and data conversions between
the dict that WP Oauth returns versus the dict that Open edX
actually needs.
"""
2022-10-03 19:46:35 +03:00
import json
from urllib.parse import urlencode
from urllib.request import urlopen
from logging import getLogger
2022-10-03 19:46:35 +03:00
from social_core.backends.oauth import BaseOAuth2
from django.contrib.auth import get_user_model
2022-10-03 19:46:35 +03:00
# User model class as returned by Django's get_user_model(); user_data()
# uses it to look up an existing account by username.
User = get_user_model()
2022-10-03 19:46:35 +03:00
# Module-level logger named after this module.
logger = getLogger(__name__)
# When True, the backend emits additional logger.info() diagnostics
# (end point urls, response payloads, resulting user_details).
VERBOSE_LOGGING = True
2022-10-04 17:21:54 +03:00
class StepwiseMathWPOAuth2(BaseOAuth2):
    """
    WP OAuth authentication backend customized for Open edX.

    see https://python-social-auth.readthedocs.io/en/latest/backends/implementation.html

    Notes:
    - Python Social Auth social_core and/or Open edX's third party
      authentication core are finicky about how the "properties" are
      implemented. Anything that is actually declared as a Python class
      variable needs to remain a Python class variable. DO NOT refactor
      these into formal Python properties as something upstream will
      break your code.
    - for some reason adding an __init__() def to this class also causes
      something upstream to break. If you try this then you'll get an
      error about a missing positional argument, 'strategy'.
    """

    # internal cache behind the user_details property.
    _user_details = None

    # This defines the backend name and identifies it during the auth process.
    # The name is used in the URLs /login/<backend name> and
    # /complete/<backend name>.
    #
    # This is the string value that will appear in the LMS Django Admin
    # Third Party Authentication / Provider Configuration (OAuth)
    # setup page drop-down box titled, "Backend name:", just above
    # the "Client ID:" and "Client Secret:" fields.
    name = 'stepwisemath-oauth'

    # note: no slash at the end of the base url. Python Social Auth
    # might clean this up for you, but i'm not 100% certain of that.
    BASE_URL = "https://stepwisemath.ai"

    # The default key name where the user identification field is defined.
    # It's used in the auth process when some basic user data is returned.
    # This Id is stored in the UserSocialAuth.uid field and this, together
    # with the UserSocialAuth.provider field, is used to uniquely identify a
    # user association.
    ID_KEY = 'id'

    # Flags the backend to enforce email validation during the pipeline
    # (if the corresponding pipeline
    # social_core.pipeline.mail.mail_validation was enabled).
    REQUIRES_EMAIL_VALIDATION = False

    # Some providers give nothing about the user but some basic data like the
    # user Id or an email address. The default scope attribute is used to
    # specify a default value for the scope argument to request those extra
    # bits.
    #
    # wp-oauth supports 4 scopes: basic, email, profile, openid.
    # we want the first three of these.
    # see https://wp-oauth.com/docs/how-to/adding-supported-scopes/
    DEFAULT_SCOPE = ['basic', 'profile', 'email']

    # Specifying the method type required to retrieve your access token if
    # it's not the default GET request.
    ACCESS_TOKEN_METHOD = 'POST'

    # require redirect domain to match the original initiating domain.
    SOCIAL_AUTH_SANITIZE_REDIRECTS = True

    # During the auth process some basic user data is returned by the provider
    # or retrieved by the user_data() method which usually is used to call
    # some API on the provider to retrieve it. This data will be stored in the
    # UserSocialAuth.extra_data attribute, but to make it accessible under
    # some common names on different providers, this attribute defines a list
    # of tuples in the form (name, alias) where name is the key in the user
    # data (which should be a dict instance) and alias is the name to store
    # it on extra_data.
    EXTRA_DATA = [
        ('id', 'id'),
        ('is_superuser', 'is_superuser'),
        ('is_staff', 'is_staff'),
        ('date_joined', 'date_joined'),
    ]

    # the value of the scope separator is user-defined. Check the
    # scopes field value for your oauth client in your wordpress host.
    # the wp-oauth default value for scopes is 'basic' but can be
    # changed to a list. example 'basic, email, profile'. This
    # list can be delimited with commas, spaces, whatever.
    SCOPE_SEPARATOR = " "

    # private utility function. not part of psa.
    def _urlopen(self, url):
        """Fetch ``url`` and return the response body decoded as UTF-8."""
        # the context manager closes the HTTP response once the body is read.
        with urlopen(url) as http_response:
            return http_response.read().decode("utf-8")

    # private utility function. not part of psa.
    @staticmethod
    def _dump(obj) -> str:
        """
        Render ``obj`` for a log message as indented, key-sorted JSON.

        Falls back to ``repr(obj)`` when the object cannot be serialized so
        that a logging call never raises on an unexpected payload.
        """
        try:
            return json.dumps(obj, sort_keys=True, indent=4)
        except (TypeError, ValueError):
            return repr(obj)

    def is_valid_user_details(self, response) -> bool:
        """
        Return True if ``response`` is a dict containing every key of the
        user details dict that get_user_details() builds.
        """
        if not isinstance(response, dict):
            return False
        qc_keys = (
            'id', 'date_joined', 'email', 'first_name', 'fullname',
            'is_staff', 'is_superuser', 'last_name', 'username',
        )
        return all(key in response for key in qc_keys)

    def is_wp_oauth_response(self, response) -> bool:
        """
        Return True if ``response`` is a dict containing the keys that
        get_user_details() needs from the wp-oauth user end point.
        """
        if not isinstance(response, dict):
            return False
        # note the comma after 'ID': without it Python concatenates the two
        # adjacent literals into the single key 'IDdisplay_name'.
        qc_keys = ('ID', 'display_name', 'user_email', 'user_login', 'user_roles')
        return all(key in response for key in qc_keys)

    # override Python Social Auth default end points.
    # see https://wp-oauth.com/docs/general/endpoints/
    #
    # Note that we're only implementing Python properties
    # so that we can include logging for diagnostic purposes.
    @property
    def AUTHORIZATION_URL(self) -> str:
        """Authorization end point, built from BASE_URL."""
        retval = f"{self.BASE_URL}/oauth/authorize"
        if VERBOSE_LOGGING:
            logger.info('AUTHORIZATION_URL: %s', retval)
        return retval

    @property
    def ACCESS_TOKEN_URL(self) -> str:
        """Access token end point, built from BASE_URL."""
        retval = f"{self.BASE_URL}/oauth/token"
        if VERBOSE_LOGGING:
            logger.info('ACCESS_TOKEN_URL: %s', retval)
        return retval

    @property
    def USER_QUERY(self) -> str:
        """User data end point, built from BASE_URL."""
        retval = f"{self.BASE_URL}/oauth/me"
        if VERBOSE_LOGGING:
            logger.info('USER_QUERY: %s', retval)
        return retval

    @property
    def user_details(self) -> dict:
        """The most recently cached user details dict, or None."""
        return self._user_details

    @user_details.setter
    def user_details(self, value: dict):
        # only cache objects that pass is_valid_user_details(); anything
        # else is logged and the existing cached value is left untouched.
        if self.is_valid_user_details(value):
            self._user_details = value
        else:
            logger.error(
                'user_details.setter: tried to pass an invalid object %s',
                self._dump(value),
            )

    # see https://python-social-auth.readthedocs.io/en/latest/backends/implementation.html
    # Return user details from the Wordpress user account
    def get_user_details(self, response) -> dict:
        """
        Convert a wp-oauth response into the user details dict.

        Args:
            response: either the dict returned by the wp-oauth user end
                point, or a user details dict previously returned by this
                method (possibly enhanced with additional keys).

        Returns:
            - ``response`` itself when it already passes
              is_valid_user_details().
            - a newly built (and cached) user details dict when ``response``
              passes is_wp_oauth_response().
            - the cached user details when ``response`` is unusable and a
              cached value exists.
            - None when ``response`` is unusable and nothing is cached.
        """
        tainted = False

        if not response:
            logger.warning('get_user_details() - response object is missing.')
            tainted = True

        if not isinstance(response, dict):
            logger.warning(
                'get_user_details() - was expecting a response object of type dict but received an object of type %s',
                type(response),
            )
            tainted = True

        if not tainted:
            # a def in the third_party_auth pipeline list calls
            # get_user_details() after it has already been called once. i
            # don't know why. but, it passes the original get_user_details()
            # dict enhanced with additional token-related keys. if we receive
            # this modified dict then we should pass it along to the next
            # defs in the pipeline.
            if self.is_valid_user_details(response):
                # -------------------------------------------------------------
                # expected use case #2: a potentially enhanced version of an
                # original user_details dict.
                # -------------------------------------------------------------
                if VERBOSE_LOGGING:
                    logger.info(
                        'get_user_details() - detected an enhanced get_user_details() dict in the response: %s',
                        self._dump(response),
                    )
                return response

            # otherwise we probably received the default response from the
            # oauth provider based on the scopes 'basic' 'email' 'profile'.
            # We'll check a few of the most important keys to see if they
            # exist.
            if self.is_wp_oauth_response(response):
                # -------------------------------------------------------------
                # expected use case #1: response object is a dict with all
                # required keys.
                # -------------------------------------------------------------
                if VERBOSE_LOGGING:
                    logger.info(
                        'get_user_details() - start. response: %s',
                        self._dump(response),
                    )
            else:
                logger.warning(
                    'get_user_details() - response object is missing one or more required keys: %s',
                    self._dump(response),
                )
                tainted = True

        if tainted:
            cached = self.user_details
            if cached:
                logger.warning(
                    'get_user_details() - returning cached results. user_details: %s',
                    self._dump(cached),
                )
                return cached
            logger.error('response object is missing or misformed, and no cached results were found. Cannot get user details from oauth provider.')
            return None

        # ---------------------------------------------------------------------
        # build and internally cache the get_user_details() dict
        # ---------------------------------------------------------------------

        # try to parse out the first and last names. the last whitespace
        # delimited token is the last name whenever there are two or more
        # tokens, so that names like "Mary Ann Smith" don't lose their surname.
        split_name = response.get('display_name', '').split()
        first_name = split_name[0] if split_name else ''
        last_name = split_name[-1] if len(split_name) > 1 else ''

        # check for superuser / staff status
        user_roles = response.get('user_roles', [])
        super_user = 'administrator' in user_roles
        is_staff = 'administrator' in user_roles

        self.user_details = {
            # 0 is the fallback for a missing ID. int() accepts both the
            # str and the int form of the id.
            'id': int(response.get('ID', 0)),
            'username': response.get('user_login', ''),
            'email': response.get('user_email', ''),
            'first_name': first_name,
            'last_name': last_name,
            'fullname': response.get('display_name', ''),
            'is_superuser': super_user,
            'is_staff': is_staff,
            'refresh_token': response.get('refresh_token', ''),
            'scope': response.get('scope', ''),
            'token_type': response.get('token_type', ''),
            'date_joined': response.get('user_registered', ''),
            'user_status': response.get('user_status', ''),
        }
        if VERBOSE_LOGGING:
            logger.info(
                'get_user_details() - finish. user_details: %s',
                self._dump(self.user_details),
            )
        return self.user_details

    # Load user data from service url end point. Note that in the case of
    # wp oauth, the response object returned by self.USER_QUERY
    # is the same as the response object passed to get_user_details().
    #
    # see https://python-social-auth.readthedocs.io/en/latest/backends/implementation.html
    def user_data(self, access_token, *args, **kwargs) -> dict:
        """
        Query USER_QUERY with ``access_token``, refresh the cached user
        details and sync is_superuser / is_staff / first_name / last_name
        onto an existing Django user with the same username.

        Returns:
            the user details dict; the cached user details (possibly None)
            when the response could not be converted; None when the
            response body is not valid JSON.
        """
        endpoint = self.USER_QUERY
        url = f'{endpoint}?' + urlencode({
            'access_token': access_token
        })
        if VERBOSE_LOGGING:
            # never write the bearer token itself to the log.
            logger.info("user_data() url: %s", f'{endpoint}?access_token=<redacted>')

        try:
            response = json.loads(self._urlopen(url))
        except ValueError as e:
            logger.error('user_data() %s', e)
            return None

        # convert the raw wp-oauth response (which refreshes our internal
        # user_details cache) and validate the *converted* dict. The raw
        # response uses wp key names ('ID', 'user_login', ...) and so can
        # never pass is_valid_user_details() itself.
        user_details = self.get_user_details(response)
        if not self.is_valid_user_details(user_details):
            return self.user_details

        # add synchronization of any data fields that get missed by the
        # built-in open edx third party authentication sync functionality.
        try:
            # this gets called just prior to account creation for
            # new users, hence, we need to catch DoesNotExist
            # exceptions.
            user = User.objects.get(username=user_details['username'])
        except User.DoesNotExist:
            return user_details

        updates = []
        if (user.is_superuser != user_details['is_superuser']) or (user.is_staff != user_details['is_staff']):
            user.is_superuser = user_details['is_superuser']
            user.is_staff = user_details['is_staff']
            updates.append('Updated the is_superuser/is_staff flags for user %s')

        if (user.first_name != user_details['first_name']) or (user.last_name != user_details['last_name']):
            user.first_name = user_details['first_name']
            user.last_name = user_details['last_name']
            updates.append('Updated first_name/last_name for user %s')

        if updates:
            # one save covers both groups of changes; log only after the
            # save has succeeded.
            user.save()
            for message in updates:
                logger.info(message, user.username)

        return user_details