Building on the previous answers, and assuming you have a Client ID, Client Secret and Tenant ID as well as the necessary permissions in Microsoft Azure, here is a step-by-step process in Python showing how to download multiple files from a specific user's subfolder in OneDrive:
First, acquire an access token using the Microsoft Authentication Library:
import requests
import msal
import os
from office365.graph_client import GraphClient
# Configuration (Azure app registration details and user email).
# The credentials are read from environment variables so they are never
# hard-coded in the script.
CLIENT_ID = os.getenv('OFFICE365_CLIENT_ID')
CLIENT_SECRET = os.getenv('OFFICE365_SECRET_VALUE')
TENANT_ID = os.getenv('OFFICE365_TENANT_ID')
target_email = "[email protected]" # Replace with the target user's email

# Fail early with a clear message: os.getenv returns None for an unset
# variable, which would otherwise end up inside the authority URL below.
_missing_settings = [
    env_name
    for env_name, env_value in (
        ('OFFICE365_CLIENT_ID', CLIENT_ID),
        ('OFFICE365_SECRET_VALUE', CLIENT_SECRET),
        ('OFFICE365_TENANT_ID', TENANT_ID),
    )
    if not env_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing environment variable(s): " + ", ".join(_missing_settings)
    )

# Define the authority and scope
authority = f'https://login.microsoftonline.com/{TENANT_ID}'
scope = ['https://graph.microsoft.com/.default']

# Create an MSAL confidential client application
app = msal.ConfidentialClientApplication(
    CLIENT_ID, authority=authority, client_credential=CLIENT_SECRET
)

# Acquire token for the Microsoft Graph API
result = app.acquire_token_for_client(scopes=scope)
if 'access_token' not in result:
    # Use .get so that an error payload without 'error_description' still
    # produces a readable failure instead of a KeyError.
    raise RuntimeError(
        result.get('error_description')
        or result.get('error')
        or 'Failed to acquire an access token'
    )
access_token = result['access_token']
Second, use the access token to authenticate with the MS Graph API and find the specific user from a list of users:
# Construct the headers with your access token
headers = {
    'Authorization': f'Bearer {access_token}'
}

# Search the user list for the target address.
# The /users collection is returned in pages; each response carries an
# '@odata.nextLink' URL while more pages remain, so keep following it
# until the user is found or the list is exhausted.
users_url = "https://graph.microsoft.com/v1.0/users"
wanted_mail = target_email.lower()
user_id = None
next_url = users_url
while next_url and user_id is None:
    users_response = requests.get(next_url, headers=headers)
    users_response.raise_for_status()
    users_page = users_response.json()
    users = users_page.get('value', [])
    for user in users:
        # 'mail' may be missing or null, and e-mail addresses are compared
        # case-insensitively so "User@x.com" matches "user@x.com".
        if (user.get('mail') or '').lower() == wanted_mail:
            user_id = user['id']
            break
    next_url = users_page.get('@odata.nextLink')

if not user_id:
    raise Exception("User not found")
Third, list drives for the user, access the desired drive and list its contents:
# List drives for the user
drives_url = f"https://graph.microsoft.com/v1.0/users/{user_id}/drives"
drives_response = requests.get(drives_url, headers=headers)
drives_response.raise_for_status()
drives = drives_response.json().get('value', [])

# Select the drive named 'OneDrive' (first match wins) and fail loudly if it
# does not exist, rather than leaving drive_id/items undefined for later steps.
drive_id = next(
    (drive['id'] for drive in drives if drive.get('name') == 'OneDrive'),
    None,
)
if drive_id is None:
    raise Exception("Drive 'OneDrive' not found")

# List the contents of the drive root, following '@odata.nextLink' so that
# entries beyond the first page are included.
drive_items_url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root/children"
items = []
next_url = drive_items_url
while next_url:
    drive_items_response = requests.get(next_url, headers=headers)
    drive_items_response.raise_for_status()
    drive_items_page = drive_items_response.json()
    items.extend(drive_items_page.get('value', []))
    next_url = drive_items_page.get('@odata.nextLink')
Fourth, find the desired folder and list its contents:
# Find the top-level entry named 'MyFolder'.
# Stop with an explicit error when it is absent; otherwise the script would
# silently continue with whichever entry happened to be last in the listing.
folder_id = next(
    (item['id'] for item in items if item['name'] == 'MyFolder'),
    None,
)
if folder_id is None:
    raise Exception("Folder 'MyFolder' not found")

# URL to get the children (items) of the folder
datasets_url = f'https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{folder_id}/children'

# Fetch the folder contents, following '@odata.nextLink' across pages
folder_contents = []
next_url = datasets_url
while next_url:
    response = requests.get(next_url, headers=headers)
    response.raise_for_status()  # Check if the request was successful
    contents_page = response.json()
    folder_contents.extend(contents_page.get('value', []))
    next_url = contents_page.get('@odata.nextLink')

# Filter the contents to only include folders (entries carrying a 'folder' key)
folders = [item for item in folder_contents if 'folder' in item]
Fifth, find the desired subfolder and list its contents:
# Find the subfolder named 'MySubFolder' among the folders listed above.
# Raise a clear error when it is missing instead of hitting a NameError on
# sub_folder_id further down.
sub_folder_id = next(
    (folder['id'] for folder in folders if folder['name'] == 'MySubFolder'),
    None,
)
if sub_folder_id is None:
    raise Exception("Subfolder 'MySubFolder' not found")

# URL to get the children (items) of the subfolder
cme_url = f'https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{sub_folder_id}/children'

# Fetch the subfolder contents, following '@odata.nextLink' so that files
# beyond the first page are not skipped by the download step.
folder_contents = []
next_url = cme_url
while next_url:
    response = requests.get(next_url, headers=headers)
    response.raise_for_status()  # Check if the request was successful
    contents_page = response.json()
    folder_contents.extend(contents_page.get('value', []))
    next_url = contents_page.get('@odata.nextLink')

# Filter the contents to only include files (entries carrying a 'file' key)
files = [item for item in folder_contents if 'file' in item]
Finally, download all files from the subfolder to a local directory:
# Local folder that will receive the downloaded files
target_directory = "/path/to/local/directory"
# Create it (including any missing parents); an existing directory is fine
os.makedirs(name=target_directory, exist_ok=True)
# Function to download a file
def download_file(headers, drive_id, file_id, file_name, target_directory):
    """Download one drive item and save it inside ``target_directory``.

    Args:
        headers: HTTP headers sent with the request (carries the bearer token).
        drive_id: ID of the drive that contains the item.
        file_id: ID of the item whose content is downloaded.
        file_name: Name used for the local copy.
        target_directory: Existing local directory to write the file into.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the local file cannot be opened or written.
    """
    target_path = os.path.join(target_directory, file_name)
    # Construct the download URL
    download_url = f'https://graph.microsoft.com/v1.0/drives/{drive_id}/items/{file_id}/content'
    # Stream the body instead of loading it into memory. The `with` block
    # releases the connection even if the status check or the write fails.
    with requests.get(download_url, headers=headers, stream=True) as response:
        response.raise_for_status()  # Check if the request was successful
        # Save the file locally, one chunk at a time
        with open(target_path, 'wb') as local_file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip empty chunks
                    local_file.write(chunk)
    print(f"Downloaded {file_name} to {target_path}")
# Report each file's metadata, then fetch it into the target directory
for entry in files:
    name, item_id = entry["name"], entry["id"]
    print(f"File Name: {name}, File ID: {item_id}, Size: {entry['size']} bytes")
    download_file(headers, drive_id, item_id, name, target_directory)