# Sablier/themet.py
# 2025-04-25 15:18:01 +02:00
#
# 56 lines
# 1.6 KiB
# Python

import requests
import json
import time
import re
# Search criteria interpolated into the Met collection search URL.
# NOTE(review): the public API documentation appears to describe `tags` as a
# boolean filter and does not list `objectType` -- confirm that these two
# parameters actually narrow the results as intended.
query = "yes"
tags = "Clocks"
object_type = "Clock"
search_url = f"https://collectionapi.metmuseum.org/public/collection/v1/search?hasImages=true&q={query}&tags={tags}&objectType={object_type}"

# A timeout prevents the script from hanging forever on a stalled connection,
# and raise_for_status() fails loudly on an HTTP error instead of trying to
# parse an error page as a search result.
response = requests.get(search_url, timeout=30)
response.raise_for_status()
data = response.json()

# List of object IDs, limited to 10 objects to avoid overloading the API.
# "objectIDs" can be null (or absent) when the search matches nothing, so fall
# back to an empty list rather than slicing None.
object_ids = (data.get('objectIDs') or [])[:10]

# Accumulates one dictionary per object that is kept.
objects = []
# First standalone run of 3 or 4 digits, e.g. "1750" in "ca. 1750-60".
_YEAR_RE = re.compile(r'\b(\d{3,4})\b')
# Era marker following a year. Group 1 is set only for the B.C. family
# (B.C., BC, BCE, B.C.E.); A.D. / CE match without setting it. The pattern is
# case-sensitive and must not be followed by a letter, so ordinary words such
# as "century" are never mistaken for an era marker.
_ERA_RE = re.compile(r'\b(?:(B\.?\s?C\.?(?:E\.?)?)|A\.?\s?D\.?|C\.?E\.?)(?![A-Za-z])')


def extract_year(date_str):
    """Return the first 3- or 4-digit year found in a free-form date string.

    Args:
        date_str: Free-form date text such as "ca. 1750" or
            "ca. 1390-1352 B.C."; may be None or empty.

    Returns:
        The year as an int, or None when the input is empty, equal to
        "Unknown", or contains no standalone 3-4 digit number. If the first
        era marker after that year is B.C./BC/BCE, the year is returned as a
        negative number so that it orders before A.D. years.
    """
    if not date_str or date_str == "Unknown":
        return None

    match = _YEAR_RE.search(date_str)
    if match is None:
        return None

    year = int(match.group(1))

    # Only look to the right of the year: in "200 B.C.-A.D. 100" the marker
    # that belongs to the first year is the first one that follows it.
    era = _ERA_RE.search(date_str, match.end())
    if era is not None and era.group(1):
        return -year
    return year
# Fetch each object's record and keep only those that have a small image.
for obj_id in object_ids:
    obj_url = f"https://collectionapi.metmuseum.org/public/collection/v1/objects/{obj_id}"
    # The timeout keeps one stalled request from blocking the whole run.
    obj_response = requests.get(obj_url, timeout=30)

    # Pause after every request (kept or not) to stay gentle on the API.
    time.sleep(0.1)

    # An error response may not carry a JSON body; skip that object instead
    # of letting a decode error abort the remaining downloads.
    if not obj_response.ok:
        continue
    obj_data = obj_response.json()

    # Objects without a small image are not kept.
    if not obj_data.get("primaryImageSmall"):
        continue

    date_str = obj_data.get("objectDate", "")
    parsed_year = extract_year(date_str)
    obj_entry = {
        "source": "The Met",
        "id": obj_data.get("objectID"),
        "title": obj_data.get("title"),
        "date": date_str,
        "parsed_year": parsed_year,
        "culture": obj_data.get("culture"),
        # The entry's "description" is filled from the record's credit line.
        "description": obj_data.get("creditLine"),
        "image_url": obj_data.get("primaryImageSmall"),
        "image_url_hd": obj_data.get("primaryImage"),
    }
    objects.append(obj_entry)
def _year_sort_key(entry):
    """Return the entry's parsed year, or 9999 when it has none."""
    year = entry["parsed_year"]
    if year is None:
        # Undated entries are pushed to the end of the list.
        return 9999
    return year


# Order the entries chronologically, in place.
objects.sort(key=_year_sort_key)

# Write the result as indented UTF-8 JSON, keeping non-ASCII characters as-is.
with open("data2.json", "w", encoding="utf-8") as out_file:
    json.dump(objects, out_file, ensure_ascii=False, indent=2)

# Report how many objects were saved.
print(f"{len(objects)}")