"""Fetch a sample of clock objects from The Met Collection API into data2.json.

The script searches the public collection API, downloads the first few
matching objects, keeps those that expose a small primary image, sorts them
by the year parsed from their free-text date, and writes the result as JSON.
"""

import json
import re
import time
from typing import Optional

API_BASE = "https://collectionapi.metmuseum.org/public/collection/v1"
QUERY = "yes"
TAGS = "Clocks"
OBJECT_TYPE = "Clock"
MAX_OBJECTS = 10  # cap the number of object lookups to avoid overloading the API
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs forever
REQUEST_DELAY = 0.1  # seconds to pause after each object request
OUTPUT_PATH = "data2.json"

# First standalone run of 3 or 4 digits, e.g. "1750" in "ca. 1750-60".
_YEAR_RE = re.compile(r'\b(\d{3,4})\b')


def extract_year(date_str: Optional[str]) -> Optional[int]:
    """Return the first 3- or 4-digit number found in *date_str* as an int.

    Returns None when *date_str* is empty, None, the literal "Unknown", or
    contains no standalone 3-4 digit number (e.g. "18th century").
    """
    if not date_str or date_str == "Unknown":
        return None
    match = _YEAR_RE.search(date_str)
    return int(match.group(1)) if match else None


def build_entry(obj_data: dict) -> Optional[dict]:
    """Convert one raw API object record into the exported entry.

    Returns None when the record has no "primaryImageSmall" value, so that
    objects without a usable image are left out of the output.
    """
    if not obj_data.get("primaryImageSmall"):
        return None
    date_str = obj_data.get("objectDate", "")
    return {
        "source": "The Met",
        "id": obj_data.get("objectID"),
        "title": obj_data.get("title"),
        "date": date_str,
        "parsed_year": extract_year(date_str),
        "culture": obj_data.get("culture"),
        # The credit line is what this script exports as the description.
        "description": obj_data.get("creditLine"),
        "image_url": obj_data.get("primaryImageSmall"),
        "image_url_hd": obj_data.get("primaryImage"),
    }


def fetch_object_ids(session):
    """Run the search and return at most MAX_OBJECTS object IDs.

    *session* is any object with a requests-style ``get`` method.
    Raises the session's HTTP error if the search request itself fails.
    """
    response = session.get(
        f"{API_BASE}/search",
        # Passing params lets the HTTP library URL-encode the values; the
        # key order matches the original hand-built query string.
        params={
            "hasImages": "true",
            "q": QUERY,
            "tags": TAGS,
            "objectType": OBJECT_TYPE,
        },
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    # Guard against a null or missing "objectIDs" so that a search with no
    # usable ID list yields an empty result instead of a TypeError/KeyError.
    object_ids = response.json().get("objectIDs") or []
    return object_ids[:MAX_OBJECTS]


def fetch_objects(session, object_ids):
    """Download each object in *object_ids* and return the exportable entries."""
    entries = []
    for obj_id in object_ids:
        response = session.get(
            f"{API_BASE}/objects/{obj_id}", timeout=REQUEST_TIMEOUT
        )
        # Pause after every request, successful or not, to stay polite.
        time.sleep(REQUEST_DELAY)
        if not response.ok:
            # Skip objects the API refuses to serve instead of aborting the run.
            continue
        entry = build_entry(response.json())
        if entry is not None:
            entries.append(entry)
    return entries


def main():
    """Fetch, sort and write the objects, then print how many were saved."""
    # Imported here so the pure helpers above stay importable (and testable)
    # without the third-party dependency installed.
    import requests

    with requests.Session() as session:
        objects = fetch_objects(session, fetch_object_ids(session))

    # Entries without a parsed year sort after all dated entries.
    objects.sort(
        key=lambda entry: (
            entry["parsed_year"] is None,
            entry["parsed_year"] or 0,
        )
    )

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(objects, f, ensure_ascii=False, indent=2)

    print(f"{len(objects)} ✅")


if __name__ == "__main__":
    main()