# Original listing: 56 lines, 1.6 KiB, Python
import json
import re
import time
from typing import Optional
from urllib.parse import urlencode

import requests
# Search criteria sent to The Met Collection API.
query = "yes"
tags = "Clocks"
object_type = "Clock"

# NOTE(review): confirm against the API documentation that the search endpoint
# accepts a keyword value for `tags` and supports an `objectType` parameter.
# urlencode() percent-escapes the values, so criteria containing spaces or "&"
# cannot corrupt the query string.
search_url = (
    "https://collectionapi.metmuseum.org/public/collection/v1/search?"
    + urlencode(
        {
            "hasImages": "true",
            "q": query,
            "tags": tags,
            "objectType": object_type,
        }
    )
)

# The timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error body.
response = requests.get(search_url, timeout=10)
response.raise_for_status()
data = response.json()

# List of object IDs, limited to 10 objects to avoid overloading the API.
# `or []` covers a missing or null "objectIDs" value, which cannot be sliced.
object_ids = (data.get("objectIDs") or [])[:10]

# Entries collected for the JSON export; filled by the per-object loop below.
objects = []
# First standalone run of three or four digits, e.g. the "1750" in "ca. 1750".
_YEAR_PATTERN = re.compile(r'\b(\d{3,4})\b')


def extract_year(date_str: Optional[str]) -> Optional[int]:
    """Return the first 3- or 4-digit year found in a free-form date string.

    Args:
        date_str: Date text such as "ca. 1750" or "1750–60". May be empty
            or None.

    Returns:
        The first standalone 3- or 4-digit number as an int, or None when
        the input is empty, None, the literal "Unknown", or contains no such
        number (for example "18th century").

    Only the digits are read: era markers are not interpreted, so
    "500 B.C." yields 500.
    """
    if not date_str or date_str == "Unknown":
        return None

    match = _YEAR_PATTERN.search(date_str)
    if match:
        return int(match.group(1))
    return None
# Fetch each object's record and keep the ones that have a preview image.
for obj_id in object_ids:
    obj_url = f"https://collectionapi.metmuseum.org/public/collection/v1/objects/{obj_id}"
    try:
        obj_response = requests.get(obj_url, timeout=10)
        obj_response.raise_for_status()
        obj_data = obj_response.json()
    except (requests.RequestException, ValueError) as exc:
        # One unavailable or malformed record should not abort the whole run.
        print(f"Skipping object {obj_id}: {exc}")
        obj_data = {}

    if obj_data.get("primaryImageSmall"):
        date_str = obj_data.get("objectDate", "")
        parsed_year = extract_year(date_str)

        obj_entry = {
            "source": "The Met",
            "id": obj_data.get("objectID"),
            "title": obj_data.get("title"),
            "date": date_str,
            "parsed_year": parsed_year,
            "culture": obj_data.get("culture"),
            # The record's credit line is exported under the "description" key.
            "description": obj_data.get("creditLine"),
            "image_url": obj_data.get("primaryImageSmall"),
            "image_url_hd": obj_data.get("primaryImage"),
        }
        objects.append(obj_entry)

    # Short pause between consecutive requests.
    time.sleep(0.1)

# Sort by year, oldest first; entries without a parsed year use 9999 so they
# end up last.
objects.sort(key=lambda x: x["parsed_year"] if x["parsed_year"] is not None else 9999)

# ensure_ascii=False writes accented characters as-is instead of \u escapes.
with open("data2.json", "w", encoding="utf-8") as f:
    json.dump(objects, f, ensure_ascii=False, indent=2)

print(f"{len(objects)} ✅")