{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Get started \n", "(last updated: 2025/06/09 zensvi=v1.3.0)\n", "\n", "To use `zensvi` in a project, run the following command. Please make sure to install a compatiable version of `PyTorch` and `torchvision` separately before running the command.\n", "\n", "See the [documentation](https://pytorch.org/get-started/locally/) for more information on how to install PyTorch." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%pip install --upgrade zensvi # if zen-svi is not installed\n", "\n", "import zensvi\n", "\n", "print(zensvi.__version__)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## How to download Street View Images\n", "zensvi.download: A module to download Street View Images\n", "### Mapillary" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.download import MLYDownloader\n", "\n", "mly_api_key = \"YOUR_OWN_MLY_API_KEY\" # please register your own Mapillary API key at https://www.mapillary.com/dashboard/developers\n", "# additional keyword arguments for the Mapillary API\n", "kwarg = {\n", " \"image_type\": \"all\", # The tile image_type to be obtained, either as ‘flat’, ‘pano’ (panoramic), or 'all'.\n", " \"min_captured_at\": 1484549945000, # The min date. Format in Unix timestamp (milliseconds).\n", " \"max_captured_at\": 1642935417694, # The max date. Format in Unix timestamp (milliseconds).\n", " \"organization_id\": [1805883732926354], # The organization id, ID of the organization this image (or sets of images) belong to. It can be absent.\n", " \"compass_angle\": (0,180) # The compass angle of the image. It can be absent. A range of values can be provided as a tuple. Here, we're setting the minimum and maximum compass angle to 0 and 180 degrees, respectively.\n", "}\n", "mly_downloader = MLYDownloader(\n", " mly_api_key, # Mapillary API key\n", " log_path=None, # path to the log file\n", ")\n", "mly_downloader.download_svi(\n", " \"path/to/output\", # output directory\n", " path_pid=None, # if you already have a list of panorama IDs, you can specify the path to the file here\n", " lat=None,\n", " lon=None, # latitude and longitude of the location to download\n", " input_csv_file=\"\", # path to the input CSV file containing the location information\n", " input_shp_file=\"\", # path to the input shapefile containing the location information\n", " input_place_name=\"\", # name of the location to download\n", " id_columns=None, # column name of the ID in the input CSV file or shapefile\n", " buffer=0, # buffer size in meters around the input location\n", " update_pids=False, # if True, the list of panorama IDs will be updated. If False, the list of panorama IDs will be loaded from the path_pid file\n", " resolution=1024, # resolution of the image\n", " cropped=False, # if True, only the upper half of the image is saved\n", " batch_size=1000, # batch size for downloading images\n", " start_date=None, # start date for downloading images (YYYY-MM-DD)\n", " end_date=None, # end date for downloading images (YYYY-MM-DD)\n", " metadata_only=False, # if True, only metadata is downloaded\n", " use_cache=True, # if True, the cache is used\n", " additional_fields=[\"all\"], # Additional fields to fetch from the API. Defaults to [\"all\"].\n", " # Possible fields include:\n", " # 1. altitude - float, original altitude from Exif\n", " # 2. 
atomic_scale - float, scale of the SfM reconstruction around the image\n", " # 3. camera_parameters - array of float, intrinsic camera parameters\n", " # 4. camera_type - enum, type of camera projection (perspective, fisheye, or spherical)\n", " # 5. captured_at - timestamp, capture time\n", " # 6. compass_angle - float, original compass angle of the image\n", " # 7. computed_altitude - float, altitude after running image processing\n", " # 8. computed_compass_angle - float, compass angle after running image processing\n", " # 9. computed_geometry - GeoJSON Point, location after running image processing\n", " # 10. computed_rotation - enum, corrected orientation of the image\n", " # 11. exif_orientation - enum, orientation of the camera as given by the exif tag\n", " # 12. geometry - GeoJSON Point geometry\n", " # 13. height - int, height of the original image uploaded\n", " # 14. thumb_256_url - string, URL to the 256px wide thumbnail\n", " # 15. thumb_1024_url - string, URL to the 1024px wide thumbnail\n", " # 16. thumb_2048_url - string, URL to the 2048px wide thumbnail\n", " # 17. merge_cc - int, id of the connected component of images that were aligned together\n", " # 18. mesh - { id: string, url: string } - URL to the mesh\n", " # 19. quality_score - float, how good the image is (experimental)\n", " # 20. sequence - string, ID of the sequence\n", " # 21. sfm_cluster - { id: string, url: string } - URL to the point cloud\n", " # 22. width - int, width of the original image uploaded\n", " **kwarg\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### KartaView" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.download import KVDownloader\n", "\n", "kv_downloader = KVDownloader(\n", " log_path=None, # path to the log file\n", ")\n", "kv_downloader.download_svi(\n", " \"path/to/output_kv\", # output directory\n", " path_pid=None, # if you already have a list of panorama IDs, you can specify the path to the file here\n", " lat=None,\n", " lon=None, # latitude and longitude of the location to download\n", " input_csv_file=\"\", # path to the input CSV file containing the location information\n", " input_shp_file=\"\", # path to the input shapefile containing the location information\n", " input_place_name=\"\", # name of the location to download\n", " buffer=0, # buffer size in meters around the input location\n", " update_pids=False, # if True, the list of panorama IDs will be updated. 
If False, the list of panorama IDs will be loaded from the path_pid file\n", " cropped=False, # if True, only the upper half of the image is saved\n", " batch_size=1000, # batch size for downloading images\n", " start_date=\"2021-01-01\", # start date for downloading images (YYYY-MM-DD)\n", " end_date=\"2023-01-01\", # end date for downloading images (YYYY-MM-DD)\n", " metadata_only=False, # if True, only metadata is downloaded\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Amsterdam" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.download import AMSDownloader\n", "\n", "ams_downloader = AMSDownloader(\n", " log_path=None, # path to the log file\n", ")\n", "ams_downloader.download_svi(\n", " \"path/to/output_ams\", # output directory\n", " path_pid=None, # if you already have a list of panorama IDs, you can specify the path to the file here\n", " lat=None,\n", " lon=None, # latitude and longitude of the location to download\n", " input_csv_file=\"\", # path to the input CSV file containing the location information\n", " input_shp_file=\"\", # path to the input shapefile containing the location information\n", " input_place_name=\"\", # name of the location to download\n", " buffer=0, # buffer size in meters around the input location\n", " update_pids=False, # if True, the list of panorama IDs will be updated. If False, the list of panorama IDs will be loaded from the path_pid file\n", " cropped=False, # if True, only the upper half of the image is saved\n", " batch_size=1000, # batch size for downloading images\n", " start_date=\"2021-01-01\", # start date for downloading images (YYYY-MM-DD)\n", " end_date=\"2023-01-01\", # end date for downloading images (YYYY-MM-DD)\n", " metadata_only=False, # if True, only metadata is downloaded\n", ")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## How to analyze metadata\n", "zensvi.metadata: A module to analyze metadata from Mapillary\n", "### Image-level metadata analysis\n", "\n", "List of metadata fields:\n", "- `year`: Year of the image\n", "- `month`: Month of the image\n", "- `day`: Day of the image\n", "- `hour`: Hour of the image\n", "- `day_of_week`: Day of the week of the image\n", "- `daytime_nighttime`: Daytime or nighttime of the image\n", "- `season`: Season of the image\n", "- `relative_angle`: Relative angle of the image with respect to the street\n", "- `h3_id`: H3 ID of the image from level 0 to 15\n", "- `speed_kmh`: Speed of the vehicle when the image was captured" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.metadata import MLYMetadata\n", "\n", "path_input = \"path/to/input\"\n", "mly_metadata = MLYMetadata(path_input)\n", "mly_metadata.compute_metadata(\n", " unit=\"image\", # unit of the metadata\n", " indicator_list=\"all\", # list of indicators to compute. You can specify a list of indicators in space-separated format, e.g., \"year month day\" or \"all\" to compute all indicators\n", " path_output=\"path/to/output\" # path to the output file\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Street-level and grid-level metadata analysis\n", "\n", "List of metadata fields:\n", "- `coverage`: Coverage of street view images. For street-level metadata, it is the coverage of the street in terms of length covered by user-defined buffers from street view images. 
For grid-level metadata, it is the coverage of the grid in terms of area covered by user-defined buffers from street view images.\n", "- `count`: Count of the street view images. For street-level metadata, it is the count of the street view images close to the street. For grid-level metadata, it is the count of the street view images in the grid.\n", "- `days_elapsed`: Number of days elapsed between the oldest and most recent street view images. \n", "- `most_recent_date`: Most recent date of the street view images.\n", "- `oldest_date`: Oldest date of the street view images.\n", "- `number_of_years`: Number of unique years of the street view images.\n", "- `number_of_months`: Number of unique months of the street view images.\n", "- `number_of_days`: Number of unique days of the street view images.\n", "- `number_of_hours`: Number of unique hours of the street view images.\n", "- `number_of_days_of_week`: Number of unique days of the week of the street view images.\n", "- `number_of_daytime`: Number of daytime street view images. This is computed based on the sunrise and sunset times of the location.\n", "- `number_of_nighttime`: Number of nighttime street view images. This is computed based on the sunrise and sunset times of the location.\n", "- `number_of_spring`: Number of spring street view images. This is computed based on the season of the location.\n", "- `number_of_summer`: Number of summer street view images. This is computed based on the season of the location.\n", "- `number_of_autumn`: Number of autumn street view images. This is computed based on the season of the location.\n", "- `number_of_winter`: Number of winter street view images. This is computed based on the season of the location.\n", "- `average_compass_angle`: Average compass angle of the street view images.\n", "- `average_relative_angle`: Average relative angle of the street view images.\n", "- `average_is_pano`: Average ratio of panoramic street view images.\n", "- `number_of_users`: Number of unique users of the street view images.\n", "- `number_of_sequences`: Number of unique sequences of the street view images.\n", "- `number_of_organizations`: Number of unique organizations of the street view images.\n", "- `average_speed_kmh`: Average speed of camera when the street view images were captured." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Street-level metadata analysis" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path_input = \"path/to/input\"\n", "mly_metadata = MLYMetadata(path_input)\n", "mly_metadata.compute_metadata(\n", " unit=\"street\", # unit of the metadata\n", " indicator_list=\"all\", # list of indicators to compute. You can specify a list of indicators in space-separated format, e.g., \"coverage count days_elapsed\" or \"all\" to compute all indicators\n", " coverage_buffer=50, # buffer size in meters for computing coverage\n", " path_output=\"path/to/output\", # path to the output file\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Grid-level metadata analysis" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path_input = \"path/to/input\"\n", "mly_metadata = MLYMetadata(path_input)\n", "mly_metadata.compute_metadata(\n", " unit=\"grid\", # unit of the metadata\n", " grid_resolution=7, # resolution of the grid in terms of H3 resolution (0-15) to aggregate the metadata\n", " indicator_list=\"all\", # list of indicators to compute. 
You can specify a list of indicators in space-separated format, e.g., \"coverage count days_elapsed\" or \"all\" to compute all indicators\n", " coverage_buffer=50, # buffer size in meters for computing coverage\n", " path_output=\"path/to/output\", # path to the output file\n", ")" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## How to run computer vision models\n", "zensvi.cv: A module to run computer vision models\n", "### Semantic/panoptic segmentation\n", "- Semantic segmentation: Assigns a class to each pixel in the image.\n", "- Panoptic segmentation: Assigns a class to each pixel in the image and assigns an instance ID to each object.\n", "\n", "For the models, we used the following pre-trained models from [Mask2Former](https://github.com/facebookresearch/Mask2Former) \n", "#### Cityscapes\n", "\n", "List of semantic segmentation classes for Cityscapes:\n", "- `road`\n", "- `sidewalk`\n", "- `building`\n", "- `wall`\n", "- `fence`\n", "- `pole`\n", "- `traffic light`\n", "- `traffic sign`\n", "- `vegetation`\n", "- `terrain`\n", "- `sky`\n", "- `person`\n", "- `rider`\n", "- `car`\n", "- `truck`\n", "- `bus`\n", "- `train`\n", "- `motorcycle`\n", "- `bicycle`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import Segmenter\n", "\n", "# initialize the segmenter\n", "segmenter = Segmenter()\n", "\n", "# set arguments\n", "dir_input = \"path/to/input\"\n", "dir_image_output = \"path/to/image_output\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "save_image_options = \"segmented_image blend_image\" # segmented_image (colored image), blend_image (blended image)\n", "save_format = \"csv json\"\n", "segmenter.segment(\n", " dir_input,\n", " dir_image_output=dir_image_output,\n", " dir_summary_output=dir_summary_output,\n", " save_image_options=save_image_options,\n", " save_format=save_format,\n", ")" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### Mapillary Vistas\n", "\n", "List of semantic segmentation classes for Mapillary Vistas:\n", "- `Bird`\n", "- `Ground Animal`\n", "- `Curb`\n", "- `Fence`\n", "- `Guard Rail`\n", "- `Barrier`\n", "- `Wall`\n", "- `Bike Lane`\n", "- `Crosswalk - Plain`\n", "- `Curb Cut`\n", "- `Parking`\n", "- `Pedestrian Area`\n", "- `Rail Track`\n", "- `Road`\n", "- `Service Lane`\n", "- `Sidewalk`\n", "- `Bridge`\n", "- `Building`\n", "- `Tunnel`\n", "- `Person`\n", "- `Bicyclist`\n", "- `Motorcyclist`\n", "- `Other Rider`\n", "- `Lane Marking - Crosswalk`\n", "- `Lane Marking - General`\n", "- `Mountain`\n", "- `Sand`\n", "- `Sky`\n", "- `Snow`\n", "- `Terrain`\n", "- `Vegetation`\n", "- `Water`\n", "- `Banner`\n", "- `Bench`\n", "- `Bike Rack`\n", "- `Billboard`\n", "- `Catch Basin`\n", "- `CCTV Camera`\n", "- `Fire Hydrant`\n", "- `Junction Box`\n", "- `Mailbox`\n", "- `Manhole`\n", "- `Phone Booth` \n", "- `Pothole`\n", "- `Street Light`\n", "- `Pole`\n", "- `Traffic Sign Frame`\n", "- `Utility Pole`\n", "- `Traffic Light`\n", "- `Traffic Sign (Back)`\n", "- `Traffic Sign (Front)`\n", "- `Trash Can`\n", "- `Bicycle`\n", "- `Boat`\n", "- `Bus`\n", "- `Car`\n", "- `Caravan`\n", "- `Motorcycle`\n", "- `On Rails`\n", "- `Other Vehicle`\n", "- `Trailer`\n", "- `Truck`\n", "- `Wheeled Slow`\n", "- `Car Mount`\n", "- `Ego Vehicle`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# initialize the segmenter\n", "dataset = \"mapillary\" # this can be either \"mapillary\" 
or \"cityscapes\"\n", "task = \"panoptic\" # this can be either \"semantic\" or \"panoptic\"\n", "segmenter = Segmenter(dataset=dataset, task=task)\n", "\n", "# set arguments\n", "dir_input = \"path/to/input\"\n", "dir_image_output = \"path/to/image_output\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "save_image_options = \"segmented_image blend_image\"\n", "save_format = \"csv json\"\n", "csv_format = \"long\" # \"long\" or \"wide\"\n", "segmenter.segment(\n", " dir_input,\n", " dir_image_output=dir_image_output,\n", " dir_summary_output=dir_summary_output,\n", " save_image_options=save_image_options,\n", " save_format=save_format,\n", " csv_format=csv_format,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Image classification/regression\n", "- Image classification: Assigns a class to the entire image.\n", "- Image regression: Predicts a continuous value for the entire image.\n", "#### Places365\n", "Places365 is a scene-centric database with 365 scene categories and 102 attributes. The model predicts the scene category and attributes of the image.\n", "List of scene classes for Places365 can be found [here](https://github.com/CSAILVision/places365/blob/master/categories_places365.txt) and a list of attributes can be found [here](https://github.com/CSAILVision/places365/blob/master/labels_sunattribute.txt)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import ClassifierPlaces365\n", "\n", "# initialize the classifier\n", "classifier = ClassifierPlaces365(\n", " device=\"cpu\", # device to use (either \"cpu\" or \"gpu\")\n", ")\n", "\n", "# set arguments\n", "classifier = ClassifierPlaces365()\n", "dir_input = \"path/to/input\"\n", "dir_image_output = \"path/to/image_output\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "csv_format = \"long\" # \"long\" or \"wide\"\n", "classifier.classify(\n", " dir_input,\n", " dir_image_output=dir_image_output,\n", " dir_summary_output=dir_summary_output,\n", " csv_format=csv_format,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### PlacePulse 2.0\n", "PlacePulse 2.0 is a dataset that contains images of pairs of images and asks users to choose which image is better. The model predicts the image that is better. The original paper can be found [here](https://arxiv.org/pdf/1608.01769v2), and the dataset can be found [here](https://figshare.com/articles/dataset/Place_Pulse/11859993). Indicators of urban perception are:\n", "\n", "- `Safety`\n", "- `Liveliness`\n", "- `Beauty`\n", "- `Wealth`\n", "- `Boringness`\n", "- `Depressingness`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import ClassifierPerception\n", "\n", "classifier = ClassifierPerception(\n", " perception_study=\"safer\", # Other options are \"livelier\", \"wealthier\", \"more beautiful\", \"more boring\", \"more depressing\"\n", " device=\"cpu\", # device to use (either \"cpu\" or \"gpu\")\n", ")\n", "dir_input = \"path/to/input\"\n", "dir_summary_output = \"path/to/summary_output\" \n", "batch_size = 32\n", "save_format = \"csv json\"\n", "classifier.classify(\n", " dir_input,\n", " dir_summary_output=dir_summary_output,\n", " batch_size=batch_size,\n", " save_format=save_format,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Global Streetscapes\n", "Global Streetscapes is a dataset of street-level images with various attributes. 
The model predicts the attributes of the image.\n", "##### Glare\n", "\n", "List of glare classes:\n", "- `True`: Glare is present in the image\n", "- `False`: Glare is not present in the image" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import ClassifierGlare\n", "\n", "classifier = ClassifierGlare()\n", "dir_input = \"path/to/input\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "classifier.classify(\n", " dir_input,\n", " dir_summary_output=dir_summary_output,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Lighting\n", "\n", "- `day`: The image was taken during the day.\n", "- `night`: The image was taken during the night.\n", "- `dawn/dusk`: The image was taken during dawn or dusk." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import ClassifierLighting\n", "\n", "classifier = ClassifierLighting()\n", "dir_input = \"path/to/input\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "classifier.classify(\n", " dir_input,\n", " dir_summary_output=dir_summary_output,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Panorama\n", "\n", "List of panorama classes:\n", "- `True`: The image is a panorama.\n", "- `False`: The image is not a panorama." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import ClassifierPanorama\n", "\n", "classifier = ClassifierPanorama()\n", "dir_input = \"path/to/input\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "classifier.classify(\n", " dir_input,\n", " dir_summary_output=dir_summary_output,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Platform\n", "\n", "List of platform classes:\n", "- `cycling surface`: The image shows a cycling surface.\n", "- `driving surface`: The image shows a driving surface.\n", "- `fields`: The image shows fields.\n", "- `railway`: The image shows a railway.\n", "- `tunnel`: The image shows a tunnel.\n", "- `walking surface`: The image shows a walking surface." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import ClassifierPlatform\n", "\n", "classifier = ClassifierPlatform()\n", "dir_input = \"path/to/input\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "classifier.classify(\n", " dir_input,\n", " dir_summary_output=dir_summary_output,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Quality\n", "\n", "List of quality classes:\n", "- `good`: The image quality is good.\n", "- `slightly poor`: The image quality is slightly poor.\n", "- `very poor`: The image quality is very poor." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import ClassifierQuality\n", "\n", "classifier = ClassifierQuality()\n", "dir_input = \"path/to/input\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "classifier.classify(\n", " dir_input,\n", " dir_summary_output=dir_summary_output,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Reflection\n", "\n", "List of reflection classes:\n", "- `True`: Reflection is present in the image.\n", "- `False`: Reflection is not present in the image." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import ClassifierReflection\n", "\n", "classifier = ClassifierReflection()\n", "dir_input = \"path/to/input\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "classifier.classify(\n", " dir_input,\n", " dir_summary_output=dir_summary_output,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### View Direction\n", "\n", "List of view direction classes:\n", "- `front/back`: The image shows the front or back view.\n", "- `side`: The image shows the left or right view." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import ClassifierViewDirection\n", "\n", "classifier = ClassifierViewDirection()\n", "dir_input = \"path/to/input\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "classifier.classify(\n", " dir_input,\n", " dir_summary_output=dir_summary_output,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### Weather\n", "\n", "List of weather classes:\n", "- `clear`: The weather is clear.\n", "- `cloudy`: The weather is cloudy.\n", "- `foggy`: The weather is foggy.\n", "- `rainy`: The weather is rainy.\n", "- `snowy`: The weather is snowy." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import ClassifierWeather\n", "\n", "classifier = ClassifierWeather()\n", "dir_input = \"path/to/input\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "classifier.classify(\n", " dir_input,\n", " dir_summary_output=dir_summary_output,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Depth estimation\n", "- Depth estimation: Predicts the depth map of the image.\n", "- Relative depth estimation: Predicts the relative depth map of the image. We use pre-trained models from [Depth-Anything-V2](https://github.com/DepthAnything/Depth-Anything-V2)\n", "- Absolute depth estimation: Predicts the absolute depth map of the image. We use pre-trained metric depth models from [Depth-Anything-V2](https://github.com/DepthAnything/Depth-Anything-V2/tree/main/metric_depth)\n", "\n", "The DepthEstimator supports multiple encoder variants (vits, vitb, vitl, vitg) and automatically downloads the appropriate model weights from Hugging Face when first used." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import DepthEstimator\n", "\n", "depth_estimator = DepthEstimator(\n", " device=\"cpu\", # device to use (either \"cpu\", \"cuda\", or \"mps\")\n", " task=\"relative\", # task to perform (either \"relative\" or \"absolute\")\n", " encoder=\"vitl\", # encoder variant (\"vits\", \"vitb\", \"vitl\", \"vitg\")\n", " max_depth=80.0 # maximum depth for absolute estimation (only used when task=\"absolute\")\n", ")\n", "\n", "dir_input = \"path/to/input\"\n", "dir_image_output = \"path/to/image_output\" # estimated depth map\n", "depth_estimator.estimate_depth(\n", " dir_input,\n", " dir_image_output\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Low-level features\n", "- Low-level features: Extracts low-level features from the image.\n", "- Edge detection: Detects edges in the image.\n", "- Blob detection: Detects blobs in the image.\n", "- Blur detection: Detects blur in the image.\n", "- HSL color histogram: Extracts HSL color histogram from the image." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import get_low_level_features\n", "\n", "dir_input = \"path/to/input\"\n", "dir_image_output = \"path/to/image_output\"\n", "dir_summary_output = \"path/to/summary_output\"\n", "csv_format = \"long\" # \"long\" or \"wide\"\n", "get_low_level_features(\n", " dir_input,\n", " dir_image_output=dir_image_output,\n", " dir_summary_output=dir_summary_output,\n", " save_format=\"json csv\",\n", " csv_format=csv_format,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Image embeddings\n", "- Image embeddings: Extracts embeddings from the image. We used the following packages for image embeddings: [img2vec_pytorch](https://github.com/christiansafka/img2vec)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.cv import Embeddings\n", "\n", "emb = Embeddings(\n", " model_name=\"resnet-18\", # model name to use for generating embeddings options are: 'alexnet', 'vgg-11', 'densenet', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7'\n", " cuda=True) # if True, use GPU for generating embeddings\n", "emb.generate_embedding(\n", " \"path/to/image_directory\",\n", " \"path/to/output_directory\",\n", " batch_size=1000, # batch size for generating embeddings\n", ")\n", "results = emb.search_similar_images(\n", " \"path/to/target_image_file\",\n", " \"path/to/embeddings_directory\",\n", " 20, # number of similar images to retrieve\n", ")" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## How to transform images\n", "zensvi.transform: A module to transform images\n", "\n", "- Panorama to perscpective image transformation: Transforms a panorama image to a perspective image.\n", "- Panorama to fisheye image transformation: Transforms a panorama image to a fisheye image. Types of fisheye transformations include stereographic, equidistant, equisolid, and orthographic." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.transform import ImageTransformer\n", "\n", "dir_input = \"path/to/input\"\n", "dir_output = \"path/to/output\"\n", "image_transformer = ImageTransformer(dir_input=dir_input, dir_output=dir_output)\n", "image_transformer.transform_images(\n", " style_list=\"perspective equidistant_fisheye orthographic_fisheye stereographic_fisheye equisolid_fisheye\", # list of projection styles in the form of a string separated by a space\n", " FOV=90, # field of view\n", " theta=120, # angle of view (horizontal)\n", " phi=0, # angle of view (vertical)\n", " aspects=(9, 16), # aspect ratio\n", " show_size=100, # size of the image to show (i.e. scale factor)\n", " use_upper_half=False, # if True, only the upper half of the image is used for transformation. Use this for fisheye images to estimate sky view.\n", ") " ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "\n", "- Image to Point Cloud transformation: Converts depth and color images into 3D point clouds.\n", "- Point Cloud Saving: Supports saving point clouds in multiple formats such as PCD, PLY, NumPy, and CSV.\n", "- Point Cloud Visualization: Provides visualization tools for inspecting generated point clouds." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.transform import PointCloudProcessor\n", "import pandas as pd\n", "\n", "# Directories for input and output\n", "dir_input = \"path/to/input\"\n", "dir_output = \"path/to/output\"\n", "\n", "# Metadata of SVI, controlling the global attributes of generated point clouds (e.g., ID of images to process, global coordinates, and headings)\n", "data = pd.read_csv(f\"{dir_input}/point_cloud_test_df.csv\")\n", "\n", "# Initialize the PointCloudProcessor with paths to the image and depth folders\n", "image_folder = f\"{dir_input}/color\"\n", "depth_folder = f\"{dir_input}/depth\"\n", "point_cloud_processor = PointCloudProcessor(image_folder=image_folder, depth_folder=depth_folder)\n", "\n", "# Process multiple point clouds\n", "point_cloud_processor.process_multiple_images(\n", " data=data,\n", " output_dir=dir_output, # Output directory to save the point clouds. If None, the point clouds are not saved\n", " save_format=\"pcd\" # Format to save the point clouds ('pcd', 'ply', 'npz', 'csv')\n", ")\n", "\n", "# Optional: Visualize one of the generated point clouds\n", "point_clouds = point_cloud_processor.process_multiple_images(data=data)\n", "point_cloud_processor.visualize_point_cloud(point_clouds[0])\n", "\n", "# Optional: Save the first generated point cloud in additional formats\n", "point_cloud_processor.save_point_cloud_numpy(point_clouds[0], f\"{dir_output}/point_cloud_0001.npz\")\n", "point_cloud_processor.save_point_cloud_csv(point_clouds[0], f\"{dir_output}/point_cloud_0001.csv\")" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## How to visualize the results\n", "zensvi.visualization: A module to visualize the results\n", "### Point map" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.visualization import plot_map\n", "\n", "dir_input = \"path/to/input\"\n", "path_output = \"path/to/output.png\" # output file path\n", "path_pid = \"path/to/pid\" # path to the panorama ID file with latitude and longitude\n", "csv_file_pattern = \"pixel_ratios.csv\" # pattern of the CSV files that contain the pixel ratios (or any other variable to plot)\n", "variable = \"vegetation\" # variable to plot (e.g. vegetation, building, sky, etc.). This should be the column name in the CSV file. If None, count of the number of images is plotted\n", "plot_type = \"point\" # plot type (either \"point\", \"line\", or \"hexagon\")\n", "fig, ax = plot_map(\n", " path_pid,\n", " dir_input=dir_input,\n", " csv_file_pattern=csv_file_pattern,\n", " variable_name=variable,\n", " plot_type=plot_type,\n", " path_output=path_output,\n", " resolution=13,\n", " cmap=\"viridis\",\n", " legend=True,\n", " title=\"Point Map\",\n", " legend_title=\"Vegetation\",\n", " dark_mode=False,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Line map" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dir_input = \"path/to/input\"\n", "path_output = \"path/to/output.png\" # output file path\n", "path_pid = \"path/to/pid\" # path to the panorama ID file with latitude and longitude\n", "csv_file_pattern = \"pixel_ratios.csv\" # pattern of the CSV files that contain the pixel ratios (or any other variable to plot)\n", "variable = \"vegetation\" # variable to plot (e.g. vegetation, building, sky, etc.). This should be the column name in the CSV file. 
If None, count of the number of images is plotted\n", "plot_type = \"line\" # plot type (either \"point\", \"line\", or \"hexagon\")\n", "fig, ax = plot_map(\n", " path_pid,\n", " dir_input=dir_input,\n", " csv_file_pattern=csv_file_pattern,\n", " variable_name=variable,\n", " plot_type=plot_type,\n", " path_output=path_output,\n", " resolution=13,\n", " cmap=\"viridis\",\n", " legend=True,\n", " title=\"Line Map\",\n", " legend_title=\"Vegetation\",\n", " dark_mode=False,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Hexagon map" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "dir_input = \"path/to/input\"\n", "path_output = \"path/to/output.png\" # output file path\n", "path_pid = \"path/to/pid\" # path to the panorama ID file with latitude and longitude\n", "csv_file_pattern = \"pixel_ratios.csv\" # pattern of the CSV files that contain the pixel ratios (or any other variable to plot)\n", "variable = \"vegetation\" # variable to plot (e.g. vegetation, building, sky, etc.). This should be the column name in the CSV file. If None, count of the number of images is plotted\n", "plot_type = \"hexagon\" # plot type (either \"point\", \"line\", or \"hexagon\")\n", "fig, ax = plot_map(\n", " path_pid,\n", " dir_input=dir_input,\n", " csv_file_pattern=csv_file_pattern,\n", " variable_name=variable,\n", " plot_type=plot_type,\n", " path_output=path_output,\n", " resolution=13,\n", " cmap=\"viridis\",\n", " legend=True,\n", " title=\"Hexagon Map\",\n", " legend_title=\"Vegetation\",\n", " dark_mode=False,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Plot images as grid" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.visualization import plot_image\n", "\n", "dir_image_input = \"path/to/input\"\n", "path_output = \"path/to/output.png\" # output file path\n", "image_file_pattern = \"*.png\" # pattern of the image files to plot\n", "dir_csv_input = \"path/to/csv\" # directory of the CSV files\n", "csv_file_pattern = \"pixel_ratios.csv\" # pattern of the CSV files\n", "sort_by = \"random\" # sort the images by either \"random\" or names of variables in the CSV files (e.g. \"vegetation\")\n", "fig, ax = plot_image(\n", " dir_image_input,\n", " 4, # number of rows\n", " 5, # number of columns\n", " dir_csv_input=dir_csv_input, # directory of the CSV files\n", " csv_file_pattern=csv_file_pattern, # pattern of the CSV files\n", " sort_by=sort_by, # sort the images by either \"random\" or names of variables in the CSV files (e.g. \"vegetation\")\n", " title=\"Image Grid\",\n", " path_output=path_output,\n", " dark_mode=False, # if True, the background is dark\n", " random_seed=123,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Plot variables as Kernel Density Estimation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.visualization import plot_kde\n", "\n", "path_input = \"path/to/input.csv\" # input CSV file\n", "columns = [\"vegetation\", \"building\", \"sky\"] # list of columns to plot\n", "path_output = \"path/to/output.png\" # output file path\n", "kwargs = {\n", " \"clip\": (0, 1), # clip the values\n", " \"palette\": \"twilight\", # color palette. 
This can be any color palette from the seaborn library or matplotlib library or your own color palette\n", "}\n", "\n", "plot_kde(\n", " path_input,\n", " columns,\n", " path_output = path_output,\n", " legend = True,\n", " title = \"KDE Plot\",\n", " legend_title = \"Categories\",\n", " dpi = 300,\n", " font_size = 30,\n", " dark_mode = False,\n", " **kwargs,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Plot variables as histograms" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from zensvi.visualization import plot_hist\n", "\n", "path_input = \"path/to/input.csv\" # input CSV file\n", "columns = [\"truck\", \"person\", \"car\"] # list of columns to plot\n", "path_output = \"path/to/output.png\" # output file path\n", "kwargs = {\n", " \"clip\": (0, 1), # clip the values\n", " \"palette\": \"twilight\", # color palette. This can be any color palette from the seaborn library or matplotlib library or your own color palette\n", "}\n", "\n", "plot_hist(\n", " path_input,\n", " columns,\n", " path_output = path_output,\n", " legend = True,\n", " title = \"Histogram\",\n", " legend_title = \"Count\",\n", " dpi = 300,\n", " font_size = 30,\n", " dark_mode = False,\n", " **kwargs,\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "streetcope", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 4 }