Commit de1bea2e authored by mhdbashard's avatar mhdbashard

Update

parent 5efe29bb
......@@ -33,7 +33,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Python 3.10.16\n"
"Python 3.12.12\n"
]
}
],
......@@ -44,6 +44,33 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "7557d183",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Filesystem Size Used Avail Use% Mounted on\n",
"overlay 113G 39G 74G 35% /\n",
"tmpfs 64M 0 64M 0% /dev\n",
"shm 5.7G 0 5.7G 0% /dev/shm\n",
"/dev/root 2.0G 1.2G 750M 62% /usr/sbin/docker-init\n",
"/dev/sda1 74G 41G 33G 56% /opt/bin/.nvidia\n",
"tmpfs 6.4G 64K 6.4G 1% /var/colab\n",
"tmpfs 6.4G 0 6.4G 0% /proc/acpi\n",
"tmpfs 6.4G 0 6.4G 0% /proc/scsi\n",
"tmpfs 6.4G 0 6.4G 0% /sys/firmware\n"
]
}
],
"source": [
"!df -h"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "7bef3027-7d62-493b-820b-999d9631d249",
"metadata": {},
"outputs": [
......@@ -51,16 +78,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Wed Jan 15 09:26:17 2025 \n",
"Wed Jan 14 07:45:20 2026 \n",
"+-----------------------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 550.120 Driver Version: 550.120 CUDA Version: 12.4 |\n",
"| NVIDIA-SMI 550.54.15 Driver Version: 550.54.15 CUDA Version: 12.4 |\n",
"|-----------------------------------------+------------------------+----------------------+\n",
"| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|=========================================+========================+======================|\n",
"| 0 NVIDIA GeForce RTX 4090 Off | 00000000:01:00.0 Off | Off |\n",
"| 0% 29C P8 17W / 450W | 2MiB / 24564MiB | 0% Default |\n",
"| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
"| N/A 39C P8 11W / 70W | 0MiB / 15360MiB | 0% Default |\n",
"| | | N/A |\n",
"+-----------------------------------------+------------------------+----------------------+\n",
" \n",
......@@ -88,117 +115,726 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Package Version\n",
"----------------------- -----------\n",
"absl-py 2.1.0\n",
"aiohappyeyeballs 2.4.4\n",
"aiohttp 3.11.11\n",
"aiosignal 1.3.2\n",
"asttokens 2.0.5\n",
"astunparse 1.6.3\n",
"async-timeout 5.0.1\n",
"attrs 24.3.0\n",
"blinker 1.9.0\n",
"Brotli 1.1.0\n",
"cached-property 1.5.2\n",
"cachetools 5.5.0\n",
"certifi 2024.12.14\n",
"cffi 1.17.1\n",
"charset-normalizer 3.4.1\n",
"click 8.1.8\n",
"comm 0.2.1\n",
"cryptography 44.0.0\n",
"debugpy 1.8.11\n",
"decorator 5.1.1\n",
"exceptiongroup 1.2.0\n",
"executing 0.8.3\n",
"filelock 3.16.1\n",
"flatbuffers 24.12.23\n",
"frozenlist 1.5.0\n",
"fsspec 2024.12.0\n",
"gast 0.6.0\n",
"google-auth 2.37.0\n",
"google-auth-oauthlib 1.2.1\n",
"google-pasta 0.2.0\n",
"grpcio 1.59.3\n",
"h2 4.1.0\n",
"h5py 3.12.1\n",
"hpack 4.0.0\n",
"huggingface-hub 0.27.1\n",
"hyperframe 6.0.1\n",
"idna 3.10\n",
"importlib_metadata 8.5.0\n",
"ipykernel 6.29.5\n",
"ipython 8.30.0\n",
"jedi 0.19.2\n",
"joblib 1.4.2\n",
"jupyter_client 8.6.0\n",
"jupyter_core 5.7.2\n",
"keras 2.15.0\n",
"Markdown 3.6\n",
"MarkupSafe 3.0.2\n",
"matplotlib-inline 0.1.6\n",
"ml-dtypes 0.2.0\n",
"multidict 6.1.0\n",
"nest-asyncio 1.6.0\n",
"numpy 1.26.4\n",
"oauthlib 3.2.2\n",
"opt_einsum 3.4.0\n",
"packaging 24.2\n",
"pandas 2.2.3\n",
"parso 0.8.4\n",
"pexpect 4.8.0\n",
"pip 24.2\n",
"platformdirs 3.10.0\n",
"prompt-toolkit 3.0.43\n",
"propcache 0.2.1\n",
"protobuf 4.24.4\n",
"psutil 5.9.0\n",
"ptyprocess 0.7.0\n",
"pure-eval 0.2.2\n",
"pyasn1 0.6.1\n",
"pyasn1_modules 0.4.1\n",
"pycparser 2.22\n",
"Pygments 2.15.1\n",
"PyJWT 2.10.1\n",
"pyOpenSSL 24.3.0\n",
"PySocks 1.7.1\n",
"python-dateutil 2.9.0.post0\n",
"pytz 2024.1\n",
"pyu2f 0.1.5\n",
"PyYAML 6.0.2\n",
"pyzmq 26.2.0\n",
"regex 2024.11.6\n",
"requests 2.32.3\n",
"requests-oauthlib 2.0.0\n",
"rsa 4.9\n",
"safetensors 0.5.2\n",
"scikit-learn 1.6.1\n",
"scipy 1.15.1\n",
"setuptools 75.1.0\n",
"six 1.16.0\n",
"stack-data 0.2.0\n",
"tensorboard 2.15.2\n",
"tensorboard_data_server 0.7.0\n",
"tensorflow 2.15.0\n",
"tensorflow_estimator 2.15.0\n",
"termcolor 2.5.0\n",
"threadpoolctl 3.5.0\n",
"tokenizers 0.15.2\n",
"tornado 6.4.2\n",
"tqdm 4.67.1\n",
"traitlets 5.14.3\n",
"transformers 4.37.0\n",
"typing_extensions 4.12.2\n",
"tzdata 2024.2\n",
"urllib3 2.3.0\n",
"wcwidth 0.2.5\n",
"Werkzeug 3.1.3\n",
"wheel 0.44.0\n",
"wrapt 1.14.1\n",
"yarl 1.18.3\n",
"zipp 3.21.0\n",
"zstandard 0.23.0\n"
"Package Version\n",
"---------------------------------------- ------------------\n",
"absl-py 1.4.0\n",
"accelerate 1.12.0\n",
"access 1.1.10.post3\n",
"affine 2.4.0\n",
"aiofiles 24.1.0\n",
"aiohappyeyeballs 2.6.1\n",
"aiohttp 3.13.3\n",
"aiosignal 1.4.0\n",
"aiosqlite 0.22.1\n",
"alabaster 1.0.0\n",
"albucore 0.0.24\n",
"albumentations 2.0.8\n",
"ale-py 0.11.2\n",
"alembic 1.17.2\n",
"altair 5.5.0\n",
"annotated-doc 0.0.4\n",
"annotated-types 0.7.0\n",
"antlr4-python3-runtime 4.9.3\n",
"anyio 4.12.1\n",
"anywidget 0.9.21\n",
"apsw 3.51.1.0\n",
"apswutils 0.1.2\n",
"argon2-cffi 25.1.0\n",
"argon2-cffi-bindings 25.1.0\n",
"array_record 0.8.3\n",
"arrow 1.4.0\n",
"arviz 0.22.0\n",
"astropy 7.2.0\n",
"astropy-iers-data 0.2026.1.5.0.43.43\n",
"astunparse 1.6.3\n",
"atpublic 5.1\n",
"attrs 25.4.0\n",
"audioread 3.1.0\n",
"Authlib 1.6.6\n",
"autograd 1.8.0\n",
"babel 2.17.0\n",
"backcall 0.2.0\n",
"beartype 0.22.9\n",
"beautifulsoup4 4.13.5\n",
"betterproto 2.0.0b6\n",
"bigframes 2.31.0\n",
"bigquery-magics 0.10.3\n",
"bleach 6.3.0\n",
"blinker 1.9.0\n",
"blis 1.3.3\n",
"blobfile 3.1.0\n",
"blosc2 3.12.2\n",
"bokeh 3.7.3\n",
"Bottleneck 1.4.2\n",
"bqplot 0.12.45\n",
"branca 0.8.2\n",
"brotli 1.2.0\n",
"CacheControl 0.14.4\n",
"cachetools 6.2.4\n",
"catalogue 2.0.10\n",
"certifi 2026.1.4\n",
"cffi 2.0.0\n",
"chardet 5.2.0\n",
"charset-normalizer 3.4.4\n",
"chex 0.1.90\n",
"clarabel 0.11.1\n",
"click 8.3.1\n",
"click-plugins 1.1.1.2\n",
"cligj 0.7.2\n",
"cloudpathlib 0.23.0\n",
"cloudpickle 3.1.2\n",
"cmake 3.31.10\n",
"cmdstanpy 1.3.0\n",
"colorcet 3.1.0\n",
"colorlover 0.3.0\n",
"colour 0.1.5\n",
"community 1.0.0b1\n",
"confection 0.1.5\n",
"cons 0.4.7\n",
"contourpy 1.3.3\n",
"cramjam 2.11.0\n",
"cryptography 43.0.3\n",
"cuda-bindings 12.9.5\n",
"cuda-core 0.3.2\n",
"cuda-pathfinder 1.3.3\n",
"cuda-python 12.9.5\n",
"cuda-toolkit 12.9.1\n",
"cudf-cu12 25.10.0\n",
"cudf-polars-cu12 25.10.0\n",
"cufflinks 0.17.3\n",
"cuml-cu12 25.10.0\n",
"cupy-cuda12x 13.6.0\n",
"curl_cffi 0.14.0\n",
"cvxopt 1.3.2\n",
"cvxpy 1.6.7\n",
"cycler 0.12.1\n",
"cyipopt 1.5.0\n",
"cymem 2.0.13\n",
"Cython 3.0.12\n",
"dask 2025.9.1\n",
"dask-cuda 25.10.0\n",
"dask-cudf-cu12 25.10.0\n",
"dataproc-spark-connect 1.0.1\n",
"datasets 4.0.0\n",
"db-dtypes 1.5.0\n",
"dbus-python 1.2.18\n",
"debugpy 1.8.15\n",
"decorator 4.4.2\n",
"defusedxml 0.7.1\n",
"deprecation 2.1.0\n",
"diffusers 0.36.0\n",
"dill 0.3.8\n",
"distributed 2025.9.1\n",
"distributed-ucxx-cu12 0.46.0\n",
"distro 1.9.0\n",
"dlib 19.24.6\n",
"dm-tree 0.1.9\n",
"docstring_parser 0.17.0\n",
"docutils 0.21.2\n",
"dopamine_rl 4.1.2\n",
"duckdb 1.3.2\n",
"earthengine-api 1.5.24\n",
"easydict 1.13\n",
"editdistance 0.8.1\n",
"eerepr 0.1.2\n",
"einops 0.8.1\n",
"en_core_web_sm 3.8.0\n",
"entrypoints 0.4\n",
"esda 2.8.1\n",
"et_xmlfile 2.0.0\n",
"etils 1.13.0\n",
"etuples 0.3.10\n",
"Farama-Notifications 0.0.4\n",
"fastai 2.8.6\n",
"fastapi 0.123.10\n",
"fastcore 1.11.2\n",
"fastdownload 0.0.7\n",
"fastjsonschema 2.21.2\n",
"fastlite 0.2.3\n",
"fastprogress 1.1.3\n",
"fastrlock 0.8.3\n",
"fasttransform 0.0.2\n",
"ffmpy 1.0.0\n",
"filelock 3.20.2\n",
"fiona 1.10.1\n",
"firebase-admin 6.9.0\n",
"Flask 3.1.2\n",
"flatbuffers 25.12.19\n",
"flax 0.10.7\n",
"folium 0.20.0\n",
"fonttools 4.61.1\n",
"fqdn 1.5.1\n",
"frozendict 2.4.7\n",
"frozenlist 1.8.0\n",
"fsspec 2025.3.0\n",
"future 1.0.0\n",
"gast 0.7.0\n",
"gcsfs 2025.3.0\n",
"GDAL 3.8.4\n",
"gdown 5.2.0\n",
"geemap 0.35.3\n",
"geocoder 1.38.1\n",
"geographiclib 2.1\n",
"geopandas 1.1.2\n",
"geopy 2.4.1\n",
"giddy 2.3.8\n",
"gin-config 0.5.0\n",
"gitdb 4.0.12\n",
"GitPython 3.1.46\n",
"glob2 0.7\n",
"google 3.0.0\n",
"google-adk 1.21.0\n",
"google-ai-generativelanguage 0.6.15\n",
"google-api-core 2.29.0\n",
"google-api-python-client 2.187.0\n",
"google-auth 2.43.0\n",
"google-auth-httplib2 0.3.0\n",
"google-auth-oauthlib 1.2.2\n",
"google-cloud-aiplatform 1.130.0\n",
"google-cloud-appengine-logging 1.7.0\n",
"google-cloud-audit-log 0.4.0\n",
"google-cloud-bigquery 3.40.0\n",
"google-cloud-bigquery-connection 1.19.0\n",
"google-cloud-bigquery-storage 2.36.0\n",
"google-cloud-bigtable 2.35.0\n",
"google-cloud-core 2.5.0\n",
"google-cloud-dataproc 5.23.0\n",
"google-cloud-datastore 2.23.0\n",
"google-cloud-discoveryengine 0.13.12\n",
"google-cloud-firestore 2.22.0\n",
"google-cloud-functions 1.21.0\n",
"google-cloud-language 2.18.0\n",
"google-cloud-logging 3.13.0\n",
"google-cloud-monitoring 2.28.0\n",
"google-cloud-resource-manager 1.15.0\n",
"google-cloud-secret-manager 2.26.0\n",
"google-cloud-spanner 3.61.0\n",
"google-cloud-speech 2.35.0\n",
"google-cloud-storage 3.7.0\n",
"google-cloud-trace 1.17.0\n",
"google-cloud-translate 3.23.0\n",
"google-colab 1.0.0\n",
"google-crc32c 1.8.0\n",
"google-genai 1.55.0\n",
"google-generativeai 0.8.6\n",
"google-pasta 0.2.0\n",
"google-resumable-media 2.8.0\n",
"googleapis-common-protos 1.72.0\n",
"googledrivedownloader 1.1.0\n",
"gradio 5.50.0\n",
"gradio_client 1.14.0\n",
"graphviz 0.21\n",
"greenlet 3.3.0\n",
"groovy 0.1.2\n",
"grpc-google-iam-v1 0.14.3\n",
"grpc-interceptor 0.15.4\n",
"grpcio 1.76.0\n",
"grpcio-status 1.71.2\n",
"grpclib 0.4.9\n",
"gspread 6.2.1\n",
"gspread-dataframe 4.0.0\n",
"gym 0.25.2\n",
"gym-notices 0.1.0\n",
"gymnasium 1.2.3\n",
"h11 0.16.0\n",
"h2 4.3.0\n",
"h5netcdf 1.7.3\n",
"h5py 3.15.1\n",
"hdbscan 0.8.41\n",
"hf_transfer 0.1.9\n",
"hf-xet 1.2.0\n",
"highspy 1.12.0\n",
"holidays 0.88\n",
"holoviews 1.22.1\n",
"hpack 4.1.0\n",
"html5lib 1.1\n",
"httpcore 1.0.9\n",
"httpimport 1.4.1\n",
"httplib2 0.31.0\n",
"httptools 0.7.1\n",
"httpx 0.28.1\n",
"httpx-sse 0.4.3\n",
"huggingface-hub 0.36.0\n",
"humanize 4.15.0\n",
"hyperframe 6.1.0\n",
"hyperopt 0.2.7\n",
"ibis-framework 9.5.0\n",
"idna 3.11\n",
"ImageIO 2.37.2\n",
"imageio-ffmpeg 0.6.0\n",
"imagesize 1.4.1\n",
"imbalanced-learn 0.14.1\n",
"immutabledict 4.2.2\n",
"importlib_metadata 8.7.1\n",
"importlib_resources 6.5.2\n",
"imutils 0.5.4\n",
"inequality 1.1.2\n",
"inflect 7.5.0\n",
"iniconfig 2.3.0\n",
"intel-cmplr-lib-ur 2025.3.1\n",
"intel-openmp 2025.3.1\n",
"ipyevents 2.0.4\n",
"ipyfilechooser 0.6.0\n",
"ipykernel 6.17.1\n",
"ipyleaflet 0.20.0\n",
"ipyparallel 8.8.0\n",
"ipython 7.34.0\n",
"ipython-genutils 0.2.0\n",
"ipython-sql 0.5.0\n",
"ipytree 0.2.2\n",
"ipywidgets 7.7.1\n",
"isoduration 20.11.0\n",
"itsdangerous 2.2.0\n",
"jaraco.classes 3.4.0\n",
"jaraco.context 6.0.2\n",
"jaraco.functools 4.4.0\n",
"jax 0.7.2\n",
"jax-cuda12-pjrt 0.7.2\n",
"jax-cuda12-plugin 0.7.2\n",
"jaxlib 0.7.2\n",
"jeepney 0.9.0\n",
"jieba 0.42.1\n",
"Jinja2 3.1.6\n",
"jiter 0.12.0\n",
"joblib 1.5.3\n",
"jsonpatch 1.33\n",
"jsonpickle 4.1.1\n",
"jsonpointer 3.0.0\n",
"jsonschema 4.26.0\n",
"jsonschema-specifications 2025.9.1\n",
"jupyter_client 7.4.9\n",
"jupyter-console 6.6.3\n",
"jupyter_core 5.9.1\n",
"jupyter-events 0.12.0\n",
"jupyter_kernel_gateway 2.5.2\n",
"jupyter-leaflet 0.20.0\n",
"jupyter_server 2.14.0\n",
"jupyter_server_terminals 0.5.3\n",
"jupyterlab_pygments 0.3.0\n",
"jupyterlab_widgets 3.0.16\n",
"jupytext 1.18.1\n",
"kaggle 1.7.4.5\n",
"kagglehub 0.3.13\n",
"keras 3.10.0\n",
"keras-hub 0.21.1\n",
"keras-nlp 0.21.1\n",
"keyring 25.7.0\n",
"keyrings.google-artifactregistry-auth 1.1.2\n",
"kiwisolver 1.4.9\n",
"langchain 1.2.3\n",
"langchain-core 1.2.6\n",
"langgraph 1.0.5\n",
"langgraph-checkpoint 3.0.1\n",
"langgraph-prebuilt 1.0.5\n",
"langgraph-sdk 0.3.1\n",
"langsmith 0.6.1\n",
"lark 1.3.1\n",
"launchpadlib 1.10.16\n",
"lazr.restfulclient 0.14.4\n",
"lazr.uri 1.0.6\n",
"lazy_loader 0.4\n",
"libclang 18.1.1\n",
"libcudf-cu12 25.10.0\n",
"libcugraph-cu12 25.10.1\n",
"libcuml-cu12 25.10.0\n",
"libkvikio-cu12 25.10.0\n",
"libpysal 4.14.0\n",
"libraft-cu12 25.10.0\n",
"librmm-cu12 25.10.0\n",
"librosa 0.11.0\n",
"libucx-cu12 1.19.0\n",
"libucxx-cu12 0.46.0\n",
"lightgbm 4.6.0\n",
"linkify-it-py 2.0.3\n",
"llvmlite 0.43.0\n",
"locket 1.0.0\n",
"logical-unification 0.4.7\n",
"lxml 6.0.2\n",
"Mako 1.3.10\n",
"mapclassify 2.10.0\n",
"Markdown 3.10\n",
"markdown-it-py 4.0.0\n",
"MarkupSafe 3.0.3\n",
"matplotlib 3.10.0\n",
"matplotlib-inline 0.2.1\n",
"matplotlib-venn 1.1.2\n",
"mcp 1.25.0\n",
"mdit-py-plugins 0.5.0\n",
"mdurl 0.1.2\n",
"mgwr 2.2.1\n",
"miniKanren 1.0.5\n",
"missingno 0.5.2\n",
"mistune 3.2.0\n",
"mizani 0.13.5\n",
"mkl 2025.3.0\n",
"ml_dtypes 0.5.4\n",
"mlxtend 0.23.4\n",
"mmh3 5.2.0\n",
"momepy 0.11.0\n",
"more-itertools 10.8.0\n",
"moviepy 1.0.3\n",
"mpmath 1.3.0\n",
"msgpack 1.1.2\n",
"multidict 6.7.0\n",
"multipledispatch 1.0.0\n",
"multiprocess 0.70.16\n",
"multitasking 0.0.12\n",
"murmurhash 1.0.15\n",
"music21 9.9.1\n",
"namex 0.1.0\n",
"narwhals 2.15.0\n",
"natsort 8.4.0\n",
"nbclassic 1.3.3\n",
"nbclient 0.10.4\n",
"nbconvert 7.16.6\n",
"nbformat 5.10.4\n",
"ndindex 1.10.1\n",
"nest-asyncio 1.6.0\n",
"networkx 3.6.1\n",
"nibabel 5.3.3\n",
"nltk 3.9.1\n",
"notebook 6.5.7\n",
"notebook_shim 0.2.4\n",
"numba 0.60.0\n",
"numba-cuda 0.19.1\n",
"numexpr 2.14.1\n",
"numpy 2.0.2\n",
"nvidia-cublas-cu12 12.6.4.1\n",
"nvidia-cuda-cccl-cu12 12.9.27\n",
"nvidia-cuda-cupti-cu12 12.6.80\n",
"nvidia-cuda-nvcc-cu12 12.5.82\n",
"nvidia-cuda-nvrtc-cu12 12.6.77\n",
"nvidia-cuda-runtime-cu12 12.6.77\n",
"nvidia-cudnn-cu12 9.10.2.21\n",
"nvidia-cufft-cu12 11.3.0.4\n",
"nvidia-cufile-cu12 1.11.1.6\n",
"nvidia-curand-cu12 10.3.7.77\n",
"nvidia-cusolver-cu12 11.7.1.2\n",
"nvidia-cusparse-cu12 12.5.4.2\n",
"nvidia-cusparselt-cu12 0.7.1\n",
"nvidia-ml-py 13.590.44\n",
"nvidia-nccl-cu12 2.27.5\n",
"nvidia-nvjitlink-cu12 12.6.85\n",
"nvidia-nvshmem-cu12 3.3.20\n",
"nvidia-nvtx-cu12 12.6.77\n",
"nvtx 0.2.14\n",
"nx-cugraph-cu12 25.10.0\n",
"oauth2client 4.1.3\n",
"oauthlib 3.3.1\n",
"omegaconf 2.3.0\n",
"onemkl-license 2025.3.0\n",
"openai 2.14.0\n",
"opencv-contrib-python 4.12.0.88\n",
"opencv-python 4.12.0.88\n",
"opencv-python-headless 4.12.0.88\n",
"openpyxl 3.1.5\n",
"opentelemetry-api 1.37.0\n",
"opentelemetry-exporter-gcp-logging 1.11.0a0\n",
"opentelemetry-exporter-gcp-monitoring 1.11.0a0\n",
"opentelemetry-exporter-gcp-trace 1.11.0\n",
"opentelemetry-exporter-otlp-proto-common 1.37.0\n",
"opentelemetry-exporter-otlp-proto-http 1.37.0\n",
"opentelemetry-proto 1.37.0\n",
"opentelemetry-resourcedetector-gcp 1.11.0a0\n",
"opentelemetry-sdk 1.37.0\n",
"opentelemetry-semantic-conventions 0.58b0\n",
"opt_einsum 3.4.0\n",
"optax 0.2.6\n",
"optree 0.18.0\n",
"orbax-checkpoint 0.11.31\n",
"orjson 3.11.5\n",
"ormsgpack 1.12.1\n",
"osqp 1.0.5\n",
"overrides 7.7.0\n",
"packaging 25.0\n",
"pandas 2.2.2\n",
"pandas-datareader 0.10.0\n",
"pandas-gbq 0.30.0\n",
"pandas-stubs 2.2.2.240909\n",
"pandocfilters 1.5.1\n",
"panel 1.8.5\n",
"param 2.3.1\n",
"parso 0.8.5\n",
"parsy 2.2\n",
"partd 1.4.2\n",
"patsy 1.0.2\n",
"peewee 3.19.0\n",
"peft 0.18.0\n",
"pexpect 4.9.0\n",
"pickleshare 0.7.5\n",
"pillow 11.3.0\n",
"pip 24.1.2\n",
"platformdirs 4.5.1\n",
"plotly 5.24.1\n",
"plotnine 0.14.5\n",
"pluggy 1.6.0\n",
"plum-dispatch 2.6.1\n",
"ply 3.11\n",
"pointpats 2.5.2\n",
"polars 1.31.0\n",
"pooch 1.8.2\n",
"portpicker 1.5.2\n",
"preshed 3.0.12\n",
"prettytable 3.17.0\n",
"proglog 0.1.12\n",
"progressbar2 4.5.0\n",
"prometheus_client 0.23.1\n",
"promise 2.3\n",
"prompt_toolkit 3.0.52\n",
"propcache 0.4.1\n",
"prophet 1.2.1\n",
"proto-plus 1.27.0\n",
"protobuf 5.29.5\n",
"psutil 5.9.5\n",
"psycopg2 2.9.11\n",
"psygnal 0.15.1\n",
"ptyprocess 0.7.0\n",
"PuLP 3.3.0\n",
"py-cpuinfo 9.0.0\n",
"py4j 0.10.9.9\n",
"pyarrow 18.1.0\n",
"pyasn1 0.6.1\n",
"pyasn1_modules 0.4.2\n",
"pycairo 1.29.0\n",
"pycocotools 2.0.11\n",
"pycparser 2.23\n",
"pycryptodomex 3.23.0\n",
"pydantic 2.12.3\n",
"pydantic_core 2.41.4\n",
"pydantic-settings 2.12.0\n",
"pydata-google-auth 1.9.1\n",
"pydot 4.0.1\n",
"pydotplus 2.0.2\n",
"PyDrive2 1.21.3\n",
"pydub 0.25.1\n",
"pyerfa 2.0.1.5\n",
"pygame 2.6.1\n",
"pygit2 1.19.1\n",
"Pygments 2.19.2\n",
"PyGObject 3.48.2\n",
"PyJWT 2.10.1\n",
"pylibcudf-cu12 25.10.0\n",
"pylibcugraph-cu12 25.10.1\n",
"pylibraft-cu12 25.10.0\n",
"pymc 5.27.0\n",
"pynndescent 0.6.0\n",
"pyogrio 0.12.1\n",
"pyomo 6.9.5\n",
"PyOpenGL 3.1.10\n",
"pyOpenSSL 24.2.1\n",
"pyparsing 3.3.1\n",
"pyperclip 1.11.0\n",
"pyproj 3.7.2\n",
"pysal 25.7\n",
"pyshp 3.0.3\n",
"PySocks 1.7.1\n",
"pyspark 4.0.1\n",
"pytensor 2.36.3\n",
"pytest 8.4.2\n",
"python-apt 0.0.0\n",
"python-box 7.3.2\n",
"python-dateutil 2.9.0.post0\n",
"python-dotenv 1.2.1\n",
"python-fasthtml 0.12.37\n",
"python-json-logger 4.0.0\n",
"python-louvain 0.16\n",
"python-multipart 0.0.21\n",
"python-slugify 8.0.4\n",
"python-snappy 0.7.3\n",
"python-utils 3.9.1\n",
"pytz 2025.2\n",
"pyviz_comms 3.0.6\n",
"PyWavelets 1.9.0\n",
"PyYAML 6.0.3\n",
"pyzmq 26.2.1\n",
"quantecon 0.10.1\n",
"raft-dask-cu12 25.10.0\n",
"rapids-dask-dependency 25.10.0\n",
"rapids-logger 0.1.19\n",
"rasterio 1.5.0\n",
"rasterstats 0.20.0\n",
"ratelim 0.1.6\n",
"referencing 0.37.0\n",
"regex 2025.11.3\n",
"requests 2.32.4\n",
"requests-oauthlib 2.0.0\n",
"requests-toolbelt 1.0.0\n",
"requirements-parser 0.9.0\n",
"rfc3339-validator 0.1.4\n",
"rfc3986-validator 0.1.1\n",
"rfc3987-syntax 1.1.0\n",
"rich 13.9.4\n",
"rmm-cu12 25.10.0\n",
"roman-numerals 4.1.0\n",
"roman-numerals-py 4.1.0\n",
"rpds-py 0.30.0\n",
"rpy2 3.5.17\n",
"rsa 4.9.1\n",
"rtree 1.4.1\n",
"ruff 0.14.11\n",
"safehttpx 0.1.7\n",
"safetensors 0.7.0\n",
"scikit-image 0.25.2\n",
"scikit-learn 1.6.1\n",
"scipy 1.16.3\n",
"scooby 0.11.0\n",
"scs 3.2.10\n",
"seaborn 0.13.2\n",
"SecretStorage 3.5.0\n",
"segregation 2.5.3\n",
"semantic-version 2.10.0\n",
"Send2Trash 2.0.0\n",
"sentence-transformers 5.2.0\n",
"sentencepiece 0.2.1\n",
"sentry-sdk 2.49.0\n",
"setuptools 75.2.0\n",
"shap 0.50.0\n",
"shapely 2.1.2\n",
"shellingham 1.5.4\n",
"simple-parsing 0.1.7\n",
"simplejson 3.20.2\n",
"simsimd 6.5.12\n",
"six 1.17.0\n",
"sklearn-compat 0.1.5\n",
"sklearn-pandas 2.2.0\n",
"slicer 0.0.8\n",
"smart_open 7.5.0\n",
"smmap 5.0.2\n",
"sniffio 1.3.1\n",
"snowballstemmer 3.0.1\n",
"sortedcontainers 2.4.0\n",
"soundfile 0.13.1\n",
"soupsieve 2.8.1\n",
"soxr 1.0.0\n",
"spacy 3.8.11\n",
"spacy-legacy 3.0.12\n",
"spacy-loggers 1.0.5\n",
"spaghetti 1.7.6\n",
"spanner-graph-notebook 1.1.8\n",
"spglm 1.1.0\n",
"Sphinx 8.2.3\n",
"sphinxcontrib-applehelp 2.0.0\n",
"sphinxcontrib-devhelp 2.0.0\n",
"sphinxcontrib-htmlhelp 2.1.0\n",
"sphinxcontrib-jsmath 1.0.1\n",
"sphinxcontrib-qthelp 2.0.0\n",
"sphinxcontrib-serializinghtml 2.0.0\n",
"spint 1.0.7\n",
"splot 1.1.7\n",
"spopt 0.7.0\n",
"spreg 1.8.4\n",
"SQLAlchemy 2.0.45\n",
"sqlalchemy-spanner 1.17.2\n",
"sqlglot 25.20.2\n",
"sqlparse 0.5.5\n",
"srsly 2.5.2\n",
"sse-starlette 3.1.2\n",
"stanio 0.5.1\n",
"starlette 0.50.0\n",
"statsmodels 0.14.6\n",
"stringzilla 4.6.0\n",
"stumpy 1.13.0\n",
"sympy 1.14.0\n",
"tables 3.10.2\n",
"tabulate 0.9.0\n",
"tbb 2022.3.0\n",
"tblib 3.2.2\n",
"tcmlib 1.4.1\n",
"tenacity 9.1.2\n",
"tensorboard 2.19.0\n",
"tensorboard-data-server 0.7.2\n",
"tensorflow 2.19.0\n",
"tensorflow-datasets 4.9.9\n",
"tensorflow_decision_forests 1.12.0\n",
"tensorflow-hub 0.16.1\n",
"tensorflow-metadata 1.17.2\n",
"tensorflow-probability 0.25.0\n",
"tensorflow-text 2.19.0\n",
"tensorstore 0.1.80\n",
"termcolor 3.3.0\n",
"terminado 0.18.1\n",
"text-unidecode 1.3\n",
"textblob 0.19.0\n",
"tf_keras 2.19.0\n",
"tf-slim 1.1.0\n",
"thinc 8.3.10\n",
"threadpoolctl 3.6.0\n",
"tifffile 2025.12.20\n",
"tiktoken 0.12.0\n",
"timm 1.0.24\n",
"tinycss2 1.4.0\n",
"tobler 0.13.0\n",
"tokenizers 0.22.2\n",
"toml 0.10.2\n",
"tomlkit 0.13.3\n",
"toolz 0.12.1\n",
"torch 2.9.0+cu126\n",
"torchao 0.10.0\n",
"torchaudio 2.9.0+cu126\n",
"torchdata 0.11.0\n",
"torchsummary 1.5.1\n",
"torchtune 0.6.1\n",
"torchvision 0.24.0+cu126\n",
"tornado 6.5.1\n",
"tqdm 4.67.1\n",
"traitlets 5.7.1\n",
"traittypes 0.2.3\n",
"transformers 4.57.3\n",
"treelite 4.4.1\n",
"treescope 0.1.10\n",
"triton 3.5.0\n",
"tsfresh 0.21.1\n",
"tweepy 4.16.0\n",
"typeguard 4.4.4\n",
"typer 0.21.1\n",
"typer-slim 0.21.1\n",
"types-pytz 2025.2.0.20251108\n",
"types-setuptools 80.9.0.20250822\n",
"typing_extensions 4.15.0\n",
"typing-inspection 0.4.2\n",
"tzdata 2025.3\n",
"tzlocal 5.3.1\n",
"uc-micro-py 1.0.3\n",
"ucxx-cu12 0.46.0\n",
"umap-learn 0.5.9.post2\n",
"umf 1.0.2\n",
"uri-template 1.3.0\n",
"uritemplate 4.2.0\n",
"urllib3 2.5.0\n",
"uuid_utils 0.13.0\n",
"uvicorn 0.40.0\n",
"uvloop 0.22.1\n",
"vega-datasets 0.9.0\n",
"wadllib 1.3.6\n",
"wandb 0.23.1\n",
"wasabi 1.1.3\n",
"watchdog 6.0.0\n",
"watchfiles 1.1.1\n",
"wcwidth 0.2.14\n",
"weasel 0.4.3\n",
"webcolors 25.10.0\n",
"webencodings 0.5.1\n",
"websocket-client 1.9.0\n",
"websockets 15.0.1\n",
"Werkzeug 3.1.5\n",
"wheel 0.45.1\n",
"widgetsnbextension 3.6.10\n",
"wordcloud 1.9.5\n",
"wrapt 2.0.1\n",
"wurlitzer 3.1.1\n",
"xarray 2025.12.0\n",
"xarray-einstats 0.9.1\n",
"xgboost 3.1.2\n",
"xlrd 2.0.2\n",
"xxhash 3.6.0\n",
"xyzservices 2025.11.0\n",
"yarl 1.22.0\n",
"ydf 0.13.0\n",
"yellowbrick 1.5\n",
"yfinance 0.2.66\n",
"zict 3.0.0\n",
"zipp 3.23.0\n",
"zstandard 0.25.0\n"
]
}
],
......@@ -217,9 +853,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:base] *",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "conda-base-py"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
......@@ -231,7 +867,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"version": "3.12.12"
}
},
"nbformat": 4,
......
......@@ -18,9 +18,13 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 1,
"id": "6d20c462-711f-4f72-abbf-8bab4117e9d7",
"metadata": {},
"metadata": {
"vscode": {
"languageId": "python"
}
},
"outputs": [],
"source": [
"import numpy as np\n",
......@@ -30,15 +34,19 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 2,
"id": "99ad7f98-138d-45e4-9268-f42cb3a98f21",
"metadata": {},
"metadata": {
"vscode": {
"languageId": "python"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"120 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n"
"3.93 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n"
]
}
],
......@@ -48,15 +56,19 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 3,
"id": "0d3af9b3-f418-44ed-8a23-e30aa188fd98",
"metadata": {},
"metadata": {
"vscode": {
"languageId": "python"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.000225 s\n"
"0.000097 s\n"
]
}
],
......@@ -71,9 +83,13 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 4,
"id": "02440471-d8c6-4ba8-8a89-e3f8c0dcca0a",
"metadata": {},
"metadata": {
"vscode": {
"languageId": "python"
}
},
"outputs": [
{
"name": "stdout",
......@@ -99,21 +115,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:base] *",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "conda-base-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
"name": "python3"
}
},
"nbformat": 4,
......
{
"cells": [
{
"cell_type": "markdown",
"id": "b6d3aebb",
"metadata": {},
"source": [
"ref: https://cuda-tutorial.readthedocs.io/en/latest/tutorials/tutorial01/"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8eff613",
"metadata": {
"vscode": {
"languageId": "python"
}
},
"outputs": [],
"source": [
"import cupy\n",
"\n",
"# Number of elements in each vector.\n",
"size = 1024\n",
"# Threads per block; the grid is sized so that blocks * threads_per_block >= size.\n",
"threads_per_block = 1024\n",
"blocks = (size + threads_per_block - 1) // threads_per_block\n",
"\n",
"# Allocate the two random inputs and a zero-filled output on the GPU.\n",
"a_gpu = cupy.random.rand(size, dtype=cupy.float32)\n",
"b_gpu = cupy.random.rand(size, dtype=cupy.float32)\n",
"c_gpu = cupy.zeros(size, dtype=cupy.float32)\n",
"\n",
"# CUDA vector_add: each thread adds one pair of elements. The index combines\n",
"# block and thread ids, and the guard skips threads past the end of the vectors,\n",
"# so the kernel stays correct when more than one block is launched.\n",
"vector_add_cuda_code = r'''\n",
"extern \"C\"\n",
"__global__ void vector_add(const float * A, const float * B, float * C, const int size)\n",
"{\n",
"    int item = blockIdx.x * blockDim.x + threadIdx.x;\n",
"    if (item < size)\n",
"    {\n",
"        C[item] = A[item] + B[item];\n",
"    }\n",
"}\n",
"'''\n",
"vector_add_gpu = cupy.RawKernel(vector_add_cuda_code, \"vector_add\")\n",
"\n",
"# Launch as (grid, block, args). `size` is wrapped in int32 to match the kernel's\n",
"# `const int` parameter; a plain Python int would be passed as a 64-bit value.\n",
"vector_add_gpu((blocks, 1, 1), (threads_per_block, 1, 1), (a_gpu, b_gpu, c_gpu, cupy.int32(size)))\n",
"\n",
"# Compare the kernel output with CuPy's own element-wise addition.\n",
"assert cupy.allclose(c_gpu, a_gpu + b_gpu), \"vector_add produced wrong results\""
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "5TxHrnfxMYtK"
},
"source": [
"ref: https://cuda-tutorial.readthedocs.io/en/latest/tutorials/tutorial01/"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "bz0VYqONOBe1",
"outputId": "ef1b5673-0037-42a1-d1b6-44ca770bcec7"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"nvcc: NVIDIA (R) Cuda compiler driver\n",
"Copyright (c) 2005-2023 NVIDIA Corporation\n",
"Built on Tue_Aug_15_22:02:13_PDT_2023\n",
"Cuda compilation tools, release 12.2, V12.2.140\n",
"Build cuda_12.2.r12.2/compiler.33191640_0\n"
]
}
],
"source": [
"!nvcc --version"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "y5UasDOL-GN1",
"outputId": "e419a790-c080-4ff3-fcaf-2e7f9879f163"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing add1.cu\n"
]
}
],
"source": [
"%%writefile add1.cu\n",
"#include <stdio.h>\n",
"#include <stdlib.h>\n",
"\n",
"// Kernel: add the first element of b into the first element of a.\n",
"__global__ void add(int *a, int *b) {\n",
"    a[0] += b[0];\n",
"}\n",
"\n",
"// Print a message and exit if a CUDA runtime call did not succeed.\n",
"static void check(cudaError_t err, const char *what) {\n",
"    if (err != cudaSuccess) {\n",
"        fprintf(stderr, \"CUDA error %s: %s\\n\", what, cudaGetErrorString(err));\n",
"        exit(EXIT_FAILURE);\n",
"    }\n",
"}\n",
"\n",
"int main() {\n",
"    // host copies of variables a, b\n",
"    int a, b;\n",
"\n",
"    // device copies of variables a, b\n",
"    int *d_a, *d_b;\n",
"    size_t size = sizeof(int);\n",
"\n",
"    // Allocate space for device copies of a, b\n",
"    check(cudaMalloc(&d_a, size), \"allocating d_a\");\n",
"    check(cudaMalloc(&d_b, size), \"allocating d_b\");\n",
"\n",
"    // Setup input values\n",
"    a = 5;\n",
"    b = 100;\n",
"\n",
"    // Copy inputs to device\n",
"    check(cudaMemcpy(d_a, &a, size, cudaMemcpyHostToDevice), \"copying a to Device\");\n",
"    check(cudaMemcpy(d_b, &b, size, cudaMemcpyHostToDevice), \"copying b to Device\");\n",
"\n",
"    // Launch add() kernel on GPU with one block of one thread\n",
"    add<<<1,1>>>(d_a, d_b);\n",
"    // A kernel launch returns no status itself, so query the launch error explicitly\n",
"    check(cudaGetLastError(), \"launching add()\");\n",
"\n",
"    // Copy result back to host; stop instead of printing a stale value on failure\n",
"    check(cudaMemcpy(&a, d_a, size, cudaMemcpyDeviceToHost), \"copying to Host\");\n",
"    printf(\"result is %d\\n\",a);\n",
"\n",
"    // Cleanup\n",
"    cudaFree(d_a);\n",
"    cudaFree(d_b);\n",
"\n",
"    return 0;\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "i4mVKFx97KTx",
"outputId": "bc0cb70b-7f84-44ca-c5cd-7e9fc648065c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"==4619== NVPROF is profiling process 4619, command: ./out\n",
"result is 105\n",
"==4619== Profiling application: ./out\n",
"==4619== Profiling result:\n",
" Type Time(%) Time Calls Avg Min Max Name\n",
" GPU activities: 46.26% 3.3600us 1 3.3600us 3.3600us 3.3600us add(int*, int*)\n",
" 28.63% 2.0800us 1 2.0800us 2.0800us 2.0800us [CUDA memcpy DtoH]\n",
" 25.11% 1.8240us 2 912ns 640ns 1.1840us [CUDA memcpy HtoD]\n",
" API calls: 53.54% 150.98ms 2 75.492ms 7.5110us 150.98ms cudaMalloc\n",
" 46.28% 130.52ms 1 130.52ms 130.52ms 130.52ms cudaLaunchKernel\n",
" 0.08% 215.40us 2 107.70us 34.649us 180.76us cudaFree\n",
" 0.07% 194.04us 114 1.7020us 258ns 75.755us cuDeviceGetAttribute\n",
" 0.03% 72.888us 3 24.296us 6.7490us 38.437us cudaMemcpy\n",
" 0.00% 11.698us 1 11.698us 11.698us 11.698us cuDeviceGetName\n",
" 0.00% 8.3590us 1 8.3590us 8.3590us 8.3590us cuDeviceGetPCIBusId\n",
" 0.00% 5.0250us 1 5.0250us 5.0250us 5.0250us cuDeviceTotalMem\n",
" 0.00% 1.9900us 3 663ns 339ns 1.3000us cuDeviceGetCount\n",
" 0.00% 1.0380us 2 519ns 315ns 723ns cuDeviceGet\n",
" 0.00% 642ns 1 642ns 642ns 642ns cuModuleGetLoadingMode\n",
" 0.00% 357ns 1 357ns 357ns 357ns cuDeviceGetUuid\n"
]
}
],
"source": [
"!nvcc add1.cu -o out\n",
"!nvprof ./out"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"id": "xQMHTBWR7x4o",
"outputId": "71790427-a2b9-4f49-dde0-ed1bb2043b53"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting add2.cu\n"
]
}
],
"source": [
"%%writefile add2.cu\n",
"#include <stdio.h>\n",
"#include <stdlib.h>\n",
"#include <math.h>\n",
"\n",
"#define N 10000\n",
"#define MAX_ERR 1e-6\n",
"\n",
"// Serial baseline: the calling thread loops over all n elements by itself.\n",
"__global__ void vector_add(float *out, float *a, float *b, int n) {\n",
"    for(int i = 0; i < n; i += 1){\n",
"        out[i] = a[i] + b[i];\n",
"    }\n",
"}\n",
"\n",
"int main(){\n",
"    float *a, *b, *out;\n",
"    float *d_a, *d_b, *d_out;\n",
"\n",
"    // Allocate host memory\n",
"    a = (float*)malloc(sizeof(float) * N);\n",
"    b = (float*)malloc(sizeof(float) * N);\n",
"    out = (float*)malloc(sizeof(float) * N);\n",
"\n",
"    // Initialize host arrays\n",
"    for(int i = 0; i < N; i++){\n",
"        a[i] = i;\n",
"        b[i] = 5;\n",
"    }\n",
"\n",
"    // Allocate device memory\n",
"    cudaMalloc((void**)&d_a, sizeof(float) * N);\n",
"    cudaMalloc((void**)&d_b, sizeof(float) * N);\n",
"    cudaMalloc((void**)&d_out, sizeof(float) * N);\n",
"\n",
"    // Transfer data from host to device memory\n",
"    cudaMemcpy(d_a, a, sizeof(float) * N, cudaMemcpyHostToDevice);\n",
"    cudaMemcpy(d_b, b, sizeof(float) * N, cudaMemcpyHostToDevice);\n",
"\n",
"    // Executing kernel with a single thread: the kernel already loops over every\n",
"    // element, so launching more threads would only repeat the same writes\n",
"    vector_add<<<1,1>>>(d_out, d_a, d_b, N);\n",
"\n",
"    // Transfer data back to host memory\n",
"    cudaMemcpy(out, d_out, sizeof(float) * N, cudaMemcpyDeviceToHost);\n",
"\n",
"    // Verification: compare each element with the sum computed on the host.\n",
"    // The negated <= also counts NaN values as mismatches.\n",
"    int mismatches = 0;\n",
"    for(int i = 0; i < N; i++){\n",
"        if(!(fabs(out[i] - (a[i] + b[i])) <= MAX_ERR)){\n",
"            mismatches++;\n",
"        }\n",
"    }\n",
"    if(mismatches == 0){\n",
"        printf(\"PASSED\\n\");\n",
"    } else {\n",
"        printf(\"FAILED: %d of %d elements differ\\n\", mismatches, N);\n",
"    }\n",
"\n",
"    // Deallocate device memory\n",
"    cudaFree(d_a);\n",
"    cudaFree(d_b);\n",
"    cudaFree(d_out);\n",
"\n",
"    // Deallocate host memory\n",
"    free(a);\n",
"    free(b);\n",
"    free(out);\n",
"\n",
"    return mismatches == 0 ? EXIT_SUCCESS : EXIT_FAILURE;\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 319
},
"id": "14oI7-QW8EPy",
"outputId": "f2648648-a7a3-49ca-9750-70ba060441cd"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"==596== NVPROF is profiling process 596, command: ./out\n",
"==596== Profiling application: ./out\n",
"==596== Profiling result:\n",
" Type Time(%) Time Calls Avg Min Max Name\n",
" GPU activities: 98.52% 1.0440ms 1 1.0440ms 1.0440ms 1.0440ms vector_add(float*, float*, float*, int)\n",
" 1.09% 11.550us 2 5.7750us 5.7590us 5.7910us [CUDA memcpy HtoD]\n",
" 0.39% 4.0960us 1 4.0960us 4.0960us 4.0960us [CUDA memcpy DtoH]\n",
" API calls: 99.00% 176.44ms 3 58.814ms 6.7960us 176.43ms cudaMalloc\n",
" 0.65% 1.1567ms 3 385.58us 26.484us 1.0936ms cudaMemcpy\n",
" 0.20% 353.24us 1 353.24us 353.24us 353.24us cuDeviceTotalMem\n",
" 0.07% 131.50us 96 1.3690us 127ns 47.376us cuDeviceGetAttribute\n",
" 0.05% 94.987us 3 31.662us 4.9660us 79.056us cudaFree\n",
" 0.02% 27.053us 1 27.053us 27.053us 27.053us cudaLaunchKernel\n",
" 0.01% 15.460us 1 15.460us 15.460us 15.460us cuDeviceGetName\n",
" 0.00% 2.6340us 1 2.6340us 2.6340us 2.6340us cuDeviceGetPCIBusId\n",
" 0.00% 1.7800us 3 593ns 186ns 1.0060us cuDeviceGetCount\n",
" 0.00% 827ns 2 413ns 248ns 579ns cuDeviceGet\n",
" 0.00% 255ns 1 255ns 255ns 255ns cuDeviceGetUuid\n"
]
}
],
"source": [
"!nvcc add2.cu -o out\n",
"!nvprof ./out"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"id": "bgHifWBCOmms",
"outputId": "53a3e591-7a07-4e2f-ad7e-265ad9c63a39"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Writing add3.cu\n"
]
}
],
"source": [
"%%writefile add3.cu\n",
"#include <stdio.h>\n",
"#include <stdlib.h>\n",
"#include <math.h>\n",
"\n",
"#define N 10000\n",
"#define MAX_ER 1e-6\n",
"\n",
"// Each thread starts at its own threadIdx.x and advances by blockDim.x, so the\n",
"// threads of the block share the n elements between them.\n",
"__global__ void vector_add(float *out, float *a, float *b, int n) {\n",
"    int index = threadIdx.x;\n",
"    int stride = blockDim.x;\n",
"\n",
"    for(int i = index; i < n; i += stride){\n",
"        out[i] = a[i] + b[i];\n",
"    }\n",
"}\n",
"\n",
"int main(){\n",
"    float *a, *b, *out;\n",
"    float *d_a, *d_b, *d_out;\n",
"\n",
"    // Allocate host memory\n",
"    a = (float*)malloc(sizeof(float) * N);\n",
"    b = (float*)malloc(sizeof(float) * N);\n",
"    out = (float*)malloc(sizeof(float) * N);\n",
"\n",
"    // Initialize host arrays\n",
"    for(int i = 0; i < N; i++){\n",
"        a[i] = i+1;\n",
"        b[i] = 26;\n",
"    }\n",
"\n",
"    // Allocate device memory\n",
"    cudaMalloc((void**)&d_a, sizeof(float) * N);\n",
"    cudaMalloc((void**)&d_b, sizeof(float) * N);\n",
"    cudaMalloc((void**)&d_out, sizeof(float) * N);\n",
"\n",
"    // Transfer data from host to device memory\n",
"    cudaMemcpy(d_a, a, sizeof(float) * N, cudaMemcpyHostToDevice);\n",
"    cudaMemcpy(d_b, b, sizeof(float) * N, cudaMemcpyHostToDevice);\n",
"\n",
"    // Executing kernel: one block of 512 threads\n",
"    vector_add<<<1,512>>>(d_out, d_a, d_b, N);\n",
"\n",
"    // Transfer data back to host memory\n",
"    cudaMemcpy(out, d_out, sizeof(float) * N, cudaMemcpyDeviceToHost);\n",
"\n",
"    // Verification: compare each element with the sum computed on the host.\n",
"    // The negated <= also counts NaN values as mismatches.\n",
"    int mismatches = 0;\n",
"    for(int i = 0; i < N; i++){\n",
"        if(!(fabs(out[i] - (a[i] + b[i])) <= MAX_ER)){\n",
"            mismatches++;\n",
"        }\n",
"    }\n",
"    if(mismatches == 0){\n",
"        printf(\"PASSED\\n\");\n",
"    } else {\n",
"        printf(\"FAILED: %d of %d elements differ\\n\", mismatches, N);\n",
"    }\n",
"\n",
"    // Deallocate device memory\n",
"    cudaFree(d_a);\n",
"    cudaFree(d_b);\n",
"    cudaFree(d_out);\n",
"\n",
"    // Deallocate host memory\n",
"    free(a);\n",
"    free(b);\n",
"    free(out);\n",
"\n",
"    return mismatches == 0 ? EXIT_SUCCESS : EXIT_FAILURE;\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"id": "hqZBkrd8x0k4",
"outputId": "48dfa7c6-35ed-43c0-b029-c30e810be629"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Linux\n"
]
}
],
"source": [
"! uname"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 319
},
"id": "tj8bqWhN6_ei",
"outputId": "bd822a17-d13a-486f-d111-ea3d300dfdbb"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"==186== NVPROF is profiling process 186, command: ./out\n",
"==186== Profiling application: ./out\n",
"==186== Profiling result:\n",
" Type Time(%) Time Calls Avg Min Max Name\n",
" GPU activities: 50.83% 14.591us 2 7.2950us 7.2950us 7.2960us [CUDA memcpy HtoD]\n",
" 34.45% 9.8880us 1 9.8880us 9.8880us 9.8880us vector_add(float*, float*, float*, int)\n",
" 14.72% 4.2240us 1 4.2240us 4.2240us 4.2240us [CUDA memcpy DtoH]\n",
" API calls: 99.74% 261.61ms 3 87.202ms 5.2700us 261.59ms cudaMalloc\n",
" 0.09% 239.89us 96 2.4980us 127ns 159.76us cuDeviceGetAttribute\n",
" 0.07% 184.03us 1 184.03us 184.03us 184.03us cuDeviceTotalMem\n",
" 0.04% 104.40us 3 34.800us 28.086us 48.080us cudaMemcpy\n",
" 0.03% 87.576us 3 29.192us 5.0130us 72.747us cudaFree\n",
" 0.01% 27.161us 1 27.161us 27.161us 27.161us cudaLaunchKernel\n",
" 0.01% 21.881us 1 21.881us 21.881us 21.881us cuDeviceGetName\n",
" 0.00% 3.0560us 1 3.0560us 3.0560us 3.0560us cuDeviceGetPCIBusId\n",
" 0.00% 1.6590us 3 553ns 140ns 1.2640us cuDeviceGetCount\n",
" 0.00% 1.2970us 2 648ns 241ns 1.0560us cuDeviceGet\n",
" 0.00% 251ns 1 251ns 251ns 251ns cuDeviceGetUuid\n"
]
}
],
"source": [
"!nvcc add3.cu -o out\n",
"!nvprof ./out"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7Azzx2U3_DPG",
"outputId": "db632a58-afc9-4c5e-d53a-8639e6b9b287"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting add4.cu\n"
]
}
],
"source": [
"%%writefile add4.cu\n",
"#include <stdio.h>\n",
"\n",
"#define N 10000\n",
"\n",
"__global__ void vector_add(float *out, float *a, float *b, int n) {\n",
" int tid = blockIdx.x * blockDim.x + threadIdx.x;\n",
" out[tid] = a[tid] + b[tid];\n",
"\n",
"}\n",
"\n",
"int main(){\n",
" float *a, *b, *out;\n",
" float *d_a, *d_b, *d_out;\n",
"\n",
" // Allocate host memory\n",
" a = (float*)malloc(sizeof(float) * N);\n",
" b = (float*)malloc(sizeof(float) * N);\n",
" out = (float*)malloc(sizeof(float) * N);\n",
"\n",
" // Initialize host arrays\n",
" for(int i = 0; i < N; i++){\n",
" a[i] = i+1;\n",
" b[i] = 26;\n",
" }\n",
"\n",
" // Allocate device memory\n",
" cudaMalloc((void**)&d_a, sizeof(float) * N);\n",
" cudaMalloc((void**)&d_b, sizeof(float) * N);\n",
" cudaMalloc((void**)&d_out, sizeof(float) * N);\n",
"\n",
" // Transfer data from host to device memory\n",
" cudaMemcpy(d_a, a, sizeof(float) * N, cudaMemcpyHostToDevice);\n",
" cudaMemcpy(d_b, b, sizeof(float) * N, cudaMemcpyHostToDevice);\n",
"\n",
" // Executing kernel\n",
" int block_size = 32;\n",
" int grid_size = ((N + block_size) / block_size);\n",
" vector_add<<<grid_size,block_size>>>(d_out, d_a, d_b, N);\n",
"\n",
" // Transfer data back to host memory\n",
" cudaMemcpy(out, d_out, sizeof(float) * N, cudaMemcpyDeviceToHost);\n",
"\n",
" // Verification\n",
" //for(int i = 0; i < N; i++){\n",
" // printf(\"%f\\n\",out[i]);\n",
"// }\n",
"\n",
"\n",
"\n",
" // Deallocate device memory\n",
" cudaFree(d_a);\n",
" cudaFree(d_b);\n",
" cudaFree(d_out);\n",
"\n",
" // Deallocate host memory\n",
" free(a);\n",
" free(b);\n",
" free(out);\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6rUKHYj-_hle",
"outputId": "9615f4ee-05a7-4a1c-da7c-2431745fd9b9"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"==2018== NVPROF is profiling process 2018, command: ./out\n",
"==2018== Profiling application: ./out\n",
"==2018== Profiling result:\n",
" Type Time(%) Time Calls Avg Min Max Name\n",
" GPU activities: 54.56% 10.720us 2 5.3600us 5.2160us 5.5040us [CUDA memcpy HtoD]\n",
" 25.08% 4.9280us 1 4.9280us 4.9280us 4.9280us [CUDA memcpy DtoH]\n",
" 20.36% 4.0000us 1 4.0000us 4.0000us 4.0000us vector_add(float*, float*, float*, int)\n",
" API calls: 99.68% 203.91ms 3 67.971ms 5.1070us 203.90ms cudaMalloc\n",
" 0.11% 214.95us 1 214.95us 214.95us 214.95us cudaLaunchKernel\n",
" 0.07% 137.26us 3 45.753us 5.9300us 121.30us cudaFree\n",
" 0.07% 134.98us 114 1.1840us 144ns 53.102us cuDeviceGetAttribute\n",
" 0.06% 132.93us 3 44.310us 28.425us 62.880us cudaMemcpy\n",
" 0.01% 12.321us 1 12.321us 12.321us 12.321us cuDeviceGetName\n",
" 0.00% 5.5190us 1 5.5190us 5.5190us 5.5190us cuDeviceTotalMem\n",
" 0.00% 5.2280us 1 5.2280us 5.2280us 5.2280us cuDeviceGetPCIBusId\n",
" 0.00% 1.8590us 3 619ns 237ns 1.3680us cuDeviceGetCount\n",
" 0.00% 1.1470us 2 573ns 173ns 974ns cuDeviceGet\n",
" 0.00% 708ns 1 708ns 708ns 708ns cuModuleGetLoadingMode\n",
" 0.00% 228ns 1 228ns 228ns 228ns cuDeviceGetUuid\n"
]
}
],
"source": [
"!nvcc add4.cu -o out\n",
"!nvprof ./out"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fvbifMBvgpMX"
},
"outputs": [],
"source": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
{
"cells": [
{
"cell_type": "markdown",
"id": "7efd9f5e-2046-4727-b611-fefdd2f05238",
"metadata": {},
"source": [
"# Your First GPU Kernel"
]
},
{
"cell_type": "raw",
"id": "8d1a297e-6e49-4cbc-9142-442dc60f6598",
"metadata": {},
"source": [
"ref: https://carpentries-incubator.github.io/lesson-gpu-programming/first_program.html"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d8d4a6a3-aeb6-4cb4-a61d-8bb52f4849dc",
"metadata": {},
"outputs": [],
"source": [
"def vector_add(A, B, C, size):\n",
" for item in range(0, size):\n",
" C[item] = A[item] + B[item]\n",
" \n",
" return C"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "54efe984-e538-47bf-a3ad-5fc808f7b1c7",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:base] *",
"language": "python",
"name": "conda-base-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment