Skip to content

Commit 9c15dde

Browse files
author
bitoollearner
committed
LeetCode Pyspark Solution
1 parent 4f75a0c commit 9c15dde

8 files changed

+775
-56
lines changed

Solved/1651. Hopper Company Queries III (Hard)-(Solved).ipynb

Lines changed: 205 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "f6ac9266-658c-4507-8a37-ecc81334b06d",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "d6194993-9312-4b46-8365-d6a73d8cb0b0",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "23f6a680-8f01-4976-83fa-88103e49c6b8",
4655
"showTitle": false,
@@ -176,15 +185,27 @@
176185
"execution_count": 0,
177186
"metadata": {
178187
"application/vnd.databricks.v1+cell": {
179-
"cellMetadata": {},
188+
"cellMetadata": {
189+
"byteLimit": 2048000,
190+
"rowLimit": 10000
191+
},
180192
"inputWidgets": {},
181193
"nuid": "a762f84d-f622-4403-b695-4066e5bdbaf0",
182194
"showTitle": false,
183195
"tableResultSettingsMap": {},
184196
"title": ""
185197
}
186198
},
187-
"outputs": [],
199+
"outputs": [
200+
{
202+
"name": "stdout",
203+
"output_type": "stream",
204+
"text": [
205+
"+-------+---------+-------------+-------------+\n|ride_id|driver_id|ride_distance|ride_duration|\n+-------+---------+-------------+-------------+\n| 10| 10| 63| 38|\n| 13| 10| 73| 96|\n| 7| 8| 100| 28|\n| 17| 7| 119| 68|\n| 20| 1| 121| 92|\n| 5| 7| 42| 101|\n| 2| 4| 6| 38|\n| 11| 8| 37| 43|\n| 15| 8| 108| 82|\n| 12| 8| 38| 34|\n| 14| 1| 90| 74|\n+-------+---------+-------------+-------------+\n\n+-------+-------+------------+\n|ride_id|user_id|requested_at|\n+-------+-------+------------+\n| 10| 63| 2020-03-04|\n| 13| 52| 2020-06-22|\n| 7| 69| 2020-07-16|\n| 17| 70| 2020-08-25|\n| 20| 81| 2020-11-02|\n| 5| 57| 2020-11-09|\n| 2| 42| 2020-12-09|\n| 11| 68| 2021-01-11|\n| 15| 32| 2021-01-17|\n| 12| 11| 2021-01-19|\n| 14| 18| 2021-01-27|\n+-------+-------+------------+\n\n"
206+
]
207+
}
208+
],
188209
"source": [
189210
"accepted_rides_data_1651 = [\n",
190211
" (10, 10, 63, 38), (13, 10, 73, 96), (7, 8, 100, 28),\n",
@@ -208,15 +229,192 @@
208229
"rides_df_1651 = spark.createDataFrame(data_rides_1651, columns_rides_1651)\n",
209230
"rides_df_1651.show()"
210231
]
232+
},
233+
{
234+
"cell_type": "code",
235+
"execution_count": 0,
236+
"metadata": {
237+
"application/vnd.databricks.v1+cell": {
238+
"cellMetadata": {
239+
"byteLimit": 2048000,
240+
"rowLimit": 10000
241+
},
242+
"inputWidgets": {},
243+
"nuid": "ca7fc839-d4ba-4e80-8d94-08762bdb684b",
244+
"showTitle": false,
245+
"tableResultSettingsMap": {},
246+
"title": ""
247+
}
248+
},
249+
"outputs": [],
250+
"source": [
251+
"rides_2020_df_1651 = rides_df_1651\\\n",
252+
" .withColumn( \"requested_at\",\n",
253+
" coalesce(\n",
254+
" to_date(\"requested_at\", \"yyyy-MM-dd\"),\n",
255+
" to_date(\"requested_at\", \"yyyy-M-d\")\n",
256+
" )\n",
257+
" )\\\n",
258+
" .filter(year(\"requested_at\") == 2020)\\\n",
259+
" .withColumn(\"month\", month(\"requested_at\"))\\\n",
260+
" .select(\"ride_id\", \"month\")"
261+
]
262+
},
263+
{
264+
"cell_type": "code",
265+
"execution_count": 0,
266+
"metadata": {
267+
"application/vnd.databricks.v1+cell": {
268+
"cellMetadata": {
269+
"byteLimit": 2048000,
270+
"rowLimit": 10000
271+
},
272+
"inputWidgets": {},
273+
"nuid": "a214a11d-f742-49dd-8c82-ce7ef7e307d8",
274+
"showTitle": false,
275+
"tableResultSettingsMap": {},
276+
"title": ""
277+
}
278+
},
279+
"outputs": [],
280+
"source": [
281+
"accepted_2020_df_1651 = accepted_rides_df_1651\\\n",
282+
" .join(rides_2020_df_1651, on=\"ride_id\", how=\"inner\")"
283+
]
284+
},
285+
{
286+
"cell_type": "code",
287+
"execution_count": 0,
288+
"metadata": {
289+
"application/vnd.databricks.v1+cell": {
290+
"cellMetadata": {
291+
"byteLimit": 2048000,
292+
"rowLimit": 10000
293+
},
294+
"inputWidgets": {},
295+
"nuid": "263f63ba-91b7-4be2-b472-ede18b99f80f",
296+
"showTitle": false,
297+
"tableResultSettingsMap": {},
298+
"title": ""
299+
}
300+
},
301+
"outputs": [],
302+
"source": [
303+
"monthly_totals_df_1651 = accepted_2020_df_1651\\\n",
304+
" .groupBy(\"month\")\\\n",
305+
" .agg(\n",
306+
" sum(\"ride_distance\").alias(\"total_distance\"),\n",
307+
" sum(\"ride_duration\").alias(\"total_duration\")\n",
308+
" )"
309+
]
310+
},
311+
{
312+
"cell_type": "code",
313+
"execution_count": 0,
314+
"metadata": {
315+
"application/vnd.databricks.v1+cell": {
316+
"cellMetadata": {
317+
"byteLimit": 2048000,
318+
"rowLimit": 10000
319+
},
320+
"inputWidgets": {},
321+
"nuid": "af94514f-f262-42ca-a280-be2a8ad3fd02",
322+
"showTitle": false,
323+
"tableResultSettingsMap": {},
324+
"title": ""
325+
}
326+
},
327+
"outputs": [],
328+
"source": [
329+
"months_df_1651 = spark.createDataFrame([(m,) for m in range(1, 13)], [\"month\"])\n",
330+
"\n",
331+
"monthly_full_df_1651 = months_df_1651\\\n",
332+
" .join(monthly_totals_df_1651, on=\"month\", how=\"left\")\\\n",
333+
" .na.fill({\"total_distance\": 0, \"total_duration\": 0}).orderBy(\"month\")\n"
334+
]
335+
},
336+
{
337+
"cell_type": "code",
338+
"execution_count": 0,
339+
"metadata": {
340+
"application/vnd.databricks.v1+cell": {
341+
"cellMetadata": {
342+
"byteLimit": 2048000,
343+
"rowLimit": 10000
344+
},
345+
"inputWidgets": {},
346+
"nuid": "cb979d51-7086-4db3-96bf-6ff5a624a61b",
347+
"showTitle": false,
348+
"tableResultSettingsMap": {},
349+
"title": ""
350+
}
351+
},
352+
"outputs": [],
353+
"source": [
354+
"windowSpec = Window.orderBy(\"month\").rowsBetween(0, 2)"
355+
]
356+
},
357+
{
358+
"cell_type": "code",
359+
"execution_count": 0,
360+
"metadata": {
361+
"application/vnd.databricks.v1+cell": {
362+
"cellMetadata": {
363+
"byteLimit": 2048000,
364+
"rowLimit": 10000
365+
},
366+
"inputWidgets": {},
367+
"nuid": "66ec2974-5f8b-4fa3-9260-53e68c288bdc",
368+
"showTitle": false,
369+
"tableResultSettingsMap": {},
370+
"title": ""
371+
}
372+
},
373+
"outputs": [
374+
{
376+
"name": "stderr",
377+
"output_type": "stream",
378+
"text": [
379+
"/databricks/python/lib/python3.11/site-packages/pyspark/sql/connect/expressions.py:1017: UserWarning: WARN WindowExpression: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.\n warnings.warn(\n"
380+
]
381+
},
382+
{
384+
"name": "stdout",
385+
"output_type": "stream",
386+
"text": [
387+
"+-----+---------------------+---------------------+\n|month|average_ride_distance|average_ride_duration|\n+-----+---------------------+---------------------+\n| 1| 21.0| 12.67|\n| 2| 21.0| 12.67|\n| 3| 21.0| 12.67|\n| 4| 24.33| 32.0|\n| 5| 57.67| 41.33|\n| 6| 97.33| 64.0|\n| 7| 73.0| 32.0|\n| 8| 39.67| 22.67|\n| 9| 54.33| 64.33|\n| 10| 56.33| 77.0|\n+-----+---------------------+---------------------+\n\n"
388+
]
389+
}
390+
],
391+
"source": [
392+
"monthly_full_df_1651\\\n",
393+
" .withColumn(\"sum_dist_3m\", sum(\"total_distance\").over(windowSpec))\\\n",
394+
" .withColumn(\"sum_dur_3m\", sum(\"total_duration\").over(windowSpec))\\\n",
395+
" .filter(F.col(\"month\") <= 10)\\\n",
396+
" .select(\n",
397+
" col(\"month\"),\n",
398+
" round(col(\"sum_dist_3m\") / lit(3), 2).alias(\"average_ride_distance\"),\n",
399+
" round(col(\"sum_dur_3m\") / lit(3), 2).alias(\"average_ride_duration\")\n",
400+
" )\\\n",
401+
" .orderBy(\"month\").show()"
402+
]
211403
}
212404
],
213405
"metadata": {
214406
"application/vnd.databricks.v1+notebook": {
215-
"computePreferences": null,
407+
"computePreferences": {
408+
"hardware": {
409+
"accelerator": null,
410+
"gpuPoolId": null,
411+
"memory": null
412+
}
413+
},
216414
"dashboards": [],
217415
"environmentMetadata": {
218416
"base_environment": "",
219-
"environment_version": "1"
417+
"environment_version": "2"
220418
},
221419
"inputWidgetPreferences": null,
222420
"language": "python",

0 commit comments

Comments
 (0)