Skip to content

Commit 0b98d21

Browse files
author
bitoollearner
committed
LeetCode Pyspark Solution
1 parent f9ed4a5 commit 0b98d21

5 files changed

+642
-35
lines changed

Solved/1532. The Most Recent Three Orders (Medium)-(Solved).ipynb

Lines changed: 162 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "44cd2565-3e27-417c-8c6c-7b2d8f1b94c5",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "e6da106f-4e0b-41e0-9f75-6e329535d5c0",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "f0267197-68d5-4180-8492-6999c905acb1",
4655
"showTitle": false,
@@ -135,15 +144,27 @@
135144
"execution_count": 0,
136145
"metadata": {
137146
"application/vnd.databricks.v1+cell": {
138-
"cellMetadata": {},
147+
"cellMetadata": {
148+
"byteLimit": 2048000,
149+
"rowLimit": 10000
150+
},
139151
"inputWidgets": {},
140152
"nuid": "e98c62a4-8cd4-48d6-9337-86cf54c19ddd",
141153
"showTitle": false,
142154
"tableResultSettingsMap": {},
143155
"title": ""
144156
}
145157
},
146-
"outputs": [],
158+
"outputs": [
159+
{
160+
"output_type": "stream",
161+
"name": "stdout",
163+
"text": [
164+
"+-----------+---------+\n|customer_id| name|\n+-----------+---------+\n| 1| Winston|\n| 2| Jonathan|\n| 3|Annabelle|\n| 4| Marwan|\n| 5| Khaled|\n+-----------+---------+\n\n+--------+----------+-----------+----+\n|order_id|order_date|customer_id|cost|\n+--------+----------+-----------+----+\n| 1|2020-07-31| 1| 30|\n| 2|2020-07-30| 2| 40|\n| 3|2020-07-31| 3| 70|\n| 4|2020-07-29| 4| 100|\n| 5|2020-06-10| 1|1010|\n| 6|2020-08-01| 2| 102|\n| 7|2020-08-01| 3| 111|\n| 8|2020-08-03| 1| 99|\n| 9|2020-08-07| 2| 32|\n| 10|2020-07-15| 1| 2|\n+--------+----------+-----------+----+\n\n"
165+
]
166+
}
167+
],
147168
"source": [
148169
"customers_data_1532 = [\n",
149170
" (1, \"Winston\"),\n",
@@ -173,15 +194,149 @@
173194
"orders_df_1532 = spark.createDataFrame(orders_data_1532, orders_columns_1532)\n",
174195
"orders_df_1532.show()"
175196
]
197+
},
198+
{
199+
"cell_type": "code",
200+
"execution_count": 0,
201+
"metadata": {
202+
"application/vnd.databricks.v1+cell": {
203+
"cellMetadata": {
204+
"byteLimit": 2048000,
205+
"rowLimit": 10000
206+
},
207+
"inputWidgets": {},
208+
"nuid": "6d88aa97-0574-4357-a6dd-60243fd246a9",
209+
"showTitle": false,
210+
"tableResultSettingsMap": {},
211+
"title": ""
212+
}
213+
},
214+
"outputs": [],
215+
"source": [
216+
"orders_df_1532 = orders_df_1532\\\n",
217+
" .withColumn(\"order_date\", to_date(\"order_date\"))"
218+
]
219+
},
220+
{
221+
"cell_type": "code",
222+
"execution_count": 0,
223+
"metadata": {
224+
"application/vnd.databricks.v1+cell": {
225+
"cellMetadata": {
226+
"byteLimit": 2048000,
227+
"rowLimit": 10000
228+
},
229+
"inputWidgets": {},
230+
"nuid": "e0086902-35f1-437b-954c-c8805537020e",
231+
"showTitle": false,
232+
"tableResultSettingsMap": {},
233+
"title": ""
234+
}
235+
},
236+
"outputs": [],
237+
"source": [
238+
"df_1532 = customers_df_1532\\\n",
239+
" .join(orders_df_1532, on=\"customer_id\", how=\"inner\")"
240+
]
241+
},
242+
{
243+
"cell_type": "code",
244+
"execution_count": 0,
245+
"metadata": {
246+
"application/vnd.databricks.v1+cell": {
247+
"cellMetadata": {
248+
"byteLimit": 2048000,
249+
"rowLimit": 10000
250+
},
251+
"inputWidgets": {},
252+
"nuid": "ab9e9b75-e19d-456c-8bf8-32308152dcbe",
253+
"showTitle": false,
254+
"tableResultSettingsMap": {},
255+
"title": ""
256+
}
257+
},
258+
"outputs": [],
259+
"source": [
260+
"window_spec = Window.partitionBy(\"customer_id\").orderBy(col(\"order_date\").desc())"
261+
]
262+
},
263+
{
264+
"cell_type": "code",
265+
"execution_count": 0,
266+
"metadata": {
267+
"application/vnd.databricks.v1+cell": {
268+
"cellMetadata": {
269+
"byteLimit": 2048000,
270+
"rowLimit": 10000
271+
},
272+
"inputWidgets": {},
273+
"nuid": "169bb6d6-a4a1-4a95-8ebd-bb52719e9243",
274+
"showTitle": false,
275+
"tableResultSettingsMap": {},
276+
"title": ""
277+
}
278+
},
279+
"outputs": [],
280+
"source": [
281+
"df_ranked_1532 = df_1532\\\n",
282+
" .withColumn(\"rank\", row_number().over(window_spec)) \\\n",
283+
" .filter(col(\"rank\") <= 3)"
284+
]
285+
},
286+
{
287+
"cell_type": "code",
288+
"execution_count": 0,
289+
"metadata": {
290+
"application/vnd.databricks.v1+cell": {
291+
"cellMetadata": {
292+
"byteLimit": 2048000,
293+
"rowLimit": 10000
294+
},
295+
"inputWidgets": {},
296+
"nuid": "241762cc-944b-414f-b2b1-83cea03eca71",
297+
"showTitle": false,
298+
"tableResultSettingsMap": {},
299+
"title": ""
300+
}
301+
},
302+
"outputs": [
303+
{
304+
"output_type": "stream",
305+
"name": "stdout",
307+
"text": [
308+
"+-------------+-----------+--------+----------+\n|customer_name|customer_id|order_id|order_date|\n+-------------+-----------+--------+----------+\n| Annabelle| 3| 7|2020-08-01|\n| Annabelle| 3| 3|2020-07-31|\n| Jonathan| 2| 9|2020-08-07|\n| Jonathan| 2| 6|2020-08-01|\n| Jonathan| 2| 2|2020-07-30|\n| Marwan| 4| 4|2020-07-29|\n| Winston| 1| 8|2020-08-03|\n| Winston| 1| 1|2020-07-31|\n| Winston| 1| 10|2020-07-15|\n+-------------+-----------+--------+----------+\n\n"
309+
]
310+
}
311+
],
312+
"source": [
313+
"df_ranked_1532\\\n",
314+
" .select(\n",
315+
" col(\"name\").alias(\"customer_name\"),\n",
316+
" \"customer_id\",\n",
317+
" \"order_id\",\n",
318+
" \"order_date\"\n",
319+
" ).orderBy(\n",
320+
" col(\"customer_name\").asc(),\n",
321+
" col(\"customer_id\").asc(),\n",
322+
" col(\"order_date\").desc()\n",
323+
" ).show()"
324+
]
176325
}
177326
],
178327
"metadata": {
179328
"application/vnd.databricks.v1+notebook": {
180-
"computePreferences": null,
329+
"computePreferences": {
330+
"hardware": {
331+
"accelerator": null,
332+
"gpuPoolId": null,
333+
"memory": null
334+
}
335+
},
181336
"dashboards": [],
182337
"environmentMetadata": {
183338
"base_environment": "",
184-
"environment_version": "1"
339+
"environment_version": "2"
185340
},
186341
"inputWidgetPreferences": null,
187342
"language": "python",

Solved/1543. Fix Product Name Format (Easy)-(Solved).ipynb

Lines changed: 111 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "44cd2565-3e27-417c-8c6c-7b2d8f1b94c5",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "e6da106f-4e0b-41e0-9f75-6e329535d5c0",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "f0267197-68d5-4180-8492-6999c905acb1",
4655
"showTitle": false,
@@ -106,15 +115,27 @@
106115
"execution_count": 0,
107116
"metadata": {
108117
"application/vnd.databricks.v1+cell": {
109-
"cellMetadata": {},
118+
"cellMetadata": {
119+
"byteLimit": 2048000,
120+
"rowLimit": 10000
121+
},
110122
"inputWidgets": {},
111123
"nuid": "e98c62a4-8cd4-48d6-9337-86cf54c19ddd",
112124
"showTitle": false,
113125
"tableResultSettingsMap": {},
114126
"title": ""
115127
}
116128
},
117-
"outputs": [],
129+
"outputs": [
130+
{
131+
"output_type": "stream",
132+
"name": "stdout",
134+
"text": [
135+
"+-------+------------+----------+\n|sale_id|product_name| sale_date|\n+-------+------------+----------+\n| 1| LCPHONE |2000-01-16|\n| 2| LCPhone|2000-01-17|\n| 3| LcPhOnE|2000-02-18|\n| 4| LCKeyCHAiN |2000-02-19|\n| 5| LCKeyChain|2000-02-28|\n| 6| Matryoshka|2000-03-31|\n+-------+------------+----------+\n\n"
136+
]
137+
}
138+
],
118139
"source": [
119140
"sales_data_1543 = [\n",
120141
" (1, \" LCPHONE \", \"2000-01-16\"),\n",
@@ -129,15 +150,98 @@
129150
"sales_df_1543 = spark.createDataFrame(sales_data_1543, sales_columns_1543)\n",
130151
"sales_df_1543.show()"
131152
]
153+
},
154+
{
155+
"cell_type": "code",
156+
"execution_count": 0,
157+
"metadata": {
158+
"application/vnd.databricks.v1+cell": {
159+
"cellMetadata": {
160+
"byteLimit": 2048000,
161+
"rowLimit": 10000
162+
},
163+
"inputWidgets": {},
164+
"nuid": "20a5c636-2bea-49d8-9293-676ce6ce0328",
165+
"showTitle": false,
166+
"tableResultSettingsMap": {},
167+
"title": ""
168+
}
169+
},
170+
"outputs": [],
171+
"source": [
172+
"sales_df_1543 = sales_df_1543\\\n",
173+
" .withColumn(\"sale_date\", to_date(\"sale_date\"))"
174+
]
175+
},
176+
{
177+
"cell_type": "code",
178+
"execution_count": 0,
179+
"metadata": {
180+
"application/vnd.databricks.v1+cell": {
181+
"cellMetadata": {
182+
"byteLimit": 2048000,
183+
"rowLimit": 10000
184+
},
185+
"inputWidgets": {},
186+
"nuid": "c30cac10-95ec-4675-8c28-34c06076d765",
187+
"showTitle": false,
188+
"tableResultSettingsMap": {},
189+
"title": ""
190+
}
191+
},
192+
"outputs": [],
193+
"source": [
194+
"cleaned_df_1543 = sales_df_1543\\\n",
195+
" .withColumn( \"product_name\", lower(trim(col(\"product_name\"))))\\\n",
196+
" .withColumn( \"sale_date\", date_format(\"sale_date\", \"yyyy-MM\"))"
197+
]
198+
},
199+
{
200+
"cell_type": "code",
201+
"execution_count": 0,
202+
"metadata": {
203+
"application/vnd.databricks.v1+cell": {
204+
"cellMetadata": {
205+
"byteLimit": 2048000,
206+
"rowLimit": 10000
207+
},
208+
"inputWidgets": {},
209+
"nuid": "615c899e-5e14-4252-ae05-145049c77a54",
210+
"showTitle": false,
211+
"tableResultSettingsMap": {},
212+
"title": ""
213+
}
214+
},
215+
"outputs": [
216+
{
217+
"output_type": "stream",
218+
"name": "stdout",
220+
"text": [
221+
"+------------+---------+-----+\n|product_name|sale_date|total|\n+------------+---------+-----+\n| lckeychain| 2000-02| 2|\n| lcphone| 2000-01| 2|\n| lcphone| 2000-02| 1|\n| matryoshka| 2000-03| 1|\n+------------+---------+-----+\n\n"
222+
]
223+
}
224+
],
225+
"source": [
226+
"cleaned_df_1543\\\n",
227+
" .groupBy( \"product_name\", \"sale_date\").agg( count(\"*\").alias(\"total\"))\\\n",
228+
" .orderBy( col(\"product_name\").asc(), col(\"sale_date\").asc()).show()"
229+
]
132230
}
133231
],
134232
"metadata": {
135233
"application/vnd.databricks.v1+notebook": {
136-
"computePreferences": null,
234+
"computePreferences": {
235+
"hardware": {
236+
"accelerator": null,
237+
"gpuPoolId": null,
238+
"memory": null
239+
}
240+
},
137241
"dashboards": [],
138242
"environmentMetadata": {
139243
"base_environment": "",
140-
"environment_version": "1"
244+
"environment_version": "2"
141245
},
142246
"inputWidgetPreferences": null,
143247
"language": "python",

0 commit comments

Comments
 (0)