Skip to content

Commit f9ed4a5

Browse files
author
bitoollearner
committed
Leetcode Pyspark Solution
1 parent 49a6033 commit f9ed4a5

11 files changed

+1356
-72
lines changed

Solved/1454. Active Users (Medium)-(Solved).ipynb

Lines changed: 223 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
"cell_type": "markdown",
55
"metadata": {
66
"application/vnd.databricks.v1+cell": {
7-
"cellMetadata": {},
7+
"cellMetadata": {
8+
"byteLimit": 2048000,
9+
"rowLimit": 10000
10+
},
811
"inputWidgets": {},
912
"nuid": "24f8e8c0-d56b-4821-a900-bbe934dce4d7",
1013
"showTitle": false,
@@ -21,7 +24,10 @@
2124
"execution_count": 0,
2225
"metadata": {
2326
"application/vnd.databricks.v1+cell": {
24-
"cellMetadata": {},
27+
"cellMetadata": {
28+
"byteLimit": 2048000,
29+
"rowLimit": 10000
30+
},
2531
"inputWidgets": {},
2632
"nuid": "243b190c-af0b-4bd1-9507-8e2be2fceca3",
2733
"showTitle": false,
@@ -40,7 +46,10 @@
4046
"cell_type": "markdown",
4147
"metadata": {
4248
"application/vnd.databricks.v1+cell": {
43-
"cellMetadata": {},
49+
"cellMetadata": {
50+
"byteLimit": 2048000,
51+
"rowLimit": 10000
52+
},
4453
"inputWidgets": {},
4554
"nuid": "2d9dc49c-3f2c-4ff6-8e98-5e72a86b1bac",
4655
"showTitle": false,
@@ -119,15 +128,27 @@
119128
"execution_count": 0,
120129
"metadata": {
121130
"application/vnd.databricks.v1+cell": {
122-
"cellMetadata": {},
131+
"cellMetadata": {
132+
"byteLimit": 2048000,
133+
"rowLimit": 10000
134+
},
123135
"inputWidgets": {},
124136
"nuid": "5d66d20c-58e2-4ee9-8356-d736216372c3",
125137
"showTitle": false,
126138
"tableResultSettingsMap": {},
127139
"title": ""
128140
}
129141
},
130-
"outputs": [],
142+
"outputs": [
143+
{
144+
"output_type": "stream",
145+
"name": "stdout",
146+
"output_type": "stream",
147+
"text": [
148+
"+---+--------+\n| id| name|\n+---+--------+\n| 1| Winston|\n| 7|Jonathan|\n+---+--------+\n\n+---+----------+\n| id|login_date|\n+---+----------+\n| 7|2020-05-30|\n| 1|2020-05-30|\n| 7|2020-05-31|\n| 7|2020-06-01|\n| 7|2020-06-02|\n| 7|2020-06-02|\n| 7|2020-06-03|\n| 1|2020-06-07|\n| 7|2020-06-10|\n+---+----------+\n\n"
149+
]
150+
}
151+
],
131152
"source": [
132153
"accounts_data_1454 = [\n",
133154
" (1, \"Winston\"),\n",
@@ -154,15 +175,210 @@
154175
"logins_df_1454 = spark.createDataFrame(logins_data_1454, logins_columns_1454)\n",
155176
"logins_df_1454.show()"
156177
]
178+
},
179+
{
180+
"cell_type": "code",
181+
"execution_count": 0,
182+
"metadata": {
183+
"application/vnd.databricks.v1+cell": {
184+
"cellMetadata": {
185+
"byteLimit": 2048000,
186+
"rowLimit": 10000
187+
},
188+
"inputWidgets": {},
189+
"nuid": "61e43104-e7f2-478a-bcb0-d03828987e08",
190+
"showTitle": false,
191+
"tableResultSettingsMap": {},
192+
"title": ""
193+
}
194+
},
195+
"outputs": [],
196+
"source": [
197+
"logins_df_1454 = logins_df_1454\\\n",
198+
" .withColumn(\"login_date\", to_date(\"login_date\"))"
199+
]
200+
},
201+
{
202+
"cell_type": "code",
203+
"execution_count": 0,
204+
"metadata": {
205+
"application/vnd.databricks.v1+cell": {
206+
"cellMetadata": {
207+
"byteLimit": 2048000,
208+
"rowLimit": 10000
209+
},
210+
"inputWidgets": {},
211+
"nuid": "9d34d2c0-23ef-4484-ae33-7c78db84d4be",
212+
"showTitle": false,
213+
"tableResultSettingsMap": {},
214+
"title": ""
215+
}
216+
},
217+
"outputs": [],
218+
"source": [
219+
"n = 5"
220+
]
221+
},
222+
{
223+
"cell_type": "code",
224+
"execution_count": 0,
225+
"metadata": {
226+
"application/vnd.databricks.v1+cell": {
227+
"cellMetadata": {
228+
"byteLimit": 2048000,
229+
"rowLimit": 10000
230+
},
231+
"inputWidgets": {},
232+
"nuid": "069478cc-d700-46d5-a3f9-b98df6b595b5",
233+
"showTitle": false,
234+
"tableResultSettingsMap": {},
235+
"title": ""
236+
}
237+
},
238+
"outputs": [],
239+
"source": [
240+
"logins_df_1454 = logins_df_1454.dropDuplicates([\"id\", \"login_date\"])"
241+
]
242+
},
243+
{
244+
"cell_type": "code",
245+
"execution_count": 0,
246+
"metadata": {
247+
"application/vnd.databricks.v1+cell": {
248+
"cellMetadata": {
249+
"byteLimit": 2048000,
250+
"rowLimit": 10000
251+
},
252+
"inputWidgets": {},
253+
"nuid": "2880faa5-14e9-486a-9243-9e8998a604c2",
254+
"showTitle": false,
255+
"tableResultSettingsMap": {},
256+
"title": ""
257+
}
258+
},
259+
"outputs": [],
260+
"source": [
261+
"windowSpec = Window.partitionBy(\"id\").orderBy(\"login_date\")"
262+
]
263+
},
264+
{
265+
"cell_type": "code",
266+
"execution_count": 0,
267+
"metadata": {
268+
"application/vnd.databricks.v1+cell": {
269+
"cellMetadata": {
270+
"byteLimit": 2048000,
271+
"rowLimit": 10000
272+
},
273+
"inputWidgets": {},
274+
"nuid": "245f6ff2-793d-46c8-8324-c5c339f44a74",
275+
"showTitle": false,
276+
"tableResultSettingsMap": {},
277+
"title": ""
278+
}
279+
},
280+
"outputs": [],
281+
"source": [
282+
"logins_df_1454 = logins_df_1454\\\n",
283+
" .withColumn(\"grp\", \n",
284+
" datediff(\"login_date\", lag(\"login_date\", 1).over(windowSpec))\n",
285+
" )\\\n",
286+
" .withColumn(\"grp\", when(col(\"grp\").isNull() | (col(\"grp\") != 1), 0).otherwise(1)\n",
287+
" )\\\n",
288+
" .withColumn(\"streak_grp\", sum(when(col(\"grp\") == 0, 1).otherwise(0)).over(windowSpec))\n"
289+
]
290+
},
291+
{
292+
"cell_type": "code",
293+
"execution_count": 0,
294+
"metadata": {
295+
"application/vnd.databricks.v1+cell": {
296+
"cellMetadata": {
297+
"byteLimit": 2048000,
298+
"rowLimit": 10000
299+
},
300+
"inputWidgets": {},
301+
"nuid": "953ecab5-5c28-4956-a835-e2b6c7a033ab",
302+
"showTitle": false,
303+
"tableResultSettingsMap": {},
304+
"title": ""
305+
}
306+
},
307+
"outputs": [],
308+
"source": [
309+
"streak_counts_1454 = logins_df_1454\\\n",
310+
" .groupBy(\"id\", \"streak_grp\").agg(count(\"*\").alias(\"streak_days\")\n",
311+
" )"
312+
]
313+
},
314+
{
315+
"cell_type": "code",
316+
"execution_count": 0,
317+
"metadata": {
318+
"application/vnd.databricks.v1+cell": {
319+
"cellMetadata": {
320+
"byteLimit": 2048000,
321+
"rowLimit": 10000
322+
},
323+
"inputWidgets": {},
324+
"nuid": "03857c59-e667-4aa5-956e-768fcb3de627",
325+
"showTitle": false,
326+
"tableResultSettingsMap": {},
327+
"title": ""
328+
}
329+
},
330+
"outputs": [],
331+
"source": [
332+
"active_users_df_1454 = streak_counts_1454\\\n",
333+
" .filter(col(\"streak_days\") >= n)\\\n",
334+
" .select(\"id\").distinct()"
335+
]
336+
},
337+
{
338+
"cell_type": "code",
339+
"execution_count": 0,
340+
"metadata": {
341+
"application/vnd.databricks.v1+cell": {
342+
"cellMetadata": {
343+
"byteLimit": 2048000,
344+
"rowLimit": 10000
345+
},
346+
"inputWidgets": {},
347+
"nuid": "39f7fb38-ef7d-49fc-878d-ecfdd1e96972",
348+
"showTitle": false,
349+
"tableResultSettingsMap": {},
350+
"title": ""
351+
}
352+
},
353+
"outputs": [
354+
{
355+
"output_type": "stream",
356+
"name": "stdout",
357+
"output_type": "stream",
358+
"text": [
359+
"+---+--------+\n| id| name|\n+---+--------+\n| 7|Jonathan|\n+---+--------+\n\n"
360+
]
361+
}
362+
],
363+
"source": [
364+
"active_users_df_1454\\\n",
365+
" .join(accounts_df_1454, \"id\").orderBy(\"id\").show()"
366+
]
157367
}
158368
],
159369
"metadata": {
160370
"application/vnd.databricks.v1+notebook": {
161-
"computePreferences": null,
371+
"computePreferences": {
372+
"hardware": {
373+
"accelerator": null,
374+
"gpuPoolId": null,
375+
"memory": null
376+
}
377+
},
162378
"dashboards": [],
163379
"environmentMetadata": {
164380
"base_environment": "",
165-
"environment_version": "1"
381+
"environment_version": "2"
166382
},
167383
"inputWidgetPreferences": null,
168384
"language": "python",

0 commit comments

Comments
 (0)