import pandas as pd
# Load the movie dataset from 'movies.csv' (path is relative to the working directory).
movies = pd.read_csv('movies.csv')
# Preview the first rows; as a bare expression this only displays something
# in an interactive / notebook session.
movies.head()


import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Scatter plot with one point per movie: release year on x, the
# `budget_2013$` column on y, colour taken from the `binary` column and
# marker size from the `domgross_2013$` column.
sns.set_theme(style="white")

sns.relplot(x="year", y="budget_2013$", hue="binary", size="domgross_2013$",
            sizes=(10, 600), alpha=.5, palette="muted",
            height=6, data=movies, legend='brief')

# relplot creates its own figure, so the size is adjusted afterwards on the
# current figure.
plt.gcf().set_size_inches(15, 8)

plt.xlabel('')
# NOTE(review): the plotted column appears to hold raw dollar amounts, not
# billions -- confirm that the '$Billion' wording matches the tick scale.
plt.ylabel('Budget in 2013 $Billion', fontsize=14)
plt.title('Movies, Bechdel Test result, Budget, Domestic Revenue', fontsize=18)

# A bare `Text(0.5, 1.0, 'Movies, Bechdel Test result, Budget, Domestic Revenue')`
# line used to follow here. It was the notebook's echo of plt.title()'s return
# value, not code: `Text` is never imported, so executing it raised NameError.


# Number of movies per release year, one column per `binary` value
# (FAIL / PASS), plus a Total column.
#
# fill_value=0 makes "no movies in this category for this year" a 0 instead
# of NaN. Without it, FAIL + PASS is NaN for every year in which either
# category is empty, so those years end up with no Total at all.
count_per_year = movies.pivot_table(values='test', index='year', columns='binary',
                                    aggfunc='count', fill_value=0)
count_per_year['Total'] = count_per_year['FAIL'] + count_per_year['PASS']
count_per_year.head()


import numpy as np
import matplotlib.pyplot as plt

# Bar chart of movie counts per year. The total is drawn first and the FAIL
# count is drawn on top of it in grey, so the part of each bar that remains
# visible in the first colour corresponds to the PASS count.
N = len(count_per_year)
ind = np.arange(N)

# A year with no movies in one category may be NaN in count_per_year; a NaN
# height draws no bar at all. Treat missing counts as zero and rebuild the
# total from the two categories so every year gets a bar.
failed_counts = count_per_year['FAIL'].fillna(0)
passed_counts = count_per_year['PASS'].fillna(0)
total_counts = failed_counts + passed_counts

plt.figure(figsize=(16,8))
plt.grid(visible=False, which='both', axis='x')
plt.grid(visible=True, which='both', axis='y', color='lightgrey', linestyle='-', linewidth=0.5)

p1 = plt.bar(ind, total_counts, color = 'mediumaquamarine')
p2 = plt.bar(ind, failed_counts, color = 'grey')

plt.ylabel('Count of movies', fontsize=14)
plt.title('Movies that passed and failed Bechdel Test by year', fontsize=20)
plt.xticks(ind, count_per_year.index.values, rotation=45, fontsize=12)

# One tick every 10 movies. Keep the original 0..130 range as a minimum, but
# extend it when a year has more movies than that so the tallest bar is
# still covered by a tick.
tick_stop = max(140, int(np.ceil(total_counts.max() / 10)) * 10 + 10)
plt.yticks(np.arange(0, tick_stop, 10), fontsize=13)
plt.legend((p1[0], p2[0]), ('Passed', 'Failed'), fontsize=16, frameon=True,
           facecolor='white', edgecolor="white", borderpad=1, ncol=2)

# Explanatory text placed in figure coordinates on top of the chart.
plt.figtext(0.18, 0.69, 'Requirements to Pass:', fontsize=18, fontweight='bold', color='grey', backgroundcolor='white')
plt.figtext(0.18, 0.58, ' 1. At least 2 named female characters\n 2. They talk to each other\n 3. About something other than men',
            fontsize=17, color='grey', linespacing=2, wrap=True, backgroundcolor='white')

plt.show()


# Sum of the `budget_2013$` column per year, with one column per `binary`
# value. Year/category combinations without any movie stay NaN.
budget = movies.pivot_table(
    values='budget_2013$',
    index='year',
    columns='binary',
    aggfunc='sum',
)
budget.head()


import numpy as np
import matplotlib.pyplot as plt

# Line chart of yearly budget totals: create the figure, configure the grid
# and draw one line per `binary` category.
fig, ax = plt.subplots(figsize=(16, 8))

# No vertical grid lines; horizontal ones in light grey, with the minor
# level configured thinner than the major level.
ax.grid(visible=False, which='both', axis='x')
ax.grid(visible=True, which='major', axis='y',
        color='lightgrey', linestyle='-', linewidth=1)
ax.grid(visible=True, which='minor', axis='y',
        color='lightgrey', linestyle='-', linewidth=0.5)

# The FAIL line is drawn thicker (4) than the PASS line (2).
[line1] = ax.plot(budget.index.values, budget['FAIL'],
                  label='Failed', color='grey', linewidth=4)
[line2] = ax.plot(budget.index.values, budget['PASS'],
                  label='Passed', color='mediumaquamarine', linewidth=2)

def currency(x, pos):
    """Format a dollar amount as a short tick label.

    The unit is chosen from the magnitude of the value so the label never
    collapses to a misleading "$0.0B" for amounts in the millions.

    Args:
        x: Tick value in dollars.
        pos: Tick position. Unused; present because the function is
            installed as an axis formatter, which calls it with two
            arguments.

    Returns:
        ``'$<n.n>B'`` for values of one billion or more, ``'$<n.n>M'`` for
        values of one million or more, otherwise ``'$<n>K'``.
    """
    if x >= 1e9:
        return '${:1.1f}B'.format(x / 1e9)
    if x >= 1e6:
        return '${:1.1f}M'.format(x / 1e6)
    return '${:1.0f}K'.format(x / 1e3)

# Finish the budget line chart: tick labels, title, annotation, legend.

# Render y tick values through currency() instead of as plain numbers.
ax.yaxis.set_major_formatter(currency)
ax.set_title('Budgets of Movies that passed and failed Bechdel Test by year', fontsize=20)
ax.set_ylabel('Budget of movies (in 2013 dollars)', fontsize=14)

# One x tick per year present in the table, rotated so the labels fit.
plt.xticks(budget.index.values, rotation=45, fontsize=12)
plt.yticks(fontsize=13)

# Annotation placed in figure coordinates on top of the plot area.
fig.text(
    0.15, 0.65,
    'Takeaway: the movie industry continues to spend \nfar more on movies that fail the Bechdel Test',
    fontsize=18, color='grey', backgroundcolor='white', linespacing=2,
)

ax.legend(
    fontsize=16, frameon=True, loc='upper center',
    facecolor='white', edgecolor='white', borderpad=1, ncol=2,
)
plt.show()

	year	imdb	title	test	clean_test	binary	budget	domgross	intgross	code	budget_2013$	domgross_2013$	intgross_2013$	period code	decade code
0	1970	tt0065466	Beyond the Valley of the Dolls	ok	ok	PASS	1000000	9000000.0	9000000.0	1970PASS	5997631	53978683.0	53978683.0	NaN	NaN
1	1971	tt0067065	Escape from the Planet of the Apes	notalk	notalk	FAIL	2500000	12300000.0	12300000.0	1971FAIL	14386286	70780525.0	70780525.0	NaN	NaN
2	1971	tt0067741	Shaft	notalk	notalk	FAIL	53012938	70327868.0	107190108.0	1971FAIL	305063707	404702718.0	616827003.0	NaN	NaN
3	1971	tt0067800	Straw Dogs	notalk	notalk	FAIL	25000000	10324441.0	11253821.0	1971FAIL	143862856	59412143.0	64760273.0	NaN	NaN
4	1971	tt0067116	The French Connection	notalk	notalk	FAIL	2200000	41158757.0	41158757.0	1971FAIL	12659931	236848653.0	236848653.0	NaN	NaN

binary	FAIL	PASS	Total
year
1970	NaN	1.0	NaN
1971	5.0	NaN	NaN
1972	2.0	1.0	3.0
1973	4.0	1.0	5.0
1974	5.0	2.0	7.0

binary	FAIL	PASS
year
1970	NaN	5997631.0
1971	493236323.0	NaN
1972	61293532.0	66866.0
1973	125732851.0	62926730.0
1974	34388727.0	103921974.0

Women In Movies: Bechdel Test Results Over Time

Jennifer Smith

June 7, 2022