diff --git a/scripts/chapter_1_introduction/tutorial_3_fitting.py b/scripts/chapter_1_introduction/tutorial_3_fitting.py index 058acdc..2883940 100644 --- a/scripts/chapter_1_introduction/tutorial_3_fitting.py +++ b/scripts/chapter_1_introduction/tutorial_3_fitting.py @@ -60,7 +60,7 @@ import sys subprocess.run( - [sys.executable, "scripts/imaging/simulator.py"], + [sys.executable, "scripts/simulators/simple.py"], check=True, ) diff --git a/scripts/chapter_2_modeling/tutorial_3_realism_and_complexity.py b/scripts/chapter_2_modeling/tutorial_3_realism_and_complexity.py index 9f25102..98dc18f 100644 --- a/scripts/chapter_2_modeling/tutorial_3_realism_and_complexity.py +++ b/scripts/chapter_2_modeling/tutorial_3_realism_and_complexity.py @@ -55,7 +55,7 @@ import sys subprocess.run( - [sys.executable, "scripts/imaging/simulator.py"], + [sys.executable, "scripts/simulators/simple.py"], check=True, ) diff --git a/scripts/chapter_2_modeling/tutorial_4_dealing_with_failure.py b/scripts/chapter_2_modeling/tutorial_4_dealing_with_failure.py index 2a09dec..81212d1 100644 --- a/scripts/chapter_2_modeling/tutorial_4_dealing_with_failure.py +++ b/scripts/chapter_2_modeling/tutorial_4_dealing_with_failure.py @@ -56,7 +56,7 @@ import sys subprocess.run( - [sys.executable, "scripts/imaging/simulator.py"], + [sys.executable, "scripts/simulators/simple.py"], check=True, ) diff --git a/scripts/chapter_2_modeling/tutorial_5_linear_profiles.py b/scripts/chapter_2_modeling/tutorial_5_linear_profiles.py index b790b09..2f03466 100644 --- a/scripts/chapter_2_modeling/tutorial_5_linear_profiles.py +++ b/scripts/chapter_2_modeling/tutorial_5_linear_profiles.py @@ -70,7 +70,7 @@ import sys subprocess.run( - [sys.executable, "scripts/imaging/simulator.py"], + [sys.executable, "scripts/simulators/simple.py"], check=True, ) diff --git a/scripts/chapter_3_search_chaining/tutorial_1_search_chaining.py b/scripts/chapter_3_search_chaining/tutorial_1_search_chaining.py index e17b05c..52f7a05 100644 --- a/scripts/chapter_3_search_chaining/tutorial_1_search_chaining.py +++ b/scripts/chapter_3_search_chaining/tutorial_1_search_chaining.py @@ -76,7 +76,7 @@ import sys subprocess.run( - [sys.executable, "scripts/imaging/simulator.py"], + [sys.executable, "scripts/simulators/simple.py"], check=True, ) diff --git a/scripts/chapter_3_search_chaining/tutorial_2_prior_passing.py b/scripts/chapter_3_search_chaining/tutorial_2_prior_passing.py index 49112b0..34ff1e7 100644 --- a/scripts/chapter_3_search_chaining/tutorial_2_prior_passing.py +++ b/scripts/chapter_3_search_chaining/tutorial_2_prior_passing.py @@ -55,7 +55,7 @@ import sys subprocess.run( - [sys.executable, "scripts/imaging/simulator.py"], + [sys.executable, "scripts/simulators/simple.py"], check=True, ) diff --git a/scripts/chapter_3_search_chaining/tutorial_3_x2_galaxies.py b/scripts/chapter_3_search_chaining/tutorial_3_x2_galaxies.py index eaaf748..b52b39b 100644 --- a/scripts/chapter_3_search_chaining/tutorial_3_x2_galaxies.py +++ b/scripts/chapter_3_search_chaining/tutorial_3_x2_galaxies.py @@ -58,7 +58,7 @@ import sys subprocess.run( - [sys.executable, "scripts/guides/plot/simulator.py"], + [sys.executable, "scripts/simulators/sersic_x2.py"], check=True, ) diff --git a/scripts/chapter_4_pixelizations/tutorial_2_mappers.py b/scripts/chapter_4_pixelizations/tutorial_2_mappers.py index 2f24007..c9b6d00 100644 --- a/scripts/chapter_4_pixelizations/tutorial_2_mappers.py +++ b/scripts/chapter_4_pixelizations/tutorial_2_mappers.py @@ -1,166 +1,166 @@ -""" -Tutorial 2: Mappers -=================== - -In the previous tutorial, we used a pixelization to create made a `Mapper`. However, it was not clear what a `Mapper` -does, why it was called a mapper and whether it was mapping anything at all! - -Therefore, in this tutorial, we'll cover mappers in more detail. - -__Contents__ - -- **Initial Setup:** Load the dataset for illustration. -- **Mappers:** Understand how mappers map image-plane pixels to pixelization pixels. -- **Mask:** Apply a mask and see how it affects the mapper. -- **Wrap Up:** Summary of mapper concepts. -""" - +""" +Tutorial 2: Mappers +=================== + +In the previous tutorial, we used a pixelization to create made a `Mapper`. However, it was not clear what a `Mapper` +does, why it was called a mapper and whether it was mapping anything at all! + +Therefore, in this tutorial, we'll cover mappers in more detail. + +__Contents__ + +- **Initial Setup:** Load the dataset for illustration. +- **Mappers:** Understand how mappers map image-plane pixels to pixelization pixels. +- **Mask:** Apply a mask and see how it affects the mapper. +- **Wrap Up:** Summary of mapper concepts. +""" + # from autoconf import setup_notebook; setup_notebook() - -from pathlib import Path -import autogalaxy as ag -import autogalaxy.plot as aplt -import autoarray.plot as aaplt - -""" -__Initial Setup__ - -we'll use complex galaxy data, where: - - - The galaxy's bulge is an `Sersic`. - - The galaxy's disk is an `Exponential`. - - The galaxy's has four star forming clumps which are `Sersic` profiles. -""" -dataset_name = "simple" -dataset_path = Path("dataset") / "imaging" / dataset_name - -""" -__Dataset Auto-Simulation__ - -If the dataset does not already exist on your system, it will be created by running the corresponding -simulator script. This ensures that all example scripts can be run without manually simulating data first. -""" -if not dataset_path.exists(): - import subprocess - import sys - - subprocess.run( - [sys.executable, "scripts/imaging/simulator.py"], - check=True, - ) - -dataset = ag.Imaging.from_fits( - data_path=dataset_path / "data.fits", - noise_map_path=dataset_path / "noise_map.fits", - psf_path=dataset_path / "psf.fits", - pixel_scales=0.1, -) - -""" -Now, lets set up our `Grid2D` (using the image above). -""" -grid = ag.Grid2D.uniform( - shape_native=dataset.shape_native, pixel_scales=dataset.pixel_scales -) - -""" -__Mappers__ - -We now setup a `Pixelization` and use it to create a `Mapper` via the plane`s source-plane grid, just like we did in -the previous tutorial. - -We will make its pixelization resolution half that of the grid above. -""" -mesh = ag.mesh.RectangularAdaptDensity( - shape=(dataset.shape_native[0] / 2, dataset.shape_native[1] / 2) -) - -pixelization = ag.Pixelization(mesh=mesh) - -interpolator = mesh.interpolator_from( - source_plane_data_grid=grid, source_plane_mesh_grid=None -) - -mapper = ag.Mapper(interpolator=interpolator) - -""" -We now plot the `Mapper` alongside the image we used to generate the source-plane grid. - -Using the `Visuals2D` object we are also going to highlight specific grid coordinates certain colors, such that we -can see how they map from the image grid to the pixelization grid. -""" -indexes = [range(250), [150, 250, 350, 450, 550, 650, 750, 850, 950, 1050]] - -aaplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) - - -""" -Using a mapper, we can now make these mappings appear the other way round. That is, we can input a pixelization pixel -index (of our rectangular grid) and highlight how all of the image-pixels that it contains map to the image-plane. - -Lets map source pixel 313, the central source-pixel, to the image. We observe that for a given rectangular pixelization -pixel, there are four image pixels. -""" -pix_indexes = [[312]] - -indexes = mapper.slim_indexes_for_pix_indexes(pix_indexes=pix_indexes) - -aaplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) - -""" -Okay, so I think we can agree, mapper's map things! More specifically, they map pixelization pixels to multiple pixels -in the observed image of a galaxy. - -__Mask__ - -Finally, lets repeat the steps that we performed above, but now using a masked image. By applying a `Mask2D`, the -mapper only maps image-pixels that are not removed by the mask. This removes the (many) image pixels at the edge of the -image, where the galaxy is not present. - -Lets just have a quick look at these edges pixels: - -Lets use an circular `Mask2D`, which will capture the central galaxy light and clumps. -""" -mask = ag.Mask2D.circular( - shape_native=dataset.shape_native, pixel_scales=dataset.pixel_scales, radius=2.0 -) - -dataset = dataset.apply_mask(mask=mask) -aplt.plot_array(array=dataset.data, title="Data") - -""" -We can now use the masked grid to create a new `Mapper` (using the same rectangular 25 x 25 pixelization -as before). -""" -interpolator = mesh.interpolator_from( - source_plane_data_grid=dataset.grids.pixelization, source_plane_mesh_grid=None -) -mapper = ag.Mapper(interpolator=interpolator) - -""" -Lets plot it. -""" -aaplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) - -""" -First, We can see a red circle of dots in both the image and pixelization, showing where the edge of the mask -maps too in the pixelization. - -Now lets show that when we plot pixelization pixel indexes, they still appear in the same place in the image. -""" -pix_indexes = [[312], [314], [316], [318]] - -indexes = mapper.slim_indexes_for_pix_indexes(pix_indexes=pix_indexes) - -aaplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) - -""" -__Wrap Up__ - -In this tutorial, we learnt about mappers, and we used them to understand how the image and pixelization map to one -another. Your exercises are: - - 1) Think about how this could help us actually model galaxies. We have said we're going to reconstruct our galaxies - on the pixel-grid. So, how does knowing how each pixel maps to the image actually help us? If you`ve not got - any bright ideas, then worry not, that exactly what we're going to cover in the next tutorial. -""" + +from pathlib import Path +import autogalaxy as ag +import autogalaxy.plot as aplt +import autoarray.plot as aaplt + +""" +__Initial Setup__ + +we'll use complex galaxy data, where: + + - The galaxy's bulge is an `Sersic`. + - The galaxy's disk is an `Exponential`. + - The galaxy's has four star forming clumps which are `Sersic` profiles. +""" +dataset_name = "simple" +dataset_path = Path("dataset") / "imaging" / dataset_name + +""" +__Dataset Auto-Simulation__ + +If the dataset does not already exist on your system, it will be created by running the corresponding +simulator script. This ensures that all example scripts can be run without manually simulating data first. +""" +if not dataset_path.exists(): + import subprocess + import sys + + subprocess.run( + [sys.executable, "scripts/simulators/simple.py"], + check=True, + ) + +dataset = ag.Imaging.from_fits( + data_path=dataset_path / "data.fits", + noise_map_path=dataset_path / "noise_map.fits", + psf_path=dataset_path / "psf.fits", + pixel_scales=0.1, +) + +""" +Now, lets set up our `Grid2D` (using the image above). +""" +grid = ag.Grid2D.uniform( + shape_native=dataset.shape_native, pixel_scales=dataset.pixel_scales +) + +""" +__Mappers__ + +We now setup a `Pixelization` and use it to create a `Mapper` via the plane`s source-plane grid, just like we did in +the previous tutorial. + +We will make its pixelization resolution half that of the grid above. +""" +mesh = ag.mesh.RectangularAdaptDensity( + shape=(dataset.shape_native[0] / 2, dataset.shape_native[1] / 2) +) + +pixelization = ag.Pixelization(mesh=mesh) + +interpolator = mesh.interpolator_from( + source_plane_data_grid=grid, source_plane_mesh_grid=None +) + +mapper = ag.Mapper(interpolator=interpolator) + +""" +We now plot the `Mapper` alongside the image we used to generate the source-plane grid. + +Using the `Visuals2D` object we are also going to highlight specific grid coordinates certain colors, such that we +can see how they map from the image grid to the pixelization grid. +""" +indexes = [range(250), [150, 250, 350, 450, 550, 650, 750, 850, 950, 1050]] + +aaplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) + + +""" +Using a mapper, we can now make these mappings appear the other way round. That is, we can input a pixelization pixel +index (of our rectangular grid) and highlight how all of the image-pixels that it contains map to the image-plane. + +Lets map source pixel 313, the central source-pixel, to the image. We observe that for a given rectangular pixelization +pixel, there are four image pixels. +""" +pix_indexes = [[312]] + +indexes = mapper.slim_indexes_for_pix_indexes(pix_indexes=pix_indexes) + +aaplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) + +""" +Okay, so I think we can agree, mapper's map things! More specifically, they map pixelization pixels to multiple pixels +in the observed image of a galaxy. + +__Mask__ + +Finally, lets repeat the steps that we performed above, but now using a masked image. By applying a `Mask2D`, the +mapper only maps image-pixels that are not removed by the mask. This removes the (many) image pixels at the edge of the +image, where the galaxy is not present. + +Lets just have a quick look at these edges pixels: + +Lets use an circular `Mask2D`, which will capture the central galaxy light and clumps. +""" +mask = ag.Mask2D.circular( + shape_native=dataset.shape_native, pixel_scales=dataset.pixel_scales, radius=2.0 +) + +dataset = dataset.apply_mask(mask=mask) +aplt.plot_array(array=dataset.data, title="Data") + +""" +We can now use the masked grid to create a new `Mapper` (using the same rectangular 25 x 25 pixelization +as before). +""" +interpolator = mesh.interpolator_from( + source_plane_data_grid=dataset.grids.pixelization, source_plane_mesh_grid=None +) +mapper = ag.Mapper(interpolator=interpolator) + +""" +Lets plot it. +""" +aaplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) + +""" +First, We can see a red circle of dots in both the image and pixelization, showing where the edge of the mask +maps too in the pixelization. + +Now lets show that when we plot pixelization pixel indexes, they still appear in the same place in the image. +""" +pix_indexes = [[312], [314], [316], [318]] + +indexes = mapper.slim_indexes_for_pix_indexes(pix_indexes=pix_indexes) + +aaplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) + +""" +__Wrap Up__ + +In this tutorial, we learnt about mappers, and we used them to understand how the image and pixelization map to one +another. Your exercises are: + + 1) Think about how this could help us actually model galaxies. We have said we're going to reconstruct our galaxies + on the pixel-grid. So, how does knowing how each pixel maps to the image actually help us? If you`ve not got + any bright ideas, then worry not, that exactly what we're going to cover in the next tutorial. +""" diff --git a/scripts/chapter_4_pixelizations/tutorial_3_inversions.py b/scripts/chapter_4_pixelizations/tutorial_3_inversions.py index 8b8653a..2e79428 100644 --- a/scripts/chapter_4_pixelizations/tutorial_3_inversions.py +++ b/scripts/chapter_4_pixelizations/tutorial_3_inversions.py @@ -1,225 +1,225 @@ -""" -Tutorial 3: Inversions -====================== - -In the previous two tutorials, we introduced: - - - `Pixelization`'s: which place a pixel-grid over the image data. - - `Mappers`'s: which describe how each pixelization pixel maps to one or more image pixels. - -However, non of this has actually helped us fit galaxy data or reconstruct the galaxy. This is the subject -of this tutorial, where the process of reconstructing the galaxy's light on the pixelization is called an `Inversion`. - -__Contents__ - -- **Initial Setup:** Load the dataset for illustration. -- **Pixelization:** Create a pixelization and perform an inversion to reconstruct the galaxy. -- **Positive Only Solver:** Ensure the reconstruction has only positive intensity values. -- **Wrap Up:** Summary of inversion concepts. -- **Detailed Explanation:** In-depth explanation of the linear algebra behind inversions. -""" - +""" +Tutorial 3: Inversions +====================== + +In the previous two tutorials, we introduced: + + - `Pixelization`'s: which place a pixel-grid over the image data. + - `Mappers`'s: which describe how each pixelization pixel maps to one or more image pixels. + +However, non of this has actually helped us fit galaxy data or reconstruct the galaxy. This is the subject +of this tutorial, where the process of reconstructing the galaxy's light on the pixelization is called an `Inversion`. + +__Contents__ + +- **Initial Setup:** Load the dataset for illustration. +- **Pixelization:** Create a pixelization and perform an inversion to reconstruct the galaxy. +- **Positive Only Solver:** Ensure the reconstruction has only positive intensity values. +- **Wrap Up:** Summary of inversion concepts. +- **Detailed Explanation:** In-depth explanation of the linear algebra behind inversions. +""" + # from autoconf import setup_notebook; setup_notebook() - -from pathlib import Path -import autogalaxy as ag -import autogalaxy.plot as aplt -from autoarray.inversion.plot.inversion_plots import subplot_of_mapper - -""" -__Initial Setup__ - -we'll use the same complex galaxy data as the previous tutorial, where: - - - The galaxy's bulge is an `Sersic`. - - The galaxy's disk is an `Exponential`. - - The galaxy's has four star forming clumps which are `Sersic` profiles. -""" -dataset_name = "simple" -dataset_path = Path("dataset") / "imaging" / dataset_name - -""" -__Dataset Auto-Simulation__ - -If the dataset does not already exist on your system, it will be created by running the corresponding -simulator script. This ensures that all example scripts can be run without manually simulating data first. -""" -if not dataset_path.exists(): - import subprocess - import sys - - subprocess.run( - [sys.executable, "scripts/imaging/simulator.py"], - check=True, - ) - -dataset = ag.Imaging.from_fits( - data_path=dataset_path / "data.fits", - noise_map_path=dataset_path / "noise_map.fits", - psf_path=dataset_path / "psf.fits", - pixel_scales=0.1, -) - -""" -Lets create a circular mask which contains the galaxy's emission: -""" -mask = ag.Mask2D.circular( - shape_native=dataset.shape_native, pixel_scales=dataset.pixel_scales, radius=2.0 -) - -aplt.plot_array(array=dataset.data, title="Data", mask=mask) - -""" -We now create the masked imaging, as we did in the previous tutorial. -""" -dataset = dataset.apply_mask(mask=mask) - -""" -we again use the rectangular pixelization to create the mapper. - -(Ignore the regularization input below for now, we will cover this in the next tutorial). -""" -mesh = ag.mesh.RectangularAdaptDensity(shape=dataset.shape_native) - -pixelization = ag.Pixelization(mesh=mesh) - -interpolator = mesh.interpolator_from( - source_plane_data_grid=dataset.grids.pixelization, - source_plane_mesh_grid=None, -) - -mapper = ag.Mapper( - interpolator=interpolator, - regularization=ag.reg.Constant(coefficient=1.0), -) - -aplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) - -""" -__Pixelization__ - -Finally, we can now use the `Mapper` to reconstruct the galaxy via an `Inversion`. I'll explain how this works in a -second, but lets just go ahead and create the inversion first. -""" -inversion = ag.Inversion(dataset=dataset, linear_obj_list=[mapper]) - -""" -The inversion has reconstructed the galaxy's light on the rectangular pixel grid, which is called the -`reconstruction`. - -This reconstruction can be mapped back to the same resolution as the image to produce the `mapped_reconstructed_operated_data`. -""" -print(inversion.reconstruction) -print(inversion.mapped_reconstructed_operated_data) - -""" -Both of these can be plotted using `subplot_of_mapper`. - -It is possible for an inversion to have multiple `Mapper`'s, therefore for certain figures we specify the index -of the mapper we wish to plot. In this case, because we only have one mapper we specify the index 0. -""" -aplt.plot_array(array=inversion.reconstruction_to_native, title="Reconstruction") -subplot_of_mapper(inversion=inversion, mapper_index=0) - -""" -There we have it, we have successfully reconstructed the galaxy using a rectangular pixel-grid. This has reconstructed -the complex blobs of light of the galaxy. - -Pretty great, huh? If you ran the complex source pipeline in chapter 3, you'll remember that getting a model image -that looked this good simply *was not possible*. With an inversion, we can do this with ease and without having to -perform model-fitting with 20+ parameters for the galaxy's light! - -We will now briefly discuss how an inversion actually works, however the explanation I give in this tutorial will be -overly-simplified. To be good at modeling you do not need to understand the details of how an inversion works, you -simply need to be able to use an inversion to model a galaxy. - -To begin, lets consider some random mappings between our mapper`s pixelization pixels and the image. -""" -pix_indexes = [[445], [285], [313], [132], [11]] - -indexes = mapper.slim_indexes_for_pix_indexes(pix_indexes=pix_indexes) - -aplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) - -""" -These mappings are known before the inversion reconstructs the galaxy, which means before this inversion is performed -we know two key pieces of information: - - 1) The mappings between every pixelization pixel and a set of image-pixels. - 2) The flux values in every observed image-pixel, which are the values we want to fit successfully. - -It turns out that with these two pieces of information we can linearly solve for the set of pixelization pixel fluxes -that best-fit (e.g. maximize the log likelihood) our observed image. Essentially, we set up the mappings between -pixelization and image pixels as a large matrix and solve for the pixelization pixel fluxes in an analogous fashion to -how you would solve a set of simultaneous linear equations. This process is called a `linear inversion`. - -There are three more things about a linear inversion that are worth knowing: - - 1) When performing fits using light profiles, we discussed how a `model_image` was generated by convolving the light - profile images with the data's PSF. A similar blurring operation is incorporated into the inversion, such that it - reconstructs a galaxy (and therefore image) which fully accounts for the telescope optics and effect of the PSF. - - 2) You may be familiar with image sub-gridding, which splits each image-pixel into a sub-pixel (if you are not - familiar then feel free to checkout the optional **HowToGalaxy** tutorial on sub-gridding. If a sub-grid is used, it is - the mapping between every sub-pixel -pixel that is computed and used to perform the inversion. This prevents - aliasing effects degrading the image reconstruction. By default **PyAutoGalaxy** uses sub-gridding of degree 4x4. - - 3) The inversion`s solution is regularized. But wait, that`s what we'll cover in the next tutorial! - -Finally, let me show you how easy it is to fit an image with an `Inversion` using a `FitImaging` object. Instead of -giving the galaxy a light profile, we simply pass it a `Pixelization` and regularization, and pass it to a -galaxies. -""" -pixelization = ag.Pixelization( - mesh=ag.mesh.RectangularAdaptDensity(shape=(25, 25)), - regularization=ag.reg.Constant(coefficient=1.0), -) - -galaxy = ag.Galaxy(redshift=1.0, pixelization=pixelization) - -galaxies = ag.Galaxies(galaxies=[galaxy]) - -""" -Then, like before, we pass the imaging and galaxies `FitImaging` object. - -We see some pretty good looking residuals, albeit there is faint flux leftover. We will consider how we can address -this in the next tutorial. - -We can use the `subplot_of_galaxies` method to specifically visualize the inversion and plot the reconstruction. -""" -fit = ag.FitImaging(dataset=dataset, galaxies=galaxies) - -aplt.subplot_fit_imaging(fit=fit) - -""" -__Positive Only Solver__ - -All pixelized source reconstructions use a positive-only solver, meaning that every source-pixel is only allowed -to reconstruct positive flux values. This ensures that the source reconstruction is physical and that we don't -reconstruct negative flux values that don't exist in the real source galaxy (a common systematic solution in lens -analysis). - -It may be surprising to hear that this is a feature worth pointing out, but it turns out setting up the linear algebra -to enforce positive reconstructions is difficult to make efficient. A lot of development time went into making this -possible, where a bespoke fast non-negative linear solver was developed to achieve this. - -Other methods in the literature often do not use a positive only solver, and therefore suffer from these -unphysical solutions, which can degrade the results of lens model in general. - -__Wrap Up__ - -And, we're done, here are a few questions to get you thinking about inversions: - - 1) The inversion provides the maximum log likelihood solution to the observed image. Is there a problem with seeking - the highest likelihood solution? Is there a risk that we're going to fit other things in the image than just the - galaxy? What happens if you reduce the `coefficient` of the regularization object above to zero? - - 2) The exterior pixels in the rectangular pixel-grid have no image-pixels in them. However, they are still given a - reconstructed flux. Given these pixels do not map to the data, where is this value coming from? - -__Detailed Explanation__ - -If you are interested in a more detailed description of how inversions work, then checkout the file -`autogalaxy_workspace/*/imaging/log_likelihood_function/inversion.ipynb` which gives a visual step-by-step -guide of the process alongside equations and references to literature on the subject. -""" + +from pathlib import Path +import autogalaxy as ag +import autogalaxy.plot as aplt +from autoarray.inversion.plot.inversion_plots import subplot_of_mapper + +""" +__Initial Setup__ + +we'll use the same complex galaxy data as the previous tutorial, where: + + - The galaxy's bulge is an `Sersic`. + - The galaxy's disk is an `Exponential`. + - The galaxy's has four star forming clumps which are `Sersic` profiles. +""" +dataset_name = "simple" +dataset_path = Path("dataset") / "imaging" / dataset_name + +""" +__Dataset Auto-Simulation__ + +If the dataset does not already exist on your system, it will be created by running the corresponding +simulator script. This ensures that all example scripts can be run without manually simulating data first. +""" +if not dataset_path.exists(): + import subprocess + import sys + + subprocess.run( + [sys.executable, "scripts/simulators/simple.py"], + check=True, + ) + +dataset = ag.Imaging.from_fits( + data_path=dataset_path / "data.fits", + noise_map_path=dataset_path / "noise_map.fits", + psf_path=dataset_path / "psf.fits", + pixel_scales=0.1, +) + +""" +Lets create a circular mask which contains the galaxy's emission: +""" +mask = ag.Mask2D.circular( + shape_native=dataset.shape_native, pixel_scales=dataset.pixel_scales, radius=2.0 +) + +aplt.plot_array(array=dataset.data, title="Data", mask=mask) + +""" +We now create the masked imaging, as we did in the previous tutorial. +""" +dataset = dataset.apply_mask(mask=mask) + +""" +we again use the rectangular pixelization to create the mapper. + +(Ignore the regularization input below for now, we will cover this in the next tutorial). +""" +mesh = ag.mesh.RectangularAdaptDensity(shape=dataset.shape_native) + +pixelization = ag.Pixelization(mesh=mesh) + +interpolator = mesh.interpolator_from( + source_plane_data_grid=dataset.grids.pixelization, + source_plane_mesh_grid=None, +) + +mapper = ag.Mapper( + interpolator=interpolator, + regularization=ag.reg.Constant(coefficient=1.0), +) + +aplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) + +""" +__Pixelization__ + +Finally, we can now use the `Mapper` to reconstruct the galaxy via an `Inversion`. I'll explain how this works in a +second, but lets just go ahead and create the inversion first. +""" +inversion = ag.Inversion(dataset=dataset, linear_obj_list=[mapper]) + +""" +The inversion has reconstructed the galaxy's light on the rectangular pixel grid, which is called the +`reconstruction`. + +This reconstruction can be mapped back to the same resolution as the image to produce the `mapped_reconstructed_operated_data`. +""" +print(inversion.reconstruction) +print(inversion.mapped_reconstructed_operated_data) + +""" +Both of these can be plotted using `subplot_of_mapper`. + +It is possible for an inversion to have multiple `Mapper`'s, therefore for certain figures we specify the index +of the mapper we wish to plot. In this case, because we only have one mapper we specify the index 0. +""" +aplt.plot_array(array=inversion.reconstruction_to_native, title="Reconstruction") +subplot_of_mapper(inversion=inversion, mapper_index=0) + +""" +There we have it, we have successfully reconstructed the galaxy using a rectangular pixel-grid. This has reconstructed +the complex blobs of light of the galaxy. + +Pretty great, huh? If you ran the complex source pipeline in chapter 3, you'll remember that getting a model image +that looked this good simply *was not possible*. With an inversion, we can do this with ease and without having to +perform model-fitting with 20+ parameters for the galaxy's light! + +We will now briefly discuss how an inversion actually works, however the explanation I give in this tutorial will be +overly-simplified. To be good at modeling you do not need to understand the details of how an inversion works, you +simply need to be able to use an inversion to model a galaxy. + +To begin, lets consider some random mappings between our mapper`s pixelization pixels and the image. +""" +pix_indexes = [[445], [285], [313], [132], [11]] + +indexes = mapper.slim_indexes_for_pix_indexes(pix_indexes=pix_indexes) + +aplt.subplot_image_and_mapper(mapper=mapper, image=dataset.data) + +""" +These mappings are known before the inversion reconstructs the galaxy, which means before this inversion is performed +we know two key pieces of information: + + 1) The mappings between every pixelization pixel and a set of image-pixels. + 2) The flux values in every observed image-pixel, which are the values we want to fit successfully. + +It turns out that with these two pieces of information we can linearly solve for the set of pixelization pixel fluxes +that best-fit (e.g. maximize the log likelihood) our observed image. Essentially, we set up the mappings between +pixelization and image pixels as a large matrix and solve for the pixelization pixel fluxes in an analogous fashion to +how you would solve a set of simultaneous linear equations. This process is called a `linear inversion`. + +There are three more things about a linear inversion that are worth knowing: + + 1) When performing fits using light profiles, we discussed how a `model_image` was generated by convolving the light + profile images with the data's PSF. A similar blurring operation is incorporated into the inversion, such that it + reconstructs a galaxy (and therefore image) which fully accounts for the telescope optics and effect of the PSF. + + 2) You may be familiar with image sub-gridding, which splits each image-pixel into a sub-pixel (if you are not + familiar then feel free to checkout the optional **HowToGalaxy** tutorial on sub-gridding. If a sub-grid is used, it is + the mapping between every sub-pixel -pixel that is computed and used to perform the inversion. This prevents + aliasing effects degrading the image reconstruction. By default **PyAutoGalaxy** uses sub-gridding of degree 4x4. + + 3) The inversion`s solution is regularized. But wait, that`s what we'll cover in the next tutorial! + +Finally, let me show you how easy it is to fit an image with an `Inversion` using a `FitImaging` object. Instead of +giving the galaxy a light profile, we simply pass it a `Pixelization` and regularization, and pass it to a +galaxies. +""" +pixelization = ag.Pixelization( + mesh=ag.mesh.RectangularAdaptDensity(shape=(25, 25)), + regularization=ag.reg.Constant(coefficient=1.0), +) + +galaxy = ag.Galaxy(redshift=1.0, pixelization=pixelization) + +galaxies = ag.Galaxies(galaxies=[galaxy]) + +""" +Then, like before, we pass the imaging and galaxies `FitImaging` object. + +We see some pretty good looking residuals, albeit there is faint flux leftover. We will consider how we can address +this in the next tutorial. + +We can use the `subplot_of_galaxies` method to specifically visualize the inversion and plot the reconstruction. +""" +fit = ag.FitImaging(dataset=dataset, galaxies=galaxies) + +aplt.subplot_fit_imaging(fit=fit) + +""" +__Positive Only Solver__ + +All pixelized source reconstructions use a positive-only solver, meaning that every source-pixel is only allowed +to reconstruct positive flux values. This ensures that the source reconstruction is physical and that we don't +reconstruct negative flux values that don't exist in the real source galaxy (a common systematic solution in lens +analysis). + +It may be surprising to hear that this is a feature worth pointing out, but it turns out setting up the linear algebra +to enforce positive reconstructions is difficult to make efficient. A lot of development time went into making this +possible, where a bespoke fast non-negative linear solver was developed to achieve this. + +Other methods in the literature often do not use a positive only solver, and therefore suffer from these +unphysical solutions, which can degrade the results of lens model in general. + +__Wrap Up__ + +And, we're done, here are a few questions to get you thinking about inversions: + + 1) The inversion provides the maximum log likelihood solution to the observed image. Is there a problem with seeking + the highest likelihood solution? Is there a risk that we're going to fit other things in the image than just the + galaxy? What happens if you reduce the `coefficient` of the regularization object above to zero? + + 2) The exterior pixels in the rectangular pixel-grid have no image-pixels in them. However, they are still given a + reconstructed flux. Given these pixels do not map to the data, where is this value coming from? + +__Detailed Explanation__ + +If you are interested in a more detailed description of how inversions work, then checkout the file +`autogalaxy_workspace/*/imaging/log_likelihood_function/inversion.ipynb` which gives a visual step-by-step +guide of the process alongside equations and references to literature on the subject. +""" diff --git a/scripts/chapter_4_pixelizations/tutorial_4_bayesian_regularization.py b/scripts/chapter_4_pixelizations/tutorial_4_bayesian_regularization.py index 82d37ad..ee3d55b 100644 --- a/scripts/chapter_4_pixelizations/tutorial_4_bayesian_regularization.py +++ b/scripts/chapter_4_pixelizations/tutorial_4_bayesian_regularization.py @@ -1,289 +1,289 @@ -""" -Tutorial 4: Bayesian Regularization -=================================== - -So far, we have: - - - Used pixelizations and mappers to map pixelization pixels to image-pixels and visa versa. - - Successfully used an inversion to reconstruct a galaxy. - - Seen that this reconstruction provides a good fit of the observed image, providing a high likelihood solution. - -The explanation of *how* an inversion works has so far been overly simplified. You'll have noted the regularization -inputs which we have not so far discussed. This will be the topic of this tutorial, and where inversions become more -conceptually challenging! - -__Contents__ - -- **Initial Setup:** Load the dataset for illustration. -- **Convenience Function:** A helper function for performing inversions. -- **Pixelization:** Perform inversions with different regularization coefficients. -- **Regularization:** Understand how regularization smooths the reconstruction. -- **Bayesian Evidence:** Use the Bayesian evidence to objectively choose the regularization coefficient. -- **Non-Linear and Linear:** Discussion of how regularization interacts with the non-linear search. -- **Detailed Description:** In-depth explanation of how the Bayesian evidence penalizes overfitting. -""" - +""" +Tutorial 4: Bayesian Regularization +=================================== + +So far, we have: + + - Used pixelizations and mappers to map pixelization pixels to image-pixels and visa versa. + - Successfully used an inversion to reconstruct a galaxy. + - Seen that this reconstruction provides a good fit of the observed image, providing a high likelihood solution. + +The explanation of *how* an inversion works has so far been overly simplified. You'll have noted the regularization +inputs which we have not so far discussed. This will be the topic of this tutorial, and where inversions become more +conceptually challenging! + +__Contents__ + +- **Initial Setup:** Load the dataset for illustration. +- **Convenience Function:** A helper function for performing inversions. +- **Pixelization:** Perform inversions with different regularization coefficients. +- **Regularization:** Understand how regularization smooths the reconstruction. +- **Bayesian Evidence:** Use the Bayesian evidence to objectively choose the regularization coefficient. +- **Non-Linear and Linear:** Discussion of how regularization interacts with the non-linear search. +- **Detailed Description:** In-depth explanation of how the Bayesian evidence penalizes overfitting. +""" + # from autoconf import setup_notebook; setup_notebook() - -from pathlib import Path -import autogalaxy as ag -import autogalaxy.plot as aplt -from autoarray.inversion.plot.inversion_plots import subplot_of_mapper - -""" -__Initial Setup__ - -we'll use the same complex galaxy data as the previous tutorial, where: - - - The galaxy's bulge is an `Sersic`. - - The galaxy's disk is an `Exponential`. - - The galaxy's has four star forming clumps which are `Sersic` profiles. -""" -dataset_name = "simple" -dataset_path = Path("dataset") / "imaging" / dataset_name - -""" -__Dataset Auto-Simulation__ - -If the dataset does not already exist on your system, it will be created by running the corresponding -simulator script. This ensures that all example scripts can be run without manually simulating data first. -""" -if not dataset_path.exists(): - import subprocess - import sys - - subprocess.run( - [sys.executable, "scripts/imaging/simulator.py"], - check=True, - ) - -dataset = ag.Imaging.from_fits( - data_path=dataset_path / "data.fits", - noise_map_path=dataset_path / "noise_map.fits", - psf_path=dataset_path / "psf.fits", - pixel_scales=0.1, -) - -""" -__Convenience Function__ - -we're going to perform a lot of fits using an `Inversion` this tutorial. This would create a lot of code, so to keep -things tidy, I've setup this function which handles it all for us. -""" - - -def perform_fit_with_galaxy(dataset, galaxy): - mask = ag.Mask2D.circular( - shape_native=dataset.shape_native, pixel_scales=dataset.pixel_scales, radius=2.0 - ) - - dataset = dataset.apply_mask(mask=mask) - - galaxies = ag.Galaxies(galaxies=[galaxy]) - - return ag.FitImaging(dataset=dataset, galaxies=galaxies) - - -""" -__Pixelization__ - -Okay, so lets look at our fit from the previous tutorial in more detail. -""" -pixelization = ag.Pixelization( - mesh=ag.mesh.RectangularAdaptDensity(shape=(50, 50)), - regularization=ag.reg.Constant(coefficient=1.0), -) - -galaxy = ag.Galaxy(redshift=1.0, pixelization=pixelization) - -fit = perform_fit_with_galaxy(dataset=dataset, galaxy=galaxy) - -aplt.subplot_fit_imaging(fit=fit) -subplot_of_mapper(inversion=fit.inversion, mapper_index=0) - -""" -__Regularization__ - -The galaxy reconstruction looks pretty good! - -However, the high quality of this solution was possible because I chose a `coefficient` for the regularization input of -1.0. If we reduce this `coefficient` to 0.01, the galaxy reconstruction goes *very* weird. -""" -pixelization = ag.Pixelization( - mesh=ag.mesh.RectangularAdaptDensity(shape=(50, 50)), - regularization=ag.reg.Constant(coefficient=0.01), -) - -galaxy = ag.Galaxy(redshift=1.0, pixelization=pixelization) - -no_regularization_fit = perform_fit_with_galaxy(dataset=dataset, galaxy=galaxy) - -aplt.subplot_fit_imaging(fit=no_regularization_fit) -subplot_of_mapper(inversion=no_regularization_fit.inversion, mapper_index=0) - -""" -So, what is happening here? Why does reducing the `coefficient` do this to our reconstruction? First, we need -to understand what regularization actually does! - -When the inversion reconstructs the galaxy, it does not *only* compute the set of pixelization pixel fluxes that -best-fit the image. It also regularizes this solution, whereby it goes to every pixel on the rectangular grid -and computes the different between the reconstructed flux values of every pixel with its 4 neighboring pixels. -If the difference in flux is large the solution is penalized, reducing its log likelihood. You can think of this as -us applying a 'smoothness prior' on the reconstructed galaxy's light. - -This smoothing adds a 'penalty term' to the log likelihood of an inversion which is the summed difference between the -reconstructed fluxes of every pixelization pixel pair multiplied by the `coefficient`. By setting the regularization -coefficient to zero, we set this penalty term to zero, meaning that regularization is completely omitted. - -Why do we need to regularize our solution? We just saw why, if we do not apply this smoothness prior to the galaxy -reconstruction, we `over-fit` the image and reconstruct a noisy galaxy with lots of extraneous features. This is what -the aliasing chequer-board effect is caused by. If the inversions's sole aim is to maximize the log likelihood, it can -do this by fitting *everything* accurately, including the noise. - -Over-fitting is why regularization is necessary. Solutions like this will completely ruin our attempts to model a -galaxy. By smoothing our galaxy reconstruction we ensure it does not over fit noise in the image. - -So, what happens if we apply a high value for the regularization coefficient? -""" -pixelization = ag.Pixelization( - mesh=ag.mesh.RectangularAdaptDensity(shape=(50, 50)), - regularization=ag.reg.Constant(coefficient=100.0), -) - -galaxy = ag.Galaxy(redshift=1.0, pixelization=pixelization) - -high_regularization_fit = perform_fit_with_galaxy(dataset=dataset, galaxy=galaxy) - -aplt.subplot_fit_imaging(fit=high_regularization_fit) -subplot_of_mapper(inversion=high_regularization_fit.inversion, mapper_index=0) - -""" -The figure above shows that we completely remove over-fitting. However, we now fit the image data less poorly, -due to the much higher level of smoothing. - -So, we now understand what regularization is and why it is necessary. There is one nagging question that remains, how -do I choose the regularization coefficient value? We can not use the log likelihood, as decreasing the regularization -coefficient will always increase the log likelihood, because less smoothing allows the reconstruction to fit -the data better. -""" -print("Likelihood Without Regularization:") -print(no_regularization_fit.log_likelihood_with_regularization) -print("Likelihood With Normal Regularization:") -print(fit.log_likelihood_with_regularization) -print("Likelihood With High Regularization:") -print(high_regularization_fit.log_likelihood_with_regularization) - -""" -__Bayesian Evidence__ - -For inversions, we therefore need a different goodness-of-fit measure to choose the appropriate level of regularization. - -For this, we invoke the `Bayesian Evidence`, which quantifies the goodness of the fit as follows: - - - It requires that the residuals of the fit are consistent with Gaussian noise (which is the type of noise expected - in the imaging data). If this Gaussian pattern is not visible in the residuals, the noise must have been over-fitted - by the inversion. The Bayesian evidence will therefore decrease. If the image is fitted poorly due to over smoothing, - the residuals will again not appear Gaussian either, again producing a decrease in the Bayesian evidence value. - - - There can be many solutions which fit the data to the noise level, without over-fitting. To determine the best - solutions from these solutions, the Bayesian evidence therefore also quantifies the complexity of the galaxy - reconstruction. If an inversion requires many pixels and a low level of regularization to achieve a good fit, the - Bayesian evidence will decrease. The evidence penalizes solutions which are complex, which, in a Bayesian sense, are - less probable (you may want to look up `Occam`s Razor`). - -The Bayesian evidence therefore ensures we only invoke a more complex galaxy reconstruction when the data absolutely -necessitates it. - -Lets take a look at the Bayesian evidence of the fits that we performed above, which is accessible from a `FitImaging` -object via the `log_evidence` property: -""" -print("Bayesian Evidence Without Regularization:") -print(no_regularization_fit.log_evidence) -print("Bayesian Evidence With Normal Regularization:") -print(fit.log_evidence) -print("Bayesian Evidence With High Regularization:") -print(high_regularization_fit.log_evidence) - -""" -As expected, the solution that we could see `by-eye` was the best solution corresponds to the highest log evidence -solution. - -__Non-Linear and Linear__ - -Before we end, lets consider which aspects of an inversion are linear and which are non-linear. - -The linear part of the inversion is the step that solves for the reconstruct pixelization pixel fluxes, including -accounting for the smoothing via regularizaton. We do not have to perform a non-linear search to determine the pixel -fluxes or compute the Bayesian evidence discussed above. - -However, determining the regularization `coefficient` that maximizes the Bayesian log evidence is a non-linear problem -that requires a non-linear search. The Bayesian evidence also depends on the grid resolution, which means the -pixel-grid's `shape` parameter may also now become dimensions of non linear parameter space (albeit it is common -practise for us to simply use the resolution of the image data, or a multiple of this). - -Nevertheless, these total only 3 non-linear parameters, far fewer than the 20+ that are required when modeling such a -complex galaxy using light profiles for every individual clump! - -Here are a few questions for you to think about. - - 1) We maximize the log evidence by using simpler galaxy reconstructions. Therefore, decreasing the pixel-grid - size should provide a higher log_evidence, provided it still has sufficiently high resolution to fit the image well - (and provided that the regularization coefficient is set to an appropriate value). Can you increase the log evidence - from the value above by changing these parameters, I've set you up with a code to do so below. -""" -pixelization = ag.Pixelization( - mesh=ag.mesh.RectangularAdaptDensity(shape=(50, 50)), - regularization=ag.reg.Constant(coefficient=1.0), -) - -galaxy = ag.Galaxy(redshift=1.0, pixelization=pixelization) - -fit = perform_fit_with_galaxy(dataset=dataset, galaxy=galaxy) - -print("Previous Bayesian Evidence:") -print(3988.0716851250163) -print("New Bayesian Evidence:") -print(fit.log_evidence) - -aplt.subplot_fit_imaging(fit=fit) - -""" -__Detailed Description__ - -Below, I provide a more detailed discussion of the Bayesian evidence. It is not paramount that you understand this to -use **PyAutoGalaxy**, but I recommend you give it a read to get an intuition for how the evidence works. - -The Bayesian log evidence quantifies the following 3 aspects of a fit to galaxy imaging data: - -1) *The quality of the image reconstruction:* The galaxy reconstruction is a linear inversion which uses the observed - values in the image-data to fit it and reconstruct the galaxy. It is in principle able to perfectly reconstruct the - image regardless of the image’s noise or the accuracy of the model (e.g. at infinite resolution without - regularization). The problem is therefore ‘ill-posed’ and this is why regularization is necessary. - - However, this raises the question of what constitutes a ‘good’ solution? The Bayesian evidence defines this by - assuming that the image data consists of independent Gaussian noise in every image pixel. A ‘good’ solution is one - whose chi-squared residuals are consistent with Gaussian noise, producing a reduced chi-squared near 1.0 .Solutions - which give a reduced chi squared below 1 are penalized for being overly complex and fitting the image’s noise, whereas - solutions with a reduced chi-squared above are penalized for not invoking a more complex galaxy model when the data it - is necessary to fit the data bettter. In both circumstances, these penalties reduce the inferred Bayesian evidence! - -2) *The complexity of the galaxy reconstruction:* The log evidence estimates the number of pixelization pixels that are used - to reconstruct the image, after accounting for their correlation with one another due to regularization. Solutions that - require fewer correlated galaxy pixels increase the Bayesian evidence. Thus, simpler and less complex galaxy - reconstructions are favoured. - -3) *The signal-to-noise (S/N) of the image that is fitted:* The Bayesian evidence favours models which fit higher S/N - realizations of the observed data (where the S/N is determined using the image-pixel variances, e.g. the noise-map). Up - to now, all **PyAutoGalaxy** fits assumed fixed variances, meaning that this aspect of the Bayeisan evidence has no impact - on the inferred evidence values. - - The premise is that whilst increasing the variances of image pixels lowers their S/N values and therefore also - decreases the log evidence, doing so may produce a net increase in log evidence. This occurs when the chi-squared - values of the image pixels whose variances are increased were initially very high (e.g. they were fit poorly by the - model). - -In summary, the log evidence is maximized for solutions which most accurately reconstruct the highest S/N realization of -the observed image, without over-fitting its noise and using the fewest correlated pixelization pixels. By employing -this framework throughout, **PyAutoGalaxy** objectively determines the final model following the principles of Bayesian -analysis and Occam’s Razor. -""" + +from pathlib import Path +import autogalaxy as ag +import autogalaxy.plot as aplt +from autoarray.inversion.plot.inversion_plots import subplot_of_mapper + +""" +__Initial Setup__ + +we'll use the same complex galaxy data as the previous tutorial, where: + + - The galaxy's bulge is an `Sersic`. + - The galaxy's disk is an `Exponential`. + - The galaxy's has four star forming clumps which are `Sersic` profiles. +""" +dataset_name = "simple" +dataset_path = Path("dataset") / "imaging" / dataset_name + +""" +__Dataset Auto-Simulation__ + +If the dataset does not already exist on your system, it will be created by running the corresponding +simulator script. This ensures that all example scripts can be run without manually simulating data first. +""" +if not dataset_path.exists(): + import subprocess + import sys + + subprocess.run( + [sys.executable, "scripts/simulators/simple.py"], + check=True, + ) + +dataset = ag.Imaging.from_fits( + data_path=dataset_path / "data.fits", + noise_map_path=dataset_path / "noise_map.fits", + psf_path=dataset_path / "psf.fits", + pixel_scales=0.1, +) + +""" +__Convenience Function__ + +we're going to perform a lot of fits using an `Inversion` this tutorial. This would create a lot of code, so to keep +things tidy, I've setup this function which handles it all for us. +""" + + +def perform_fit_with_galaxy(dataset, galaxy): + mask = ag.Mask2D.circular( + shape_native=dataset.shape_native, pixel_scales=dataset.pixel_scales, radius=2.0 + ) + + dataset = dataset.apply_mask(mask=mask) + + galaxies = ag.Galaxies(galaxies=[galaxy]) + + return ag.FitImaging(dataset=dataset, galaxies=galaxies) + + +""" +__Pixelization__ + +Okay, so lets look at our fit from the previous tutorial in more detail. +""" +pixelization = ag.Pixelization( + mesh=ag.mesh.RectangularAdaptDensity(shape=(50, 50)), + regularization=ag.reg.Constant(coefficient=1.0), +) + +galaxy = ag.Galaxy(redshift=1.0, pixelization=pixelization) + +fit = perform_fit_with_galaxy(dataset=dataset, galaxy=galaxy) + +aplt.subplot_fit_imaging(fit=fit) +subplot_of_mapper(inversion=fit.inversion, mapper_index=0) + +""" +__Regularization__ + +The galaxy reconstruction looks pretty good! + +However, the high quality of this solution was possible because I chose a `coefficient` for the regularization input of +1.0. If we reduce this `coefficient` to 0.01, the galaxy reconstruction goes *very* weird. +""" +pixelization = ag.Pixelization( + mesh=ag.mesh.RectangularAdaptDensity(shape=(50, 50)), + regularization=ag.reg.Constant(coefficient=0.01), +) + +galaxy = ag.Galaxy(redshift=1.0, pixelization=pixelization) + +no_regularization_fit = perform_fit_with_galaxy(dataset=dataset, galaxy=galaxy) + +aplt.subplot_fit_imaging(fit=no_regularization_fit) +subplot_of_mapper(inversion=no_regularization_fit.inversion, mapper_index=0) + +""" +So, what is happening here? Why does reducing the `coefficient` do this to our reconstruction? First, we need +to understand what regularization actually does! + +When the inversion reconstructs the galaxy, it does not *only* compute the set of pixelization pixel fluxes that +best-fit the image. It also regularizes this solution, whereby it goes to every pixel on the rectangular grid +and computes the different between the reconstructed flux values of every pixel with its 4 neighboring pixels. +If the difference in flux is large the solution is penalized, reducing its log likelihood. You can think of this as +us applying a 'smoothness prior' on the reconstructed galaxy's light. + +This smoothing adds a 'penalty term' to the log likelihood of an inversion which is the summed difference between the +reconstructed fluxes of every pixelization pixel pair multiplied by the `coefficient`. By setting the regularization +coefficient to zero, we set this penalty term to zero, meaning that regularization is completely omitted. + +Why do we need to regularize our solution? We just saw why, if we do not apply this smoothness prior to the galaxy +reconstruction, we `over-fit` the image and reconstruct a noisy galaxy with lots of extraneous features. This is what +the aliasing chequer-board effect is caused by. If the inversions's sole aim is to maximize the log likelihood, it can +do this by fitting *everything* accurately, including the noise. + +Over-fitting is why regularization is necessary. Solutions like this will completely ruin our attempts to model a +galaxy. By smoothing our galaxy reconstruction we ensure it does not over fit noise in the image. + +So, what happens if we apply a high value for the regularization coefficient? +""" +pixelization = ag.Pixelization( + mesh=ag.mesh.RectangularAdaptDensity(shape=(50, 50)), + regularization=ag.reg.Constant(coefficient=100.0), +) + +galaxy = ag.Galaxy(redshift=1.0, pixelization=pixelization) + +high_regularization_fit = perform_fit_with_galaxy(dataset=dataset, galaxy=galaxy) + +aplt.subplot_fit_imaging(fit=high_regularization_fit) +subplot_of_mapper(inversion=high_regularization_fit.inversion, mapper_index=0) + +""" +The figure above shows that we completely remove over-fitting. However, we now fit the image data less poorly, +due to the much higher level of smoothing. + +So, we now understand what regularization is and why it is necessary. There is one nagging question that remains, how +do I choose the regularization coefficient value? We can not use the log likelihood, as decreasing the regularization +coefficient will always increase the log likelihood, because less smoothing allows the reconstruction to fit +the data better. +""" +print("Likelihood Without Regularization:") +print(no_regularization_fit.log_likelihood_with_regularization) +print("Likelihood With Normal Regularization:") +print(fit.log_likelihood_with_regularization) +print("Likelihood With High Regularization:") +print(high_regularization_fit.log_likelihood_with_regularization) + +""" +__Bayesian Evidence__ + +For inversions, we therefore need a different goodness-of-fit measure to choose the appropriate level of regularization. + +For this, we invoke the `Bayesian Evidence`, which quantifies the goodness of the fit as follows: + + - It requires that the residuals of the fit are consistent with Gaussian noise (which is the type of noise expected + in the imaging data). If this Gaussian pattern is not visible in the residuals, the noise must have been over-fitted + by the inversion. The Bayesian evidence will therefore decrease. If the image is fitted poorly due to over smoothing, + the residuals will again not appear Gaussian either, again producing a decrease in the Bayesian evidence value. + + - There can be many solutions which fit the data to the noise level, without over-fitting. To determine the best + solutions from these solutions, the Bayesian evidence therefore also quantifies the complexity of the galaxy + reconstruction. If an inversion requires many pixels and a low level of regularization to achieve a good fit, the + Bayesian evidence will decrease. The evidence penalizes solutions which are complex, which, in a Bayesian sense, are + less probable (you may want to look up `Occam`s Razor`). + +The Bayesian evidence therefore ensures we only invoke a more complex galaxy reconstruction when the data absolutely +necessitates it. + +Lets take a look at the Bayesian evidence of the fits that we performed above, which is accessible from a `FitImaging` +object via the `log_evidence` property: +""" +print("Bayesian Evidence Without Regularization:") +print(no_regularization_fit.log_evidence) +print("Bayesian Evidence With Normal Regularization:") +print(fit.log_evidence) +print("Bayesian Evidence With High Regularization:") +print(high_regularization_fit.log_evidence) + +""" +As expected, the solution that we could see `by-eye` was the best solution corresponds to the highest log evidence +solution. + +__Non-Linear and Linear__ + +Before we end, lets consider which aspects of an inversion are linear and which are non-linear. + +The linear part of the inversion is the step that solves for the reconstruct pixelization pixel fluxes, including +accounting for the smoothing via regularizaton. We do not have to perform a non-linear search to determine the pixel +fluxes or compute the Bayesian evidence discussed above. + +However, determining the regularization `coefficient` that maximizes the Bayesian log evidence is a non-linear problem +that requires a non-linear search. The Bayesian evidence also depends on the grid resolution, which means the +pixel-grid's `shape` parameter may also now become dimensions of non linear parameter space (albeit it is common +practise for us to simply use the resolution of the image data, or a multiple of this). + +Nevertheless, these total only 3 non-linear parameters, far fewer than the 20+ that are required when modeling such a +complex galaxy using light profiles for every individual clump! + +Here are a few questions for you to think about. + + 1) We maximize the log evidence by using simpler galaxy reconstructions. Therefore, decreasing the pixel-grid + size should provide a higher log_evidence, provided it still has sufficiently high resolution to fit the image well + (and provided that the regularization coefficient is set to an appropriate value). Can you increase the log evidence + from the value above by changing these parameters, I've set you up with a code to do so below. +""" +pixelization = ag.Pixelization( + mesh=ag.mesh.RectangularAdaptDensity(shape=(50, 50)), + regularization=ag.reg.Constant(coefficient=1.0), +) + +galaxy = ag.Galaxy(redshift=1.0, pixelization=pixelization) + +fit = perform_fit_with_galaxy(dataset=dataset, galaxy=galaxy) + +print("Previous Bayesian Evidence:") +print(3988.0716851250163) +print("New Bayesian Evidence:") +print(fit.log_evidence) + +aplt.subplot_fit_imaging(fit=fit) + +""" +__Detailed Description__ + +Below, I provide a more detailed discussion of the Bayesian evidence. It is not paramount that you understand this to +use **PyAutoGalaxy**, but I recommend you give it a read to get an intuition for how the evidence works. + +The Bayesian log evidence quantifies the following 3 aspects of a fit to galaxy imaging data: + +1) *The quality of the image reconstruction:* The galaxy reconstruction is a linear inversion which uses the observed + values in the image-data to fit it and reconstruct the galaxy. It is in principle able to perfectly reconstruct the + image regardless of the image’s noise or the accuracy of the model (e.g. at infinite resolution without + regularization). The problem is therefore ‘ill-posed’ and this is why regularization is necessary. + + However, this raises the question of what constitutes a ‘good’ solution? The Bayesian evidence defines this by + assuming that the image data consists of independent Gaussian noise in every image pixel. A ‘good’ solution is one + whose chi-squared residuals are consistent with Gaussian noise, producing a reduced chi-squared near 1.0 .Solutions + which give a reduced chi squared below 1 are penalized for being overly complex and fitting the image’s noise, whereas + solutions with a reduced chi-squared above are penalized for not invoking a more complex galaxy model when the data it + is necessary to fit the data bettter. In both circumstances, these penalties reduce the inferred Bayesian evidence! + +2) *The complexity of the galaxy reconstruction:* The log evidence estimates the number of pixelization pixels that are used + to reconstruct the image, after accounting for their correlation with one another due to regularization. Solutions that + require fewer correlated galaxy pixels increase the Bayesian evidence. Thus, simpler and less complex galaxy + reconstructions are favoured. + +3) *The signal-to-noise (S/N) of the image that is fitted:* The Bayesian evidence favours models which fit higher S/N + realizations of the observed data (where the S/N is determined using the image-pixel variances, e.g. the noise-map). Up + to now, all **PyAutoGalaxy** fits assumed fixed variances, meaning that this aspect of the Bayeisan evidence has no impact + on the inferred evidence values. + + The premise is that whilst increasing the variances of image pixels lowers their S/N values and therefore also + decreases the log evidence, doing so may produce a net increase in log evidence. This occurs when the chi-squared + values of the image pixels whose variances are increased were initially very high (e.g. they were fit poorly by the + model). + +In summary, the log evidence is maximized for solutions which most accurately reconstruct the highest S/N realization of +the observed image, without over-fitting its noise and using the fewest correlated pixelization pixels. By employing +this framework throughout, **PyAutoGalaxy** objectively determines the final model following the principles of Bayesian +analysis and Occam’s Razor. +""" diff --git a/scripts/chapter_4_pixelizations/tutorial_5_model_fit.py b/scripts/chapter_4_pixelizations/tutorial_5_model_fit.py index c20b258..2d2d403 100644 --- a/scripts/chapter_4_pixelizations/tutorial_5_model_fit.py +++ b/scripts/chapter_4_pixelizations/tutorial_5_model_fit.py @@ -59,7 +59,7 @@ import sys subprocess.run( - [sys.executable, "scripts/imaging/simulator.py"], + [sys.executable, "scripts/simulators/simple.py"], check=True, ) diff --git a/scripts/simulators/sersic_x2.py b/scripts/simulators/sersic_x2.py new file mode 100644 index 0000000..8c9d6d7 --- /dev/null +++ b/scripts/simulators/sersic_x2.py @@ -0,0 +1,170 @@ +""" +Simulator: Sersic x2 +==================== + +This script simulates `Imaging` of two galaxies where: + + - The first galaxy's bulge is an `Sersic`. + - The second galaxy's bulge is an `Sersic`. + +This dataset is used in chapter 3 of the **HowToGalaxy** lectures. + +__Contents__ + +- **Dataset Paths:** Set the output path for the simulated dataset. +- **Grid:** Create a 2D grid with adaptive over-sampling for simulation. +- **Galaxies:** Define the galaxy light profiles used for simulation. +- **Output:** Save the simulated dataset to FITS files. +- **Visualize:** Output subplot and image PNGs of the simulated dataset. +- **Plane Output:** Save the Galaxies object as a JSON file. + +__Advanced__ + +This is an advanced simulator script, meaning that detailed explanations of certain code are omitted. Refer to +simulators not in the `advanced` folder for more detailed comments. + +__Start Here Notebook__ + +If any code in this script is unclear, refer to the `simulators/start_here.ipynb` notebook. +""" + +# from autoconf import setup_notebook; setup_notebook() + +from pathlib import Path +import autogalaxy as ag +import autogalaxy.plot as aplt + +""" +__Dataset Paths__ + +The `dataset_type` describes the type of data being simulated and `dataset_name` gives it a descriptive name. +""" +dataset_type = "imaging" +dataset_name = "sersic_x2" +dataset_path = Path("dataset", dataset_type, dataset_name) + +""" +__Grid__ + +Simulate the image using a (y,x) grid with the adaptive over sampling scheme. +""" +grid = ag.Grid2D.uniform( + shape_native=(150, 150), + pixel_scales=0.1, +) + +over_sample_size = ag.util.over_sample.over_sample_size_via_radial_bins_from( + grid=grid, + sub_size_list=[32, 8, 2], + radial_list=[0.3, 0.6], + centre_list=[(0.0, 0.0)], +) + +grid = grid.apply_over_sampling(over_sample_size=over_sample_size) + +""" +Simulate a simple Gaussian PSF for the image. +""" +psf = ag.Convolver.from_gaussian( + shape_native=(11, 11), sigma=0.1, pixel_scales=grid.pixel_scales +) + +""" +Create the simulator for the imaging data, which defines the exposure time, background sky, noise levels and psf. +""" +simulator = ag.SimulatorImaging( + exposure_time=300.0, + psf=psf, + background_sky_level=0.1, + add_poisson_noise_to_data=True, +) + +""" +__Galaxies__ + +Setup the two galaxy's both with a bulge (elliptical Sersic). +""" +galaxy_0 = ag.Galaxy( + redshift=0.5, + bulge=ag.lp.Sersic( + centre=(0.0, -1.0), + ell_comps=(0.25, 0.1), + intensity=0.1, + effective_radius=0.8, + sersic_index=2.5, + ), +) + +galaxy_1 = ag.Galaxy( + redshift=0.5, + bulge=ag.lp.Sersic( + centre=(0.0, 1.0), + ell_comps=(0.0, 0.1), + intensity=0.1, + effective_radius=0.6, + sersic_index=3.0, + ), +) + +""" +Use these galaxies to generate the image for the simulated `Imaging` dataset. +""" +galaxies = ag.Galaxies(galaxies=[galaxy_0, galaxy_1]) + +""" +We can now pass this simulator galaxies, which creates the image plotted above and simulates it as an +imaging dataset. +""" +dataset = simulator.via_galaxies_from(galaxies=galaxies, grid=grid) + +""" +__Output__ + +Output the simulated dataset to the dataset path as .fits files. +""" +aplt.fits_imaging( + dataset=dataset, + data_path=dataset_path / "data.fits", + psf_path=dataset_path / "psf.fits", + noise_map_path=dataset_path / "noise_map.fits", + overwrite=True, +) + +""" +__Visualize__ + +Output a subplot of the simulated dataset, the image and the galaxies quantities to the dataset path as .png files. +""" +aplt.subplot_imaging_dataset( + dataset=dataset, output_path=dataset_path, output_format="png" +) +aplt.plot_array( + array=dataset.data, + title="Data", + output_path=dataset_path, + output_format="png", +) + +aplt.subplot_galaxies( + galaxies=galaxies, + grid=grid, + output_path=dataset_path, + output_format="png", +) + +""" +__Plane Output__ + +Save the `Galaxies` in the dataset folder as a .json file, ensuring the true light profiles and galaxies +are safely stored and available to check how the dataset was simulated in the future. + +This can be loaded via the method `galaxies = ag.from_json()`. +""" +ag.output_to_json( + obj=galaxies, + file_path=Path(dataset_path, "galaxies.json"), +) + +""" +The dataset can be viewed in the folder `autogalaxy_workspace/imaging/sersic_x2`. +""" diff --git a/scripts/simulators/simple.py b/scripts/simulators/simple.py new file mode 100644 index 0000000..4508111 --- /dev/null +++ b/scripts/simulators/simple.py @@ -0,0 +1,225 @@ +""" +Simulator: Sersic + Exp +======================= + +This script simulates `Imaging` of a galaxy using light profiles where: + + - The galaxy's bulge is an `Sersic`. + - The galaxy's disk is an `Exponential`. + +__Contents__ + +- **Dataset Paths:** Defining the output path for the simulated dataset. +- **Grid:** Setting up the 2D grid of coordinates for image evaluation. +- **Over Sampling:** Applying adaptive over-sampling for accurate image simulation. +- **Galaxies:** Defining the galaxy with Sersic bulge and Exponential disk light profiles. +- **Output:** Saving the simulated dataset to FITS files. +- **Visualize:** Outputting subplot and image visualizations as PNG files. +- **Plane Output:** Saving the Galaxies object as a JSON file for future reference. +""" + +# from autoconf import setup_notebook; setup_notebook() + +from pathlib import Path +import autogalaxy as ag +import autogalaxy.plot as aplt + +""" +__Dataset Paths__ + +The `dataset_type` describes the type of data being simulated and `dataset_name` gives it a descriptive name. They define the folder the dataset is output to on your hard-disk: + + - The image will be output to `/autogalaxy_workspace/dataset/dataset_type/dataset_name/image.fits`. + - The noise-map will be output to `/autogalaxy_workspace/dataset/dataset_type/dataset_name/noise_map.fits`. + - The psf will be output to `/autogalaxy_workspace/dataset/dataset_type/dataset_name/psf.fits`. +""" +dataset_type = "imaging" +dataset_name = "simple" + +""" +The path where the dataset will be output. + +In this example, this is: `/autogalaxy_workspace/dataset/imaging/simple` +""" +dataset_path = Path("dataset", dataset_type, dataset_name) + +""" +__Grid__ + +Define the 2d grid of (y,x) coordinates that the galaxy images are evaluated and therefore simulated on, via +the inputs: + + - `shape_native`: The (y_pixels, x_pixels) 2D shape of the grid defining the shape of the data that is simulated. + - `pixel_scales`: The arc-second to pixel conversion factor of the grid and data. +""" +grid = ag.Grid2D.uniform( + shape_native=(100, 100), + pixel_scales=0.1, +) + +""" +__Over Sampling__ + +Over sampling is a numerical technique where the images of light profiles and galaxies are evaluated +on a higher resolution grid than the image data to ensure the calculation is accurate. + +An adaptive oversampling scheme is implemented, evaluating the central regions at (0.0", 0.0") of the light profile at a +resolution of 32x32, transitioning to 8x8 in intermediate areas, and 2x2 in the outskirts. This ensures precise and +accurate image simulation while focusing computational resources on the bright regions that demand higher oversampling. + +Once you are more experienced, you should read up on over-sampling in more detail via +the `autogalaxy_workspace/*/guides/over_sampling.ipynb` notebook. +""" +over_sample_size = ag.util.over_sample.over_sample_size_via_radial_bins_from( + grid=grid, + sub_size_list=[32, 8, 2], + radial_list=[0.3, 0.6], + centre_list=[(0.0, 0.0)], +) + +grid = grid.apply_over_sampling(over_sample_size=over_sample_size) + +""" +Simulate a simple Gaussian PSF for the image. +""" +psf = ag.Convolver.from_gaussian( + shape_native=(11, 11), sigma=0.1, pixel_scales=grid.pixel_scales +) + +""" +To simulate the `Imaging` dataset we first create a simulator, which defines the exposure time, background sky, +noise levels and psf of the dataset that is simulated. +""" +simulator = ag.SimulatorImaging( + exposure_time=300.0, + psf=psf, + background_sky_level=0.1, + add_poisson_noise_to_data=True, +) + +""" +__Galaxies__ + +Setup the galaxy with a bulge (elliptical Sersic) and disk (elliptical exponential) for this simulation. + +For modeling, defining ellipticity in terms of the `ell_comps` improves the model-fitting procedure. + +However, for simulating a galaxy you may find it more intuitive to define the elliptical geometry using the +axis-ratio of the profile (axis_ratio = semi-major axis / semi-minor axis = b/a) and position angle, where angle is +in degrees and defined counter clockwise from the positive x-axis. + +We can use the `convert` module to determine the elliptical components from the axis-ratio and angle. +""" +galaxy = ag.Galaxy( + redshift=0.5, + bulge=ag.lp.Sersic( + centre=(0.0, 0.0), + ell_comps=ag.convert.ell_comps_from(axis_ratio=0.9, angle=45.0), + intensity=1.0, + effective_radius=0.6, + sersic_index=3.0, + ), + disk=ag.lp.Exponential( + centre=(0.0, 0.0), + ell_comps=ag.convert.ell_comps_from(axis_ratio=0.7, angle=30.0), + intensity=0.5, + effective_radius=1.6, + ), +) + +""" +Use these galaxies to generate the image for the simulated `Imaging` dataset. +""" +galaxies = ag.Galaxies(galaxies=[galaxy]) +aplt.plot_array(array=galaxies.image_2d_from(grid=grid), title="Image") + +""" +Pass the simulator galaxies, which creates the image which is simulated as an imaging dataset. +""" +dataset = simulator.via_galaxies_from(galaxies=galaxies, grid=grid) + +""" +Plot the simulated `Imaging` dataset before outputting it to fits. +""" +aplt.subplot_imaging_dataset(dataset=dataset) + +""" +__Output__ + +Output the simulated dataset to the dataset path as .fits files. +""" +aplt.fits_imaging( + dataset=dataset, + data_path=dataset_path / "data.fits", + psf_path=dataset_path / "psf.fits", + noise_map_path=dataset_path / "noise_map.fits", + overwrite=True, +) + +""" +__Visualize__ + +Output a subplot of the simulated dataset, the image and the galaxies quantities to the dataset path as .png files. +""" +aplt.subplot_imaging_dataset( + dataset=dataset, output_path=dataset_path, output_format="png" +) +aplt.plot_array( + array=dataset.data, title="Data", output_path=dataset_path, output_format="png" +) +aplt.subplot_galaxies( + galaxies=galaxies, grid=grid, output_path=dataset_path, output_format="png" +) + +""" +__Plane Output__ + +Save the `Galaxies` in the dataset folder as a .json file, ensuring the true light profiles and galaxies +are safely stored and available to check how the dataset was simulated in the future. + +This can be loaded via the method `galaxies = ag.from_json()`. +""" +ag.output_to_json( + obj=galaxies, + file_path=Path(dataset_path, "galaxies.json"), +) + +""" +The dataset can be viewed in the folder `autogalaxy_workspace/imaging/simple`. + +__JAX Variant__ + +For an order-of-magnitude speedup on large or repeated simulations +(parameter sweeps, mock-data studies, batch figure generation), construct +the simulator with `use_jax=True` and wrap your call in `@jax.jit`. The +simulator handles pytree registration internally. + +```python +import jax + +simulator_jax = ag.SimulatorImaging( + exposure_time=300.0, + psf=psf, + background_sky_level=0.1, + add_poisson_noise_to_data=True, + use_jax=True, +) + +@jax.jit +def simulate(galaxies): + return simulator_jax.via_galaxies_from(galaxies=galaxies, grid=grid) + +dataset_jax = simulate(galaxies) # Imaging with jax.Array data +``` + +The `dataset_jax.data.array` is a `jax.Array`; `aplt.fits_imaging` and the +plotters call `numpy.asarray()` internally, so saving / plotting works +without manual conversion. + +Note: eager `simulator_jax.via_galaxies_from(galaxies, grid)` (no `@jax.jit`) +already runs on JAX and is sufficient for one-off simulations. The +`@jax.jit` wrap is only beneficial when you call the function many times. + +See `scripts/guides/api/data_structures.py` for the broader "JIT-it- +yourself" pattern. +"""